lidar_labeler.create_deploy_df_from_files

This script handles the creation of deployment GeoDataFrames from polygon and DEM files. It includes functions to divide polygons into grid cells and associate each cell with a raster file path. The create_deploy_df_from_files function creates a GeoDataFrame by processing polygon files and DEMs, saving the results to specified directories.

  1"""
  2This script handles the creation of deployment GeoDataFrames from polygon and DEM files. 
  3It includes functions to divide polygons into grid cells and associate each cell with a raster file path. 
  4The `create_deploy_df_from_files` function creates a GeoDataFrame by processing polygon 
  5files and DEMs, saving the results to specified directories.
  6"""
  7
  8import sys
  9import os
 10import json
 11from osgeo import gdal
 12from pathlib import Path
 13# scriptDir = os.path.dirname(os.path.abspath(__file__))
 14# parentDir = os.path.dirname(scriptDir)
 15# sys.path.append(parentDir)
 16# from lidar_labeler import batch_download_chunks as bdc
 17from lidar_labeler import batch_download_chunks as bdc
 18# global_vars = os.path.join(parentDir, 'configs', 'global_variables.json')
 19
 20# with open(global_vars, 'r') as f:
 21#     params_dict = json.load(f)
 22
 23with (Path(__file__).resolve().parent.parent / 'configs' / 'global_variables.json').open('r') as f:
 24    params_dict = json.load(f)   
 25
 26RSTR_COL_PATTERN = params_dict['RSTR_COL_PATTERN']
 27
 28def _merge_warp_dems(inFileNames:list, outFileName:str, 
 29                     outExtent = None, outEPSG:int = None, pixelSize:float=None, doReturnGdalSourceResult:bool = False,
 30                    resampleAlg:str = 'cubic', noDataValue:float = None, format:str = 'GTiff'):
 31    """Wrapper for gdal.Warp, an image mosaicing, reprojection and cropping function
 32
 33    Args:
 34        inFileNames (list): A list of all the filenames to merge
 35        outFileName (str): the output path to save the file as
 36        outExtent (list OR tuple, optional): ([minx, maxx], [miny, maxy]). Defaults to None.
 37        outEPSG (int, optional): EPSG code for the coordinate system of the specified output extent (also sets the output
 38            coordinate system). Defaults to None.
 39        pixelSize (float, optional):  Dimension of the output pixels (x and y direction) in the native units of the
 40            output coordinate system. Defaults to None.
 41        doReturnGdalSourceResult (bool, optional): If True returns the gdal source object for the newly created dataset. 
 42            If False (the default) returns none and closes the connection to the newly created dataset. Defaults to False.
 43        resampleAlg (str, optional): The resampling algorithm to use in reprojecting and merging the raster. Can be
 44            any option allowed by GDAL. Prefered options will likely be: 'near', 'bilinear', 'cubic', 'cubicspline',
 45            'average'. Defaults to 'cubic'.
 46        noDataValue (float, optional): No data value to use for the input and output data. Defaults to None.
 47        format (str, optional): File format to save the output dataset as. Defaults to 'GTiff'.
 48
 49    Returns:
 50        gridSource (None OR gdal.Dataset): If doReturnGdalSource is False, returns None. If doReturnGdalSource is True
 51            will instead return a gdal.Dataset instance representing the input raster after application of the warp.
 52    """
 53
 54    #In some ArcPro created virtual environments the path to the PROJ library (opensource projections) is not always created on startup
 55    #This will cause a gdal error when trying to transform raster bounding box coordinates to a new CRS.
 56    #This statement attempts to guess what the path should be, though there is no gurantee this will work w/ all environments
 57    if not('PROJ_LIB' in os.environ):
 58        env_path = os.environ['PATH'].split(';')[1] #The second item in the path in arcpro environments is <directory>\\environment\\Libray\\bin
 59        env_path = os.path.abspath(os.path.join(env_path ,os.pardir)) #Get the path on directory up (to library)
 60        proj_path = os.path.join(env_path,'share','proj')
 61        os.environ['PROJ_LIB'] = proj_path
 62
 63    if not(outExtent is None):
 64        outExtent = [outExtent[0][0], outExtent[1][0], outExtent[0][1], outExtent[1][1]]
 65
 66
 67        #If an output coordinate system was specified, format it for gdal
 68    if not(outEPSG is None):
 69        outEPSG = 'EPSG:{}'.format(outEPSG)
 70        #If an output bounding box was specified, format it for gdal. Leave as none if there won't be any clipping
 71
 72
 73    wrpOptions = gdal.WarpOptions(
 74        outputBounds=outExtent,
 75        outputBoundsSRS=outEPSG,
 76        format=format,
 77        xRes=pixelSize, yRes=pixelSize,
 78        resampleAlg=resampleAlg,
 79        dstSRS=outEPSG,
 80        dstNodata=noDataValue,
 81        srcNodata=noDataValue
 82    )
 83    gridSource = gdal.Warp(outFileName,inFileNames, options=wrpOptions)
 84
 85   
 86    if not(doReturnGdalSourceResult):
 87        gridSource = None
 88
 89    return gridSource
 90
 91def create_deploy_df_from_files(polygonPath:str, demPath:str, smallGridSize:int, downloadPath:str = None):
 92    """
 93    Creates a deployment GeoDataFrame from the specified polygon and DEM files, dividing the polygon area into grids
 94    and associating each grid cell with a raster file path.
 95
 96    Args:
 97        polygonPath (str): The file path to the input polygon.
 98        demPath (str or list): The path to the DEM file(s). Can be a single string or a list of DEM file paths.
 99        smallGridSize (int): The size of the small grid.
100        downloadPath (str, optional): The output directory to save the deployment dataframe and merged raster file. 
101                                      If not provided, defaults to the directory of the polygon path.
102
103    Returns:
104        medGridDf (gpd.GeoDataFrame): The deployment GeoDataFrame with medium grid geometry and paths to raster files.
105    """
106    # Extract the directory and filename from the provided polygon path
107    polygonDir, fname = os.path.split(polygonPath)
108    polyName, ext = os.path.splitext(fname)
109
110    # Set the download path to the polygon directory if not provided
111    if not downloadPath:
112        downloadPath = polygonDir
113    
114    # Create a GeoDataFrame of medium-sized grid cells from the polygon and small grid size
115    medGridDf = bdc.create_gridded_dfs_for_batch_download(polygonPath, smallGridSize, downloadPath)[1]
116
117    # Check if demPath is a list of DEM file paths
118    if isinstance(demPath, list):
119        # Define the output path for the merged raster file
120        outPath = os.path.join(downloadPath, f'{polyName}_raster_merge.tif')
121
122        # Load and process the polygon to get its extent
123        extentPoly = bdc.load_polygon_gdb_and_convert_multipolygons(polygonPath)[0:]
124        x,y = extentPoly['geometry'].exterior.coords.xy
125        mergeExtent = ([min(x), max(x)],[min(y),max(y)])
126
127        # Merge and warp the DEM files into the output path
128        _merge_warp_dems(demPath, outPath, mergeExtent)
129
130        # Assign the path of the merged raster file to each row in the GeoDataFrame
131        medGridDf[RSTR_COL_PATTERN] = outPath * len(medGridDf)
132
133    # Check if demPath is a single DEM file path
134    elif isinstance(demPath,str):
135        # Assign the single DEM file path to each row in the GeoDataFrame
136        medGridDf[RSTR_COL_PATTERN] = medGridDf.apply(lambda row: demPath, axis=1)
137        
138    else:
139        # Raise an error if demPath is neither a list nor a string
140        raise ValueError('demPath must be type list or string.')
141    
142    # Define the output path for the deployment GeoDataFrame
143    dfOutPath = os.path.join(downloadPath, f'{polyName}_mediumGrid.shp')
144
145    # Save the GeoDataFrame to a shapefile
146    medGridDf.to_file(dfOutPath, truncation=False)
147    
148    # Print the GeoDataFrame and the path where it was saved
149    print(medGridDf)
150    print(f'Deploy dataframe saved to: {dfOutPath}')
151
152    return medGridDf
153
if __name__ == '__main__':
    import sys
    import json

    # The script takes one argument: the path to a JSON file whose keys are the keyword
    # arguments of create_deploy_df_from_files.
    if len(sys.argv) < 2:
        sys.exit(f'Usage: {sys.argv[0]} <params.json>')

    # Load parameters from the JSON file. A separate name is used so the module-level
    # params_dict (global configuration) is not overwritten.
    with open(sys.argv[1], 'r') as f:
        run_params = json.load(f)

    create_deploy_df_from_files(**run_params)
RSTR_COL_PATTERN = 'rstr_paths'
def create_deploy_df_from_files( polygonPath: str, demPath: str, smallGridSize: int, downloadPath: str = None):
 92def create_deploy_df_from_files(polygonPath:str, demPath:str, smallGridSize:int, downloadPath:str = None):
 93    """
 94    Creates a deployment GeoDataFrame from the specified polygon and DEM files, dividing the polygon area into grids
 95    and associating each grid cell with a raster file path.
 96
 97    Args:
 98        polygonPath (str): The file path to the input polygon.
 99        demPath (str or list): The path to the DEM file(s). Can be a single string or a list of DEM file paths.
100        smallGridSize (int): The size of the small grid.
101        downloadPath (str, optional): The output directory to save the deployment dataframe and merged raster file. 
102                                      If not provided, defaults to the directory of the polygon path.
103
104    Returns:
105        medGridDf (gpd.GeoDataFrame): The deployment GeoDataFrame with medium grid geometry and paths to raster files.
106    """
107    # Extract the directory and filename from the provided polygon path
108    polygonDir, fname = os.path.split(polygonPath)
109    polyName, ext = os.path.splitext(fname)
110
111    # Set the download path to the polygon directory if not provided
112    if not downloadPath:
113        downloadPath = polygonDir
114    
115    # Create a GeoDataFrame of medium-sized grid cells from the polygon and small grid size
116    medGridDf = bdc.create_gridded_dfs_for_batch_download(polygonPath, smallGridSize, downloadPath)[1]
117
118    # Check if demPath is a list of DEM file paths
119    if isinstance(demPath, list):
120        # Define the output path for the merged raster file
121        outPath = os.path.join(downloadPath, f'{polyName}_raster_merge.tif')
122
123        # Load and process the polygon to get its extent
124        extentPoly = bdc.load_polygon_gdb_and_convert_multipolygons(polygonPath)[0:]
125        x,y = extentPoly['geometry'].exterior.coords.xy
126        mergeExtent = ([min(x), max(x)],[min(y),max(y)])
127
128        # Merge and warp the DEM files into the output path
129        _merge_warp_dems(demPath, outPath, mergeExtent)
130
131        # Assign the path of the merged raster file to each row in the GeoDataFrame
132        medGridDf[RSTR_COL_PATTERN] = outPath * len(medGridDf)
133
134    # Check if demPath is a single DEM file path
135    elif isinstance(demPath,str):
136        # Assign the single DEM file path to each row in the GeoDataFrame
137        medGridDf[RSTR_COL_PATTERN] = medGridDf.apply(lambda row: demPath, axis=1)
138        
139    else:
140        # Raise an error if demPath is neither a list nor a string
141        raise ValueError('demPath must be type list or string.')
142    
143    # Define the output path for the deployment GeoDataFrame
144    dfOutPath = os.path.join(downloadPath, f'{polyName}_mediumGrid.shp')
145
146    # Save the GeoDataFrame to a shapefile
147    medGridDf.to_file(dfOutPath, truncation=False)
148    
149    # Print the GeoDataFrame and the path where it was saved
150    print(medGridDf)
151    print(f'Deploy dataframe saved to: {dfOutPath}')
152
153    return medGridDf

Creates a deployment GeoDataFrame from the specified polygon and DEM files, dividing the polygon area into grids and associating each grid cell with a raster file path.

Arguments:
  • polygonPath (str): The file path to the input polygon.
  • demPath (str or list): The path to the DEM file(s). Can be a single string or a list of DEM file paths.
  • smallGridSize (int): The size of the small grid.
  • downloadPath (str, optional): The output directory to save the deployment dataframe and merged raster file. If not provided, defaults to the directory of the polygon path.
Returns:
  • medGridDf (gpd.GeoDataFrame): The deployment GeoDataFrame with medium grid geometry and paths to raster files.