lidar_labeler.labeler_tools
This module provides a collection of secondary functions to assist with various tasks related to training and deploying a CNN on a labeled database of digital elevation models (DEMs).
"""
This module provides a collection of secondary functions to assist with various tasks related to training and
deploying a CNN on a labeled database of DEMs.
"""
import sys
import os
from lidar_labeler import preprocessing as pp

import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
from sklearn import metrics as skmetrics
import geopandas as gpd
from sklearn.metrics import RocCurveDisplay
from pathlib import Path

# Project-wide settings live in <package parent>/configs/global_variables.json.
with (Path(__file__).resolve().parent.parent / 'configs' / 'global_variables.json').open('r') as f:
    params_dict = json.load(f)

# Column names used whenever data/label arrays are stored in a DataFrame.
Y_COL_LABEL = params_dict['Y_COL_LABEL']
X_COL_LABEL = params_dict['X_COL_LABEL']


def _request_yn_input(question: str):
    '''Ask the user a yes/no question on the command prompt.

    The prompt is repeated until the answer starts with 'y' or 'n'
    (case-insensitive, surrounding whitespace ignored).

    Parameters
    ----------
    question : str
        The question that will be asked of the user. '? (Y/N): ' is appended.

    Returns
    -------
    binaryResponse : bool
        True if the user answered 'Y', False if the user answered 'N'.

    '''
    # Loop instead of recursing: the former recursive retry dropped the
    # `question` argument and raised TypeError on the first invalid answer.
    while True:
        response = str(input(question + '? (Y/N): ')).lower().strip()
        if response[:1] == 'y':
            return True
        if response[:1] == 'n':
            return False
        print('Whoopsy, please enter Y or N')


def load_json_params_preprocessing(filePath: str, print_info: bool=True):
    """Load preprocessing parameters from a JSON file and optionally print them.

    Args:
        filePath (str): Path to the JSON file containing parameters.
        print_info (bool, optional): If True, print each known parameter with the type
            of its value; parameters missing from the file are shown as None. Defaults to True.

    Returns:
        dict: The parsed JSON content, returned unchanged.
    """
    # (display name, JSON key) pairs in the order they are reported.
    reportedSettings = (
        ("Path", 'path'),
        ("Data Column", 'dataColumn'),
        ("Label Column", 'labelColumn'),
        ("Labels to Remove", 'labelsToRemove'),
        ("Label Lookup Path", 'labelLookupPath'),
        ("Label", 'label'),
        ("Data Type", 'dataType'),
        ("Scaling Method", 'scalingMethod'),
        ("Save Directory", 'saveDirectory'),
        ("Do Apply Gaussian Fiter", 'doApplyGaussianFilter'),
        ("Sigma", 'sigma'),
        ("Evenly Distribute DataFrame", 'evenlyDistributeDf'),
        ("Subset Df", 'subset'),
    )

    with open(filePath, 'r') as f:
        params_dict = json.load(f)

    settings = [(title, params_dict.get(key, None)) for title, key in reportedSettings]

    if print_info:
        for title, value in settings:
            print("{} ({}): {}".format(title, type(value), value))

    return params_dict


def check_array_for_abnormal_values(arr: np.ndarray):
    """
    Check an array for NaN, infinite values, zeros, and shape consistency.

    Args:
        arr (np.ndarray): Array to be checked. An object array whose elements are
            themselves arrays (e.g. ragged samples) is inspected element by element.

    Returns:
        dict: Counts under "NaN count", "Infinite count" and "Zero count" (ints) and a
            bool under "Shape consistent" (True when all samples share one shape).
    """
    arr = np.asarray(arr)

    if arr.dtype == object:
        # Only object arrays can hold samples of differing shapes.
        samples = [np.asarray(sample, dtype=float) for sample in arr.ravel()]
        shapeConsistent = len({sample.shape for sample in samples}) <= 1
    else:
        # A regular ndarray is rectangular by construction (also when empty).
        samples = [arr]
        shapeConsistent = True

    return {
        "NaN count": sum(int(np.isnan(sample).sum()) for sample in samples),
        "Infinite count": sum(int(np.isinf(sample).sum()) for sample in samples),
        "Zero count": sum(int((sample == 0).sum()) for sample in samples),
        "Shape consistent": shapeConsistent
    }


def arrays_to_dataframe(dataArray, labelArray):
    """Convert data and label arrays to a DataFrame.

    Args:
        dataArray (np.ndarray): Array containing data; each item along axis 0 becomes one cell.
        labelArray (np.ndarray): Array containing labels; the first element of each row is used.

    Returns:
        pd.DataFrame: DataFrame with columns X_COL_LABEL (data) and Y_COL_LABEL (labels).
    """
    frameColumns = {
        X_COL_LABEL: list(dataArray),
        Y_COL_LABEL: [labelRow[0] for labelRow in labelArray],
    }
    return pd.DataFrame(frameColumns)


def subset_arrays(dataArray: np.ndarray, labelArray: np.ndarray, subsetSize: int):
    """Create a subset of data and label arrays, e.g. for quick experiments during model development.

    Args:
        dataArray (np.ndarray): Array containing data.
        labelArray (np.ndarray): Array containing labels.
        subsetSize (int): Size of the subset, forwarded to preprocessing.subset_df.

    Returns:
        tuple: Subset of data and label arrays.
    """
    sampledDf = pp.subset_df(arrays_to_dataframe(dataArray, labelArray), subsetSize)
    subsetDataArray, subsetLabelArray = pp.make_arrays(sampledDf, dataCol=X_COL_LABEL, labelCol=Y_COL_LABEL)
    return subsetDataArray, subsetLabelArray


def show_performance_curve(training_result, metrics: list, savePath: str=None, label: str=None):
    """Plot performance curves for training and validation metrics.

    Args:
        training_result: Object exposing a `history` mapping of metric name to per-epoch
            values (e.g. the History object returned by Keras model.fit).
        metrics (list): Metrics to plot; each needs a matching 'val_<metric>' entry.
        savePath (str, optional): Directory to save the plot in. Defaults to None.
        label (str, optional): Prefix of the saved file name. Defaults to None.

    Raises:
        ValueError: If `metrics` is empty.
    """
    show_performance_curve_history_dict(training_result.history, metrics, savePath=savePath, label=label)


def show_performance_curve_history_dict(history_data: dict, metrics: list, savePath: str=None, label: str=None):
    """Plot performance curves for training and validation metrics from a history dictionary.

    Args:
        history_data (dict): Maps metric names (and 'val_<metric>') to per-epoch values.
        metrics (list): Metrics to plot, one subplot each.
        savePath (str, optional): Directory to save the plot in. Defaults to None.
        label (str, optional): Prefix of the saved file name. Defaults to None.

    Raises:
        ValueError: If `metrics` is empty.
    """
    n = len(metrics)
    if n == 0:
        raise ValueError('metrics must contain at least one metric name')

    # squeeze=False keeps `axs` two-dimensional even for a single metric;
    # without it plt.subplots returns a bare Axes that cannot be indexed.
    _, axs = plt.subplots(1, n, figsize=(5 * n, 5), squeeze=False)
    for ax, metric in zip(axs[0], metrics):
        name = str(metric)
        ax.plot(history_data[name], label=name)
        ax.plot(history_data['val_' + name], label='val_' + name)
        ax.set_xlabel('Epoch')
        ax.set_ylabel('Score')
        ax.legend()

    if savePath:
        path = os.path.join(savePath, f'{label}_performance_curve.png')
        print(f'Saving performance curve to: {path}')
        plt.savefig(path)
    plt.show()


def show_roc_curve(trueLabels: np.ndarray, predLabels: np.ndarray, savePath: str=None, label: str=None):
    """Plot ROC curve.

    Args:
        trueLabels (np.ndarray): Array of true labels.
        predLabels (np.ndarray): Array of predicted labels or scores.
        savePath (str, optional): Directory to save the plot in. Defaults to None.
        label (str, optional): Prefix of the saved file name. Defaults to None.
    """
    RocCurveDisplay.from_predictions(trueLabels, predLabels)

    # Save before showing: once plt.show() returns the figure may already be
    # closed, and savefig would then write an empty image.
    if savePath:
        path = os.path.join(savePath, f'{label}_roc_curve.png')
        print(f'Saving roc curve to: {path}')
        plt.savefig(path)
    plt.show()


def load_label_lookup(pathToLookup: str):
    """Load label lookup dictionary from a JSON file.

    Args:
        pathToLookup (str): Path to the JSON file containing label lookup.

    Returns:
        dict: Label lookup dictionary.
    """
    with open(pathToLookup, 'r') as f:
        lookup = json.load(f)
    return lookup


def show_confusion_matrix(yTest: np.ndarray, pred: np.ndarray, lookupDict: dict=None, savePath: str=None, label: str=None, normalize: str=None):
    """Display a confusion matrix for evaluation.

    Args:
        yTest (np.ndarray): True labels.
        pred (np.ndarray): Predicted labels.
        lookupDict (dict, optional): Maps str(class id) to a display name. Classes missing
            from the lookup are shown by their id. Defaults to None.
        savePath (str, optional): Directory to save the plot in. Defaults to None.
        label (str, optional): Prefix of the saved file name; the plot is only saved when
            both savePath and label are given. Defaults to None.
        normalize (str, optional): None (no normalization), 'true' (rows), 'pred' (columns)
            or 'all' (whole population). Defaults to None.

    Returns:
        None
    """
    trueFlat = yTest.flatten()
    predFlat = pred.flatten()
    cm = skmetrics.confusion_matrix(trueFlat, predFlat, normalize=normalize)

    if lookupDict:
        # Label only the classes present in the data, in the sorted order
        # confusion_matrix uses; labelling every lookup key breaks the plot
        # when the lookup holds more classes than the data.
        classIds = np.unique(np.concatenate([trueFlat, predFlat]))
        displayLabels = [lookupDict.get(str(classId), str(classId)) for classId in classIds]
        disp = skmetrics.ConfusionMatrixDisplay(cm, display_labels=displayLabels)
    else:
        disp = skmetrics.ConfusionMatrixDisplay(cm)
    disp.plot()

    if savePath and label:
        path = os.path.join(savePath, f'{label}_confusion_matrix.png')
        print(f'Saving confusion matirix to: {path}')
        plt.savefig(path)
    return None


def display_rand_arrays(dataArray, labelArray, n, pathToLookup=None):
    """Display a random selection of arrays with corresponding labels. Useful when training a model to make sure
    the preprocessing step achieved the desired result.

    Args:
        dataArray (array-like): Array containing data.
        labelArray (array-like): Array containing labels; the first element of each row is used.
        n (int): Number of arrays to display.
        pathToLookup (str, optional): Path to the JSON file containing label lookup. Defaults to None.

    Returns:
        None
    """
    chosen = random.sample(range(len(dataArray)), n)

    labelLookup = None
    if pathToLookup:
        with open(pathToLookup, 'r') as f:
            labelLookup = json.load(f)

    for idx in chosen:
        plt.imshow(dataArray[idx], cmap='gray')

        rawLabel = labelArray[idx][0]
        # JSON keys are strings, so the raw label is stringified for the lookup.
        title = labelLookup[str(rawLabel)] if pathToLookup else rawLabel
        plt.title(f'Label: {title}')

        plt.colorbar()
        plt.show()  # one figure per sample
    return None


def get_arrays_from_json_params(params: str, loadDf: bool=True):
    """Load data and label arrays from a JSON parameter file.

    Args:
        params (str): Path to the JSON parameter file; must contain 'saveDirectory' and 'label'.
        loadDf (bool, optional): Whether to load a GeoDataFrame. Defaults to True.

    Returns:
        tuple: Data array, label array, number of rows, number of columns, and GeoDataFrame
            (None when loadDf is False).
    """
    with open(params, 'r') as f:
        params_dict = json.load(f)
    return get_arrays_from_file(params_dict['label'], params_dict['saveDirectory'], loadDf)


def get_arrays_from_file(label: str, saveDir: str, loadDf: bool=True):
    """Load data and label arrays from '<label>_data.npy' and '<label>_labels.npy'.

    Args:
        label (str): Label (file-name prefix) for the arrays.
        saveDir (str): Directory where the arrays are saved.
        loadDf (bool, optional): Whether to load '<label>_df.shp' as a GeoDataFrame. Defaults to True.

    Returns:
        tuple: Data array, label array, number of rows (X.shape[1]), number of columns
            (X.shape[2]), and GeoDataFrame (None when loadDf is False).
    """
    # Paths are built directly rather than through a str.format template so
    # that braces in the directory or label cannot break them.
    # NOTE(security): allow_pickle=True executes pickled code on load; only
    # use this with trusted .npy files.
    X = np.load(os.path.join(saveDir, f'{label}_data.npy'), allow_pickle=True)
    y = np.load(os.path.join(saveDir, f'{label}_labels.npy'), allow_pickle=True)
    nrows = X.shape[1]
    ncols = X.shape[2]

    print(X.shape,y.shape)
    print('NaN Value Count in X:', np.sum(np.isnan(X)))  # Check for NaN values

    df = gpd.read_file(os.path.join(saveDir, f'{label}_df.shp')) if loadDf else None
    return X, y, nrows, ncols, df


def subset_processed_df(saveDirectory: str, label: str, subset: int, newLabel: str=None):
    """Subset previously processed data and save the subset next to the original.

    Args:
        saveDirectory (str): Directory where the processed data is saved.
        label (str): Label (file-name prefix) of the processed data.
        subset (int): Size of the subset, forwarded to preprocessing.subset_df.
        newLabel (str, optional): Prefix for the output files; '<label>_subset' is used
            when omitted. Defaults to None.

    Returns:
        tuple: A tuple containing data array, label array, and subset DataFrame.
    """
    # Load df, label, and data arrays
    X, y, _, _, df = get_arrays_from_file(label, saveDirectory)

    # Put data and label arrays back into df
    df[X_COL_LABEL] = list(X)
    df[Y_COL_LABEL] = [lab[0] for lab in y]
    subsetDf = pp.subset_df(df, subset)

    # Name the columns explicitly (as subset_arrays does) instead of relying
    # on make_arrays' defaults matching the configured column names.
    dataArray, labelArray = pp.make_arrays(subsetDf, dataCol=X_COL_LABEL, labelCol=Y_COL_LABEL)

    outPrefix = newLabel if newLabel else f'{label}_subset'
    dataArrayPath = os.path.join(saveDirectory, f'{outPrefix}_data.npy')
    labelArrayPath = os.path.join(saveDirectory, f'{outPrefix}_labels.npy')
    dfOutPath = os.path.join(saveDirectory, f'{outPrefix}_df.shp')

    # Save data and label arrays
    np.save(dataArrayPath, dataArray)
    np.save(labelArrayPath, labelArray)

    # Save df without the array column
    outDf = subsetDf.drop(columns=[X_COL_LABEL])
    outDf.to_file(dfOutPath)
    print(f'Data Array Path: {dataArrayPath}\nLabel Array Path: {labelArrayPath}\nDataframe Path: {dfOutPath}')

    return dataArray, labelArray, subsetDf
def load_json_params_preprocessing(filePath: str, print_info: bool=True):
    """Read a preprocessing parameter file and optionally echo its known settings.

    Args:
        filePath (str): Location of the JSON parameter file.
        print_info (bool, optional): When True, print every recognised setting together
            with the Python type of its value. Settings missing from the file are
            reported as None. Defaults to True.

    Returns:
        dict: The parsed JSON content, returned unchanged.
    """
    # (display name, JSON key) pairs, listed in the order they are reported.
    reportedSettings = (
        ("Path", 'path'),
        ("Data Column", 'dataColumn'),
        ("Label Column", 'labelColumn'),
        ("Labels to Remove", 'labelsToRemove'),
        ("Label Lookup Path", 'labelLookupPath'),
        ("Label", 'label'),
        ("Data Type", 'dataType'),
        ("Scaling Method", 'scalingMethod'),
        ("Save Directory", 'saveDirectory'),
        ("Do Apply Gaussian Fiter", 'doApplyGaussianFilter'),
        ("Sigma", 'sigma'),
        ("Evenly Distribute DataFrame", 'evenlyDistributeDf'),
        ("Subset Df", 'subset'),
    )

    with open(filePath, 'r') as f:
        params_dict = json.load(f)

    # Look every setting up before deciding whether to print anything.
    settings = [(title, params_dict.get(key, None)) for title, key in reportedSettings]

    if print_info:
        for title, value in settings:
            print("{} ({}): {}".format(title, type(value), value))

    return params_dict
Load parameters from a JSON file in a custom format.
Arguments:
- filePath (str): Path to the JSON file containing parameters.
- print_info (bool, optional): If True, print information about the parameters. Defaults to True.
Returns:
dict: A dictionary containing the parameters.
def check_array_for_abnormal_values(arr: np.ndarray):
    """
    Check an array for NaN, infinite values, zeros, and shape consistency.

    A regular (non-object) ndarray is rectangular by construction, so its samples
    always share one shape; this also holds for an empty array. An object array
    whose elements are themselves arrays (e.g. ragged samples) is inspected
    element by element and its shapes are actually compared.

    Args:
        arr (np.ndarray): Array to be checked.

    Returns:
        dict: Counts under "NaN count", "Infinite count" and "Zero count" (ints), and a
            bool under "Shape consistent" that is True when all samples share one shape.
    """
    arr = np.asarray(arr)

    if arr.dtype == object:
        # Only object arrays can hold samples of differing shapes; np.isnan does
        # not accept object arrays, so each sample is converted on its own.
        samples = [np.asarray(sample, dtype=float) for sample in arr.ravel()]
        shapeConsistent = len({sample.shape for sample in samples}) <= 1
    else:
        samples = [arr]
        shapeConsistent = True

    nanCount = sum(int(np.isnan(sample).sum()) for sample in samples)
    infCount = sum(int(np.isinf(sample).sum()) for sample in samples)
    zeroCount = sum(int((sample == 0).sum()) for sample in samples)

    return {
        "NaN count": nanCount,
        "Infinite count": infCount,
        "Zero count": zeroCount,
        "Shape consistent": shapeConsistent
    }
Check an array for NaN, infinite values, zeros, and shape consistency.
Arguments:
- arr (np.ndarray): Array to be checked.
Returns:
dict: Dictionary containing counts of NaN, infinite, and zero values, and shape consistency.
def arrays_to_dataframe(dataArray, labelArray):
    """Pack a data array and its label array into a two-column DataFrame.

    Args:
        dataArray (np.ndarray): Array containing data; each item along the first
            axis becomes one cell of the data column.
        labelArray (np.ndarray): Array containing labels; the first element of
            each row is used as that sample's label.

    Returns:
        pd.DataFrame: DataFrame whose columns are named by the module-level
            constants X_COL_LABEL (data) and Y_COL_LABEL (labels).
    """
    frameColumns = {
        X_COL_LABEL: list(dataArray),
        Y_COL_LABEL: [labelRow[0] for labelRow in labelArray],
    }
    return pd.DataFrame(frameColumns)
Convert data and label arrays to a DataFrame.
Arguments:
- dataArray (np.ndarray): Array containing data.
- labelArray (np.ndarray): Array containing labels.
Returns:
pd.DataFrame: DataFrame with data and labels.
def subset_arrays(dataArray: np.ndarray, labelArray: np.ndarray, subsetSize: int):
    """Return a smaller data/label array pair, e.g. for quick experiments during model development.

    The arrays are packed into a DataFrame, reduced with preprocessing.subset_df
    and unpacked again with preprocessing.make_arrays.

    Args:
        dataArray (np.ndarray): Array containing data.
        labelArray (np.ndarray): Array containing labels.
        subsetSize (int): Size of the subset, forwarded to preprocessing.subset_df
            (presumably a random sample -- confirm in that function).

    Returns:
        tuple: Subset of data and label arrays.
    """
    sampledDf = pp.subset_df(arrays_to_dataframe(dataArray, labelArray), subsetSize)
    subsetDataArray, subsetLabelArray = pp.make_arrays(
        sampledDf,
        dataCol=X_COL_LABEL,
        labelCol=Y_COL_LABEL,
    )
    return subsetDataArray, subsetLabelArray
Create a subset of data and label arrays. Can be used during model development to test model performance on a random subset of data.
Arguments:
- dataArray (np.ndarray): Array containing data.
- labelArray (np.ndarray): Array containing labels.
- subsetSize (int): Size of the subset.
Returns:
tuple: Subset of data and label arrays.
def show_performance_curve(training_result, metrics: list, savePath: str=None, label: str=None):
    """Plot performance curves for training and validation metrics.

    One subplot is drawn per metric, showing the training curve and the matching
    'val_<metric>' curve.

    Args:
        training_result: Object exposing a `history` mapping of metric name to per-epoch
            values (e.g. the History object returned by Keras model.fit).
        metrics (list): List of metrics to plot.
        savePath (str, optional): Directory to save the performance curve plot in. Defaults to None.
        label (str, optional): Prefix of the saved file name. Defaults to None.

    Raises:
        ValueError: If `metrics` is empty.
    """
    n = len(metrics)
    if n == 0:
        raise ValueError('metrics must contain at least one metric name')

    # squeeze=False keeps `axs` two-dimensional even for a single metric;
    # without it plt.subplots returns a bare Axes that cannot be indexed.
    _, axs = plt.subplots(1, n, figsize=(5 * n, 5), squeeze=False)
    history = training_result.history
    for ax, metric in zip(axs[0], metrics):
        name = str(metric)
        ax.plot(history[name], label=name)
        ax.plot(history['val_' + name], label='val_' + name)
        ax.set_xlabel('Epoch')
        ax.set_ylabel('Score')
        ax.legend()

    if savePath:
        path = os.path.join(savePath, f'{label}_performance_curve.png')
        print(f'Saving performance curve to: {path}')
        plt.savefig(path)
    plt.show()
Plot performance curves for training and validation metrics.
Arguments:
- training_result (keras.callbacks.History): The history object returned by model.fit, containing training and validation metrics.
- metrics (list): List of metrics to plot.
- savePath (str, optional): Path to save the performance curve plot. Defaults to None.
- label (str, optional): Label for the plot. Defaults to None.
def show_performance_curve_history_dict(history_data: dict, metrics: list, savePath: str=None, label: str=None):
    """Plot performance curves for training and validation metrics from a history dictionary.

    One subplot is drawn per metric, showing the training curve and the matching
    'val_<metric>' curve.

    Args:
        history_data (dict): Maps metric names (and 'val_<metric>') to per-epoch values.
        metrics (list): List of metrics to plot.
        savePath (str, optional): Directory to save the performance curve plot in. Defaults to None.
        label (str, optional): Prefix of the saved file name. Defaults to None.

    Raises:
        ValueError: If `metrics` is empty.
    """
    n = len(metrics)
    if n == 0:
        raise ValueError('metrics must contain at least one metric name')

    # squeeze=False keeps `axs` two-dimensional even for a single metric;
    # without it plt.subplots returns a bare Axes that cannot be indexed.
    _, axs = plt.subplots(1, n, figsize=(5 * n, 5), squeeze=False)
    for ax, metric in zip(axs[0], metrics):
        name = str(metric)
        ax.plot(history_data[name], label=name)
        ax.plot(history_data['val_' + name], label='val_' + name)
        ax.set_xlabel('Epoch')
        ax.set_ylabel('Score')
        ax.legend()

    if savePath:
        path = os.path.join(savePath, f'{label}_performance_curve.png')
        print(f'Saving performance curve to: {path}')
        plt.savefig(path)
    plt.show()
Plot performance curves for training and validation metrics from a history dictionary.
Arguments:
- history_data (dict): Dictionary containing performance history.
- metrics (list): List of metrics to plot.
- savePath (str, optional): Path to save the performance curve plot. Defaults to None.
- label (str, optional): Label for the plot. Defaults to None.
def show_roc_curve(trueLabels: np.ndarray, predLabels: np.ndarray, savePath: str=None, label: str=None):
    """Plot ROC curve.

    Args:
        trueLabels (np.ndarray): Array of true labels.
        predLabels (np.ndarray): Array of predicted labels or scores.
        savePath (str, optional): Directory to save the ROC curve plot in. Defaults to None.
        label (str, optional): Prefix of the saved file name. Defaults to None.
    """
    RocCurveDisplay.from_predictions(trueLabels, predLabels)

    # Save before showing: once plt.show() returns the figure may already be
    # closed, and savefig would then write an empty image.
    if savePath:
        path = os.path.join(savePath, f'{label}_roc_curve.png')
        print(f'Saving roc curve to: {path}')
        plt.savefig(path)

    plt.show()
Plot ROC curve.
Arguments:
- trueLabels (np.ndarray): Array of true labels.
- predLabels (np.ndarray): Array of predicted labels.
- savePath (str, optional): Path to save the ROC curve plot. Defaults to None.
- label (str, optional): Label for the plot. Defaults to None.
def load_label_lookup(pathToLookup: str):
    """Read the label lookup stored in a JSON file.

    Args:
        pathToLookup (str): Path to the JSON file containing label lookup.

    Returns:
        dict: Label lookup dictionary, exactly as parsed from the file.
    """
    with open(pathToLookup, 'r') as lookupFile:
        lookup = json.load(lookupFile)
    return lookup
Load label lookup dictionary from a JSON file.
Arguments:
- pathToLookup (str): Path to the JSON file containing label lookup.
Returns:
dict: Label lookup dictionary.
def show_confusion_matrix(yTest: np.ndarray, pred: np.ndarray, lookupDict: dict=None, savePath: str=None, label: str=None, normalize: str=None):
    """Display a confusion matrix for evaluation.

    Args:
        yTest (np.ndarray): True labels.
        pred (np.ndarray): Predicted labels.
        lookupDict (dict, optional): Maps str(class id) to a display name. Classes missing
            from the lookup are shown by their id. Defaults to None.
        savePath (str, optional): Directory to save the plot in. Defaults to None.
        label (str, optional): Prefix of the saved file name. The plot is only saved when
            both savePath and label are given. Defaults to None.
        normalize (str, optional): Type of normalization for the confusion matrix.
            - None: will not normalize
            - 'true': normalizes over true population (rows)
            - 'pred': normalizes over predicted population (columns)
            - 'all': normalizes to the whole population
            Defaults to None.

    Returns:
        None
    """
    trueFlat = yTest.flatten()
    predFlat = pred.flatten()
    cm = skmetrics.confusion_matrix(trueFlat, predFlat, normalize=normalize)

    if lookupDict:
        # Label only the classes present in the data, in the sorted order
        # confusion_matrix uses; labelling every lookup key breaks the plot
        # when the lookup holds more classes than the data.
        classIds = np.unique(np.concatenate([trueFlat, predFlat]))
        displayLabels = [lookupDict.get(str(classId), str(classId)) for classId in classIds]
        disp = skmetrics.ConfusionMatrixDisplay(cm, display_labels=displayLabels)
    else:
        disp = skmetrics.ConfusionMatrixDisplay(cm)
    disp.plot()

    if savePath and label:
        path = os.path.join(savePath, f'{label}_confusion_matrix.png')
        print(f'Saving confusion matirix to: {path}')
        plt.savefig(path)
    return None
Display a confusion matrix for evaluation.
Arguments:
- yTest (array-like): True labels.
- pred (array-like): Predicted labels.
- lookupDict (dict, optional): Dictionary for mapping label indices to their actual labels. Defaults to None.
- savePath (str, optional): Path to save the plot. Defaults to None.
- label (str, optional): Label for the plot. Defaults to None.
- normalize (str, optional): Type of normalization for the confusion matrix.
  - None: does not normalize
  - 'true': normalizes over the true population (rows)
  - 'pred': normalizes over the predicted population (columns)
  - 'all': normalizes over the whole population
  Defaults to None.
Returns:
None
def display_rand_arrays(dataArray, labelArray, n, pathToLookup=None):
    """Show `n` randomly chosen samples as grayscale images titled with their labels.

    Handy while training a model to confirm that the preprocessing step produced
    the desired result.

    Args:
        dataArray (array-like): Array containing data.
        labelArray (array-like): Array containing labels; the first element of each row is used.
        n (int): Number of arrays to display.
        pathToLookup (str, optional): Path to the JSON file containing label lookup. When given,
            titles show the looked-up name instead of the raw label. Defaults to None.

    Returns:
        None
    """
    chosen = random.sample(range(len(dataArray)), n)

    labelLookup = None
    if pathToLookup:
        with open(pathToLookup, 'r') as lookupFile:
            labelLookup = json.load(lookupFile)

    for idx in chosen:
        plt.imshow(dataArray[idx], cmap='gray')

        rawLabel = labelArray[idx][0]
        # JSON keys are strings, so the raw label is stringified for the lookup.
        title = labelLookup[str(rawLabel)] if pathToLookup else rawLabel
        plt.title(f'Label: {title}')

        plt.colorbar()
        plt.show()  # one figure per sample
    return None
Display a random selection of arrays with their corresponding labels. Useful when training a model to make sure the preprocessing step achieved the desired result.
Arguments:
- dataArray (array-like): Array containing data.
- labelArray (array-like): Array containing labels.
- n (int): Number of arrays to display.
- pathToLookup (str, optional): Path to the JSON file containing label lookup. Defaults to None.
Returns:
None
def get_arrays_from_json_params(params: str, loadDf: bool=True):
    """Load data and label arrays from a JSON parameter file.

    The parameter file must provide 'saveDirectory' and 'label'; the arrays are read
    from '<saveDirectory>/<label>_data.npy' and '<saveDirectory>/<label>_labels.npy'.

    Args:
        params (str): Path to the JSON parameter file.
        loadDf (bool, optional): Whether to load '<label>_df.shp' as a GeoDataFrame. Defaults to True.

    Returns:
        tuple: A tuple containing data array, label array, number of rows (X.shape[1]),
            number of columns (X.shape[2]), and GeoDataFrame (None when loadDf is False).
    """
    with open(params, 'r') as f:
        params_dict = json.load(f)
    saveDirectory = params_dict['saveDirectory']
    label = params_dict['label']

    # Paths are built directly rather than through a str.format template so
    # that braces in the directory or label cannot break them.
    # NOTE(security): allow_pickle=True executes pickled code on load; only
    # use this with trusted .npy files.
    X = np.load(os.path.join(saveDirectory, f'{label}_data.npy'), allow_pickle=True)
    y = np.load(os.path.join(saveDirectory, f'{label}_labels.npy'), allow_pickle=True)
    nrows = X.shape[1]
    ncols = X.shape[2]
    print(X.shape,y.shape)
    print('NaN Value Count in X:', np.sum(np.isnan(X)))  # Check for NaN values

    if loadDf:
        df = gpd.read_file(os.path.join(saveDirectory, f'{label}_df.shp'))
    else:
        df = None
    return X, y, nrows, ncols, df
Load data and label arrays from a JSON parameter file.
Arguments:
- params (str): Path to the JSON parameter file.
- loadDf (bool, optional): Whether to load a GeoDataFrame. Defaults to True.
Returns:
tuple: A tuple containing data array, label array, number of rows, number of columns, and GeoDataFrame (if loaded).
def get_arrays_from_file(label: str, saveDir: str, loadDf: bool=True):
    """Load data and label arrays from '<label>_data.npy' and '<label>_labels.npy'.

    Args:
        label (str): Label (file-name prefix) for the arrays.
        saveDir (str): Directory where the arrays are saved.
        loadDf (bool, optional): Whether to load '<label>_df.shp' as a GeoDataFrame. Defaults to True.

    Returns:
        tuple: A tuple containing data array, label array, number of rows (X.shape[1]),
            number of columns (X.shape[2]), and GeoDataFrame (None when loadDf is False).
    """
    # Paths are built directly rather than through a str.format template so
    # that braces in the directory or label cannot break them.
    # NOTE(security): allow_pickle=True executes pickled code on load; only
    # use this with trusted .npy files.
    X = np.load(os.path.join(saveDir, f'{label}_data.npy'), allow_pickle=True)
    y = np.load(os.path.join(saveDir, f'{label}_labels.npy'), allow_pickle=True)
    nrows = X.shape[1]
    ncols = X.shape[2]

    print(X.shape,y.shape)
    print('NaN Value Count in X:', np.sum(np.isnan(X)))  # Check for NaN values

    if loadDf:
        df = gpd.read_file(os.path.join(saveDir, f'{label}_df.shp'))
    else:
        df = None
    return X, y, nrows, ncols, df
Load data and label arrays from a file.
Arguments:
- label (str): Label for the arrays.
- saveDir (str): Directory where the arrays are saved.
- loadDf (bool, optional): Whether to load a GeoDataFrame. Defaults to True.
Returns:
tuple: A tuple containing data array, label array, number of rows, number of columns, and GeoDataFrame (if loaded).
def subset_processed_df(saveDirectory: str, label: str, subset: int, newLabel: str=None):
    """Subset previously processed data and save the subset next to the original.

    The arrays and shapefile saved under `label` are loaded, re-attached to the
    DataFrame, reduced with preprocessing.subset_df and written back as
    '<prefix>_data.npy', '<prefix>_labels.npy' and '<prefix>_df.shp'.

    Args:
        saveDirectory (str): Directory where the processed data is saved.
        label (str): Label (file-name prefix) of the processed data.
        subset (int): Size of the subset, forwarded to preprocessing.subset_df.
        newLabel (str, optional): Prefix for the output files; '<label>_subset' is used
            when omitted. Defaults to None.

    Returns:
        tuple: A tuple containing data array, label array, and subset DataFrame.
    """
    # Load df, label, and data arrays
    X, y, _, _, df = get_arrays_from_file(label, saveDirectory)

    # Put data and label arrays back into df
    df[X_COL_LABEL] = list(X)
    df[Y_COL_LABEL] = [lab[0] for lab in y]
    subsetDf = pp.subset_df(df, subset)

    # Name the columns explicitly (as subset_arrays does) instead of relying
    # on make_arrays' defaults matching the configured column names.
    dataArray, labelArray = pp.make_arrays(subsetDf, dataCol=X_COL_LABEL, labelCol=Y_COL_LABEL)

    if newLabel:
        savebaseName = os.path.join(saveDirectory, '{}_{{}}.npy'.format(newLabel))
        dfOutPath = os.path.join(saveDirectory, f'{newLabel}_df.shp')
    else:
        savebaseName = os.path.join(saveDirectory, '{}_subset_{{}}.npy'.format(label))
        dfOutPath = os.path.join(saveDirectory, f'{label}_subset_df.shp')

    # Format basename for data and label arrays
    dataArrayPath = savebaseName.format('data')
    labelArrayPath = savebaseName.format('labels')

    # Save data and label arrays
    np.save(dataArrayPath, dataArray)
    np.save(labelArrayPath, labelArray)

    # Save df without the array column
    outDf = subsetDf.drop(columns=[X_COL_LABEL])
    outDf.to_file(dfOutPath)
    print(f'Data Array Path: {dataArrayPath}\nLabel Array Path: {labelArrayPath}\nDataframe Path: {dfOutPath}')

    return dataArray, labelArray, subsetDf
Subset a processed DataFrame based on a specified label and subset.
Arguments:
- saveDirectory (str): Directory where the processed data is saved.
- label (str): Label for the data.
- subset (int): Size of the subset.
- newLabel (str, optional): New label for the subset. Defaults to None.
Returns:
tuple: A tuple containing data array, label array, and subset DataFrame.