from pathlib import Path

# --- Catalog Production setup ---

# This should point to the dataset you wish to catalogize.
newpath = Path("../../../../Data/Downselected.parq")
# NOTE(review): `io` here is the project's io module (p4tools), not the stdlib
# `io` — confirm the import at the top of the notebook.
db = io.DBManager(newpath)

# Setting the logger to level 10 (== logging.DEBUG) so we get all the INFO
# printed out.
LOGGER.setLevel(10)
execute_in_parallel
def execute_in_parallel(
func:Callable, # The function to be executed for each element
iterable:Iterable, # The iterable over which to execute the function
max_workers:int | None=None, # Number of parallel workers. Defaults to ProcessPoolExecutor default.
description:str='Processing', # Label for the tqdm progress bar.
): # Successful results in submission order (skipping failures).
Execute a function in parallel over an iterable with per-item error handling.
Unlike pool.map, individual failures do not abort the entire batch.
blotch_id_generator
def blotch_id_generator(
)->Generator:
Generator for blotch marking IDs (prefix 'B').
fan_id_generator
def fan_id_generator(
)->Generator:
Generator for fan marking IDs (prefix 'F').
marking_id_generator
def marking_id_generator(
prefix:str, # Single-character prefix, e.g. "F" for fans or "B" for blotches.
)->Generator:
Generator for unique marking IDs with the given prefix.
cluster_obsid
def cluster_obsid(
obsid:NoneType=None, # HiRISE obsid (= Planet four image_name)
savedir:NoneType=None, # Top directory path where the catalog will be stored. Will create folder if it
does not exist yet.
imgid:NoneType=None, # Convenience parameter: If `obsid` is not given and therefore is None, this `image_id` can
be used to receive the respective `obsid` from the TileID class.
dbname:NoneType=None, # Path to the database file.
):
Cluster all image_ids for given obsid (=image_name).
cluster_obsid_parallel
def cluster_obsid_parallel(
obsids:list, # List of the obsids to cluster
savedir:str, # path to the save directory which will save the clustering results
dbname:str, # The database name
): # Successful results.
Apply the Clustering Algorithm for multiple obsids in parallel.
Individual failures are logged and skipped instead of aborting the batch.
fnotch_obsid_parallel
def fnotch_obsid_parallel(
obsids:list, # List of the Obsids to fnotch
savedir:str, # the directory path where to save
): # Successful results.
Applies the fnotching for multiple obsids in parallel.
Individual failures are logged and skipped instead of aborting the batch.
fnotch_obsid
def fnotch_obsid(
obsid:NoneType=None, # The observation ID to be processed.
savedir:NoneType=None, # The directory where the results will be saved.
fnotch_via_obsid:bool=False, # Switch to control if fnotching happens per observation ID (obsid) or per image ID.
If True, fnotching is done per observation ID. If False, fnotching is done per image ID.
imgid:NoneType=None, # The image ID to be processed. This parameter is currently not used in the function.
): # The observation ID that was processed.
Perform fnotching on HiRISE images based on observation ID or image ID.
add_marking_ids
def add_marking_ids(
path, # Path to L1A image_id clustering result directory
fan_id, blotch_id
):
Add marking_ids for catalog to cluster results.
get_L1A_paths
def get_L1A_paths(
obsid, # HiRISE observation ID
datapath, # Top-level catalog/clustering directory
): # L1A directories for each tile within the obsid
Return all L1A result directories for an obsid.
create_roi_file
def create_roi_file(
obsids, # List of HiRISE obsids
roi_name, # Name for ROI
datapath, # Path to the top folder with the clustering output data.
):
Create a Region of Interest file, based on list of obsids.
For more structured analysis processes, we can create a summary file for a list of obsids belonging to a ROI. The alternative is to define to what ROI any final object belongs to and add that as a column in the final catalog.
ReleaseManager
def ReleaseManager(
version, obsids:NoneType=None, overwrite:bool=False, dbname:NoneType=None
):
Class to manage releases and find relevant files. TODO: better description.

Parameters
----------
version : str
    Version string for this catalog. Same as datapath in other P4 code.
obsids : iterable, optional
    Iterable of obsids that should be used for the catalog file. Default is to use the full list of the default database, which is Seasons 2 and 3 at this point.
overwrite : bool, optional
    Switch to control if already existing result folders for an obsid should be overwritten. Default: False.
read_csvfiles_into_lists_of_frames
def read_csvfiles_into_lists_of_frames(
folders
):
Reads CSV files from given folders into lists of DataFrames. This function iterates over a list of folders, reads CSV files within those folders, and categorizes them into two lists: 'fan' and 'blotch'. The categorization is based on the filename ending with 'fans.csv' or 'blotch.csv'. Args: folders (list of pathlib.Path): A list of folder paths to search for CSV files. Returns: dict: A dictionary with two keys, 'fan' and 'blotch', each containing a list of pandas DataFrames read from the CSV files.
To create the Catalog from the Planet4 raw dataset we will use the pipeline in the following simple way.
##The Release Manager is our main object that stores the important data like the path to the database
##as well as the name of the folder to store the data
# Create the release manager for version "p4tools_test", pointing at the
# database opened above; existing result folders are kept (overwrite=False).
rm = ReleaseManager("p4tools_test", dbname=db.dbname, overwrite=False)
#Check for which Images need to be finished and were not analysed yet.
rm.check_for_todo()
rm.todo  # inspect the remaining to-do list of obsids
#Finally Launch the production pipeline
rm.launch_catalog_production()