# Source code for HAlphaAnomalyzer._cell_range_calculator

# Copyright (C) 2024  Mahsa Khazaei, Heba Mahdi, Azim Ahmadzadeh

# This file is part of H-Alpha Anomalyzer.
#
# H-Alpha Anomalyzer is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
#
# H-Alpha Anomalyzer is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with H-Alpha Anomalyzer. If not, see <https://www.gnu.org/licenses/>.


import numpy as np
import pandas as pd
from tqdm import tqdm


[docs]def _calculate_range_values(data_cells, lower_range=2, upper_range=98): """ Calculate the lower and upper percentage values of a grid cell pixel averages from the training image data. Parameters ---------- data_cells : pd.DataFrame The DataFrame containing the grid cell pixel averages from the training image data. lower_range : float, optional The lower percentage to calculate, by default 2. upper_range : float, optional The upper percentage to calculate, by default 98. Returns ------- lower_range_val : float The lower percentage value of the grid cell pixel averages from the training image data. upper_range_val : float The upper percentage value of the grid cell pixel averages from the training image data. """ lower_range_val = np.percentile(data_cells['cell_pixel_avg'], lower_range) upper_range_val = np.percentile(data_cells['cell_pixel_avg'], upper_range) return lower_range_val, upper_range_val
def _calculate_cell_wise_ranges(images_data, grid_size=8, lower_range_end=20,
                                upper_range_start=80, step_size=2):
    """
    Calculate candidate upper and lower percentile values for each grid cell
    of the training images data for the One-way ANOVA F-test.

    For every candidate pair ``(lower_range, upper_range)`` and every grid
    cell, the percentile thresholds are computed from that cell's
    ``'cell_pixel_avg'`` values across all rows of ``images_data``. One output
    row is then emitted per (image, candidate pair, cell) for each cell that
    has a record in that image.

    Parameters
    ----------
    images_data : pd.DataFrame
        The DataFrame containing the training images data. The columns
        ``'image_name'``, ``'row'``, ``'column'``, ``'cell_pixel_avg'`` and
        ``'label'`` are read.
    grid_size : int, optional
        The number of rows and columns to divide each image into, by default 8.
    lower_range_end : int, optional
        The (exclusive) end of candidate lower percentiles, which start at 0,
        by default 20.
    upper_range_start : int, optional
        The start of candidate upper percentiles, which stop before 100,
        by default 80.
    step_size : int, optional
        The step size for candidate ranges, by default 2.

    Returns
    -------
    df_all_ranges : pd.DataFrame
        A DataFrame with candidate ranges for each grid cell of the training
        images data, with columns ``image_name``, ``row``, ``column``,
        ``lower_range``, ``upper_range``, ``lower_range_val``,
        ``upper_range_val``, ``cell_pixel_avg`` and ``label``.
    """
    columns = [
        'image_name', 'row', 'column', 'lower_range', 'upper_range',
        'lower_range_val', 'upper_range_val', 'cell_pixel_avg', 'label'
    ]
    lower_candidates = range(0, lower_range_end, step_size)
    upper_candidates = range(upper_range_start, 100, step_size)

    # The thresholds depend only on the cell and the candidate pair, never on
    # the image being processed, so compute them once up front instead of
    # once per image.
    range_values = {}
    for row in range(grid_size):
        for column in range(grid_size):
            data_cells = images_data[
                (images_data['row'] == row) & (images_data['column'] == column)
            ]
            if data_cells.empty:
                # No image has this cell, so no output row can reference it;
                # skipping also avoids a percentile over an empty array.
                continue
            for lower_range in lower_candidates:
                for upper_range in upper_candidates:
                    range_values[row, column, lower_range, upper_range] = (
                        _calculate_range_values(
                            data_cells, lower_range, upper_range
                        )
                    )

    all_ranges = []
    for image_name in tqdm(images_data['image_name'].unique(),
                           desc="Computing Candidate Ranges"):
        image_data = images_data[images_data['image_name'] == image_name]

        # Look up each cell of this image once (first matching record), in
        # row-major order; the result is reused for every candidate pair.
        image_cells = {}
        for row in range(grid_size):
            for column in range(grid_size):
                data_cell = image_data[
                    (image_data['row'] == row)
                    & (image_data['column'] == column)
                ]
                if not data_cell.empty:
                    image_cells[row, column] = (
                        data_cell['cell_pixel_avg'].values[0],
                        data_cell['label'].values[0],
                    )

        for lower_range in lower_candidates:
            for upper_range in upper_candidates:
                for (row, column), (cell_avg, label) in image_cells.items():
                    lower_range_val, upper_range_val = range_values[
                        row, column, lower_range, upper_range
                    ]
                    all_ranges.append([
                        image_name, row, column, lower_range, upper_range,
                        lower_range_val, upper_range_val, cell_avg, label
                    ])

    df_all_ranges = pd.DataFrame(all_ranges, columns=columns)
    return df_all_ranges