Source code for HAlphaAnomalyzer._cell_average_calculator

# Copyright (C) 2024  Mahsa Khazaei, Heba Mahdi, Azim Ahmadzadeh

# This file is part of H-Alpha Anomalyzer.
#
# H-Alpha Anomalyzer is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
#
# H-Alpha Anomalyzer is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along with H-Alpha Anomalyzer. If not, see <https://www.gnu.org/licenses/>.


import os
import cv2
import pandas as pd
import numpy as np
from tqdm import tqdm


def _calculate_cell_average_per_image(image_path, label, grid_size=8):
    """
    Calculate the average pixel value for each cell in a grid for a given image.

    The image is read from ``image_path`` in grayscale and divided into a
    ``grid_size`` x ``grid_size`` grid of equally sized cells; the mean pixel
    value of every cell is computed. Cell dimensions are obtained with integer
    division, so when the image height or width is not a multiple of
    ``grid_size`` the trailing rows/columns of pixels are not part of any cell.

    Parameters
    ----------
    image_path : str
        The path to the image file.
    label : int
        The label indicating if the image is anomalous (1) or non-anomalous (0).
    grid_size : int, optional
        The number of rows and columns to divide the image into, by default 8.

    Returns
    -------
    image_data : list of list
        One entry per grid cell, in row-major order, of the form
        ``[image_name, row, column, cell_pixel_avg, label]`` where
        ``image_name`` is the base name of ``image_path``.

    Raises
    ------
    OSError
        If the image cannot be read from ``image_path``.
    ValueError
        If ``grid_size`` is smaller than 1 or larger than the image height or
        width (which would produce empty cells).
    """
    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread does not raise on failure; it returns None, which would
    # otherwise surface as an opaque AttributeError on `.shape` below.
    if image is None:
        raise OSError(
            f"Unable to read image (missing file or unsupported format): {image_path}"
        )

    height, width = image.shape
    # A grid finer than the image would yield zero-sized cells whose mean is NaN.
    if grid_size < 1 or grid_size > min(height, width):
        raise ValueError(
            f"grid_size must be between 1 and {min(height, width)} for image "
            f"{image_path} of size {width}x{height}, got {grid_size}"
        )

    cell_height = height // grid_size
    cell_width = width // grid_size
    # The file name is the same for every cell, so compute it once.
    image_name = os.path.basename(image_path)

    image_data = []
    for row in range(grid_size):
        y1, y2 = row * cell_height, (row + 1) * cell_height
        for column in range(grid_size):
            x1, x2 = column * cell_width, (column + 1) * cell_width
            cell_pixel_avg = np.mean(image[y1:y2, x1:x2])
            image_data.append([image_name, row, column, cell_pixel_avg, label])

    return image_data
def _calculate_cell_average_per_batch(image_paths, label, grid_size=8, desc="Processing Training Images"):
    """
    Compute per-cell average pixel values for every image in a batch.

    Each path in ``image_paths`` is passed, together with ``label`` and
    ``grid_size``, to ``_calculate_cell_average_per_image``; the per-cell
    records of all images are gathered into a single DataFrame. Progress is
    reported through a tqdm progress bar.

    Parameters
    ----------
    image_paths : List[str]
        A list of paths to the image files.
    label : int
        The label indicating if the images are anomalous (1) or non-anomalous (0).
    grid_size : int, optional
        The number of rows and columns to divide each image into, by default 8.
    desc : str, optional
        Description for the tqdm progress bar, by default "Processing Training Images".

    Returns
    -------
    result_df : pd.DataFrame
        One row per grid cell per image, with the columns ``image_name``,
        ``row``, ``column``, ``cell_pixel_avg`` and ``label``.
    """
    # Flatten the per-image record lists into one list, in processing order.
    cell_records = [
        record
        for path in tqdm(image_paths, desc=desc)
        for record in _calculate_cell_average_per_image(path, label, grid_size)
    ]
    return pd.DataFrame(
        cell_records,
        columns=['image_name', 'row', 'column', 'cell_pixel_avg', 'label'],
    )
[docs]def _calculate_cell_average(non_anomalous_paths=None, anomalous_paths=None, grid_size=8): """ Calculate the average pixel value for each cell in a grid for batches of anomalous and/or non-anomalous training images. This function processes batches of anomalous and/or non-anomalous image paths, computing the average pixel values for their grid cells. Parameters ---------- non_anomalous_paths : List[str], optional A list of paths to non-anomalous image files, by default None. anomalous_paths : List[str], optional A list of paths to anomalous image files, by default None. grid_size : int, optional The number of rows and columns to divide each image into, by default 8. Returns ------- result_df : pd.DataFrame A DataFrame containing the calculated average pixel values for each grid cell in the anomalous and/or non-anomalous training image batches. """ non_anomalous_df = pd.DataFrame() anomalous_df = pd.DataFrame() if non_anomalous_paths: non_anomalous_df = _calculate_cell_average_per_batch( non_anomalous_paths, 0, grid_size, "Processing Non-Anomalous Training Images" ) if anomalous_paths: anomalous_df = _calculate_cell_average_per_batch( anomalous_paths, 1, grid_size, "Processing Anomalous Training Images" ) result_df = pd.concat([non_anomalous_df, anomalous_df], ignore_index=True) return result_df