"""
Example demonstrating the Run Metrics Recorder feature in PyGAD.

This example shows how to:
1. Record metrics during the genetic algorithm run() method
2. Access the recorded metrics directly
3. Export metrics to a CSV file using to_csv()
4. Visualize metrics using plot_metrics()

The metrics recorded for each generation include:
- generation: Generation number
- time_elapsed: Time taken for this generation (seconds)
- best_fitness: Best fitness value in the population
- mean_fitness: Average fitness value of the population
- diversity: Population diversity (average gene variance)
"""

import numpy
import pygad

# Keys of the ga_instance.run_metrics dictionary, in the order they are printed.
METRIC_KEYS = ('generation', 'time_elapsed', 'best_fitness', 'mean_fitness', 'diversity')


def fitness_func(ga_instance, solution, solution_idx):
    """
    Simple fitness function that sums the product of solution with predefined inputs.
    This is a classic optimization problem: finding weights that produce a desired output.
    """
    function_inputs = [4, -2, 3.5, 5, -11, -4.7]
    desired_output = 44

    output = numpy.sum(solution * function_inputs)
    # The small constant avoids a division by zero when the output is exact.
    fitness = 1.0 / (numpy.abs(output - desired_output) + 0.000001)
    return fitness


def on_generation(ga_instance):
    """
    Callback function called after each generation.
    Demonstrates how to access metrics during the run.
    """
    # Only report every 10th generation to keep the console output short.
    if ga_instance.generations_completed % 10 == 0:
        print(f"Generation: {ga_instance.generations_completed}")
        metrics = ga_instance.run_metrics
        if metrics and len(metrics['generation']) > 0:
            print(f" Best Fitness: {metrics['best_fitness'][-1]:.6f}")
            print(f" Mean Fitness: {metrics['mean_fitness'][-1]:.6f}")
            print(f" Diversity: {metrics['diversity'][-1]:.6f}")


def print_banner(title, leading_newline=True):
    """Print a section title framed by two separator lines."""
    separator = "=" * 60
    print(("\n" if leading_newline else "") + separator)
    print(title)
    print(separator)


def print_metrics_table(metrics, heading, row_indices):
    """Print the recorded metrics of the generations listed in row_indices as a table."""
    print(heading)
    print(f"{'Generation':<12} {'Time (s)':<12} {'Best Fitness':<15} {'Mean Fitness':<15} {'Diversity':<15}")
    print("-" * 70)
    for i in row_indices:
        gen, t, best, mean, div = (metrics[key][i] for key in METRIC_KEYS)
        print(f"{gen:<12} {t:<12.6f} {best:<15.6f} {mean:<15.6f} {div:<15.6f}")


# Configure the genetic algorithm
num_generations = 50
num_parents_mating = 4
sol_per_pop = 20
num_genes = 6

print_banner("PyGAD Run Metrics Recorder Example", leading_newline=False)
print("\nConfiguration:")
print(f" - Number of generations: {num_generations}")
print(f" - Population size: {sol_per_pop}")
print(f" - Number of genes: {num_genes}")
print(f" - Parents mating: {num_parents_mating}")

# Create the GA instance
ga_instance = pygad.GA(num_generations=num_generations,
                       num_parents_mating=num_parents_mating,
                       sol_per_pop=sol_per_pop,
                       num_genes=num_genes,
                       fitness_func=fitness_func,
                       on_generation=on_generation,
                       random_seed=42)

print_banner("Running the Genetic Algorithm...")

# Run the GA - metrics are automatically recorded during run()
ga_instance.run()

print_banner("Accessing Recorded Metrics")

# Check if metrics were recorded. An empty recorder is skipped as well because
# the summary statistics below divide by the number of recorded generations.
run_metrics = ga_instance.run_metrics
if run_metrics and run_metrics['generation']:
    num_metrics = len(run_metrics['generation'])
    print(f"\nTotal generations recorded: {num_metrics}")
    print(f"\nMetrics keys available: {list(run_metrics.keys())}")

    # Display first few generations
    print_metrics_table(run_metrics,
                        "\nFirst 5 generations:",
                        range(min(5, num_metrics)))

    # Display last few generations
    print_metrics_table(run_metrics,
                        "\nLast 5 generations:",
                        range(max(0, num_metrics - 5), num_metrics))

    # Calculate summary statistics
    total_time = sum(run_metrics['time_elapsed'])
    avg_time = total_time / num_metrics
    max_diversity = max(run_metrics['diversity'])
    min_diversity = min(run_metrics['diversity'])

    print_banner("Summary Statistics")
    print(f"\nTotal execution time: {total_time:.4f} seconds")
    print(f"Average time per generation: {avg_time:.4f} seconds")
    print(f"Maximum diversity: {max_diversity:.6f}")
    print(f"Minimum diversity: {min_diversity:.6f}")
    print(f"Final best fitness: {run_metrics['best_fitness'][-1]:.6f}")
    print(f"Final mean fitness: {run_metrics['mean_fitness'][-1]:.6f}")

print_banner("Exporting Metrics to CSV")

# Export to CSV. The broad except is deliberate: the example should carry on
# to the next section even if the export fails for any reason.
try:
    ga_instance.to_csv('ga_run_metrics.csv')
    print("\nMetrics successfully exported to 'ga_run_metrics.csv'")
    print("\nCSV file contains the following columns:")
    print(" - generation: Generation number")
    print(" - time_elapsed: Time taken for the generation (seconds)")
    print(" - best_fitness: Best fitness value")
    print(" - mean_fitness: Average fitness value")
    print(" - diversity: Population diversity (gene variance)")
except Exception as e:
    print(f"Error exporting to CSV: {e}")

print_banner("Visualizing Metrics")

# Plot metrics
print("\nGenerating metrics visualization...")
print("The plot will show 4 subplots:")
print(" 1. Best Fitness Over Generations")
print(" 2. Mean Fitness Over Generations")
print(" 3. Time Elapsed Per Generation")
print(" 4. Population Diversity Over Generations")

# As above, a plotting failure is reported and the example continues.
try:
    # Create the plot
    fig = ga_instance.plot_metrics(
        title="Genetic Algorithm Run Metrics",
        save_dir='ga_run_metrics.png'
    )
    print("\nPlot saved as 'ga_run_metrics.png'")
except Exception as e:
    print(f"Error generating plot: {e}")

# Get the best solution
print_banner("Best Solution")

solution, solution_fitness, solution_idx = ga_instance.best_solution()
print(f"\nParameters of the best solution: {solution}")
print(f"Fitness value of the best solution: {solution_fitness}")

if ga_instance.best_solution_generation != -1:
    print(f"Best fitness value reached after {ga_instance.best_solution_generation} generations.")

print_banner("Example Complete")
print("\nFiles created:")
print(" - ga_run_metrics.csv: CSV file with all recorded metrics")
print(" - ga_run_metrics.png: Visualization of the metrics")
print("\nTips:")
print(" - Metrics are automatically recorded during run()")
print(" - Access raw data via ga_instance.run_metrics dictionary")
print(" - Use to_csv() for spreadsheet analysis")
print(" - Use plot_metrics() for visual analysis")
def to_csv(self, filename, delimiter=','):
    """
    Exports the recorded run metrics to a CSV file.

    This method exports the metrics stored in `self.run_metrics` to a CSV file.
    The columns are generation number, time elapsed, best fitness, mean fitness,
    and population diversity (gene variance).

    Parameters
    ----------
    filename : str or os.PathLike
        Name of the CSV file to save the metrics. The '.csv' extension is
        added if not present (the check is case-insensitive, so 'x.CSV' is
        kept as is).

    delimiter : str, optional
        Delimiter to use in the CSV file. Default is ','.

    Returns
    -------
    None

    Raises
    ------
    RuntimeError
        If `self.run_metrics` is None or holds no generation.

    Notes
    -----
    This method can only be called after completing at least 1 generation.

    When a `best_fitness` or `mean_fitness` entry is a list, tuple, or NumPy
    array (multi-objective problems), its values are written into a single
    cell separated by semicolons. A NaN diversity is written as 'NaN'.

    Examples
    --------
    >>> import pygad
    >>> import numpy
    >>>
    >>> def fitness_func(ga_instance, solution, solution_idx):
    ...     output = numpy.sum(solution * [4, -2, 3.5, 5, -11, -4.7])
    ...     fitness = 1.0 / (numpy.abs(output - 44) + 0.000001)
    ...     return fitness
    >>>
    >>> ga_instance = pygad.GA(num_generations=10,
    ...                        num_parents_mating=4,
    ...                        sol_per_pop=8,
    ...                        num_genes=6,
    ...                        fitness_func=fitness_func)
    >>>
    >>> ga_instance.run()
    >>> ga_instance.to_csv('ga_metrics.csv')
    """
    # Local import: only this method needs it.
    import os

    metrics = self.run_metrics
    if metrics is None or len(metrics['generation']) == 0:
        message = "The to_csv() method can only be called after completing at least 1 generation."
        self.logger.error(message)
        raise RuntimeError(message)

    # Accept pathlib.Path and other path-like objects in addition to str.
    filename = os.fspath(filename)

    # Add .csv extension if not present. Compare case-insensitively so that
    # 'metrics.CSV' does not become 'metrics.CSV.csv'.
    if not filename.lower().endswith('.csv'):
        filename = filename + '.csv'

    headers = ['generation', 'time_elapsed', 'best_fitness', 'mean_fitness', 'diversity']

    def format_fitness(value):
        # A vector of objectives is flattened into one semicolon-separated cell.
        # Each row is inspected on its own instead of trusting the first row.
        is_vector = isinstance(value, (list, tuple)) or (
            isinstance(value, numpy.ndarray) and value.ndim > 0)
        if is_vector:
            return ';'.join(str(v) for v in value)
        return str(value)

    def format_diversity(value):
        # Use one fixed spelling for NaN regardless of the float type.
        return 'NaN' if numpy.isnan(value) else str(value)

    columns = [metrics[name] for name in headers]

    # newline='' is required by the csv module; the encoding is pinned so the
    # output does not depend on the platform default.
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile, delimiter=delimiter)
        writer.writerow(headers)
        for generation, time_elapsed, best_fitness, mean_fitness, diversity in zip(*columns):
            writer.writerow([
                generation,
                time_elapsed,
                format_fitness(best_fitness),
                format_fitness(mean_fitness),
                format_diversity(diversity)
            ])

    self.logger.info(f"Run metrics saved to {filename}")
+ if type(self.last_generation_fitness[0]) in self.supported_int_float_types: + # Single-objective: mean is a single value. + mean_fitness = numpy.mean(self.last_generation_fitness) + else: + # Multi-objective: mean is a list of means for each objective. + mean_fitness = numpy.mean(self.last_generation_fitness, axis=0).tolist() + + # Calculate diversity as the average variance of all genes across the population. + # This measures how genetically diverse the population is. + try: + # Convert population to float for variance calculation. + pop_float = numpy.array(self.population, dtype=float) + # Calculate variance for each gene, then take the mean. + gene_variances = numpy.var(pop_float, axis=0) + diversity = numpy.mean(gene_variances) + except (TypeError, ValueError): + # If population contains non-numeric values, set diversity to NaN. + diversity = float('nan') + + # Append all metrics. + self.run_metrics['generation'].append(current_generation) + self.run_metrics['time_elapsed'].append(time_elapsed) + self.run_metrics['best_fitness'].append(best_solution_fitness) + self.run_metrics['mean_fitness'].append(mean_fitness) + self.run_metrics['diversity'].append(diversity) + + # Reset start time for next generation. + start_time = time.time() + # Call the 'run_select_parents()' method to select the parents. # It edits these 2 instance attributes: # 1) last_generation_parents: A NumPy array of the selected parents. @@ -582,6 +644,36 @@ def run(self): pop_fitness=self.last_generation_fitness) self.best_solutions_fitness.append(best_solution_fitness) + # Record metrics for the last generation after the loop ends. + # The loop recorded generations 0 to num_generations-1. + # Now we record generation num_generations. + if self.run_metrics is not None and len(self.run_metrics['generation']) > 0: + current_generation = len(self.run_metrics['generation']) + + # Calculate time elapsed for the last generation. 
+ time_elapsed = time.time() - start_time + + # Calculate mean fitness. + if type(self.last_generation_fitness[0]) in self.supported_int_float_types: + mean_fitness = numpy.mean(self.last_generation_fitness) + else: + mean_fitness = numpy.mean(self.last_generation_fitness, axis=0).tolist() + + # Calculate diversity. + try: + pop_float = numpy.array(self.population, dtype=float) + gene_variances = numpy.var(pop_float, axis=0) + diversity = numpy.mean(gene_variances) + except (TypeError, ValueError): + diversity = float('nan') + + # Append all metrics. + self.run_metrics['generation'].append(current_generation) + self.run_metrics['time_elapsed'].append(time_elapsed) + self.run_metrics['best_fitness'].append(best_solution_fitness) + self.run_metrics['mean_fitness'].append(mean_fitness) + self.run_metrics['diversity'].append(diversity) + self.best_solution_generation = numpy.where(numpy.array( self.best_solutions_fitness) == numpy.max(numpy.array(self.best_solutions_fitness)))[0][0] # After the run() method completes, the run_completed flag is changed from False to True. diff --git a/pygad/utils/validation.py b/pygad/utils/validation.py index 6c61bea..167fae9 100644 --- a/pygad/utils/validation.py +++ b/pygad/utils/validation.py @@ -1367,6 +1367,10 @@ def validate_parameters(self, # Supported in PyGAD 3.2.0. It holds the pareto fronts when solving a multi-objective problem. self.pareto_fronts = None + # Added for run metrics recorder. Holds metrics for each generation during run(). 
def plot_metrics(self,
                 title="PyGAD - Run Metrics",
                 font_size=12,
                 figsize=(12, 8),
                 plot_type="plot",
                 colors=["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728"],
                 labels=["Best Fitness", "Mean Fitness", "Time Elapsed (s)", "Diversity"],
                 save_dir=None,
                 show=True):
    """
    Creates, shows, and returns a figure with 4 subplots showing the run metrics
    stored in `self.run_metrics`.

    The 4 subplots are:
    - Best fitness over generations
    - Mean fitness over generations
    - Time elapsed per generation
    - Population diversity (gene variance) over generations

    Parameters
    ----------
    title : str, optional
        Main title of the figure. Default is "PyGAD - Run Metrics".

    font_size : int, optional
        Font size for labels and titles. Default is 12.

    figsize : tuple, optional
        Figure size as (width, height) in inches. Default is (12, 8).

    plot_type : str, optional
        Type of plot. Can be "plot", "scatter", or "bar". Default is "plot".

    colors : list, optional
        List of 4 colors for the 4 subplots. Default is:
        ["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728"] (blue, orange, green, red).
        The default list is never modified by this method.

    labels : list, optional
        List of 4 labels for the 4 subplots' y-axes. Default is:
        ["Best Fitness", "Mean Fitness", "Time Elapsed (s)", "Diversity"].
        The default list is never modified by this method.

    save_dir : str, optional
        Path of the file to save the figure to. It is passed as is to the
        `fname` parameter of `Figure.savefig()`. If None, the figure is not saved.

    show : bool, optional
        Whether to show the figure. Default is True.

    Returns
    -------
    matplotlib.figure.Figure
        The matplotlib figure object containing all subplots.

    Raises
    ------
    RuntimeError
        If `self.run_metrics` is None or holds no generation.
    ValueError
        If `plot_type` is not one of the supported values, or if `colors` or
        `labels` has fewer than 4 entries.

    Notes
    -----
    This method can only be called after completing at least 1 generation.

    For multi-objective optimization problems, only the first objective is plotted
    for best fitness and mean fitness. Use separate plots for other objectives.

    Examples
    --------
    >>> import pygad
    >>> import numpy
    >>>
    >>> def fitness_func(ga_instance, solution, solution_idx):
    ...     output = numpy.sum(solution * [4, -2, 3.5, 5, -11, -4.7])
    ...     fitness = 1.0 / (numpy.abs(output - 44) + 0.000001)
    ...     return fitness
    >>>
    >>> ga_instance = pygad.GA(num_generations=10,
    ...                        num_parents_mating=4,
    ...                        sol_per_pop=8,
    ...                        num_genes=6,
    ...                        fitness_func=fitness_func)
    >>>
    >>> ga_instance.run()
    >>> fig = ga_instance.plot_metrics()
    """
    metrics = self.run_metrics
    if metrics is None or len(metrics['generation']) == 0:
        message = "The plot_metrics() method can only be called after completing at least 1 generation."
        self.logger.error(message)
        raise RuntimeError(message)

    # Validate the arguments before touching matplotlib so that a bad call
    # fails fast instead of producing a figure with empty subplots.
    supported_plot_types = ("plot", "scatter", "bar")
    if plot_type not in supported_plot_types:
        message = f"The plot_type parameter must be one of {supported_plot_types} but ({plot_type}) found."
        self.logger.error(message)
        raise ValueError(message)

    if len(colors) < 4 or len(labels) < 4:
        message = (f"The colors and labels parameters must each have at least 4 entries "
                   f"but ({len(colors)}) colors and ({len(labels)}) labels found.")
        self.logger.error(message)
        raise ValueError(message)

    matplt = get_matplotlib()

    vector_types = (list, tuple, numpy.ndarray)

    def first_objective(values):
        # For multi-objective entries keep only the first objective.
        if isinstance(values[0], vector_types):
            return numpy.array([entry[0] for entry in values])
        return numpy.array(values)

    # The guard above ensures there is at least one recorded generation.
    is_multi_objective = isinstance(metrics['best_fitness'][0], vector_types)

    generations = numpy.array(metrics['generation'])
    best_fitness = first_objective(metrics['best_fitness'])
    mean_fitness = first_objective(metrics['mean_fitness'])
    time_elapsed = numpy.array(metrics['time_elapsed'])
    diversity = numpy.array(metrics['diversity'])

    # Create figure with 2x2 subplots, flattened for easier indexing.
    fig, axs = matplt.subplots(2, 2, figsize=figsize)
    fig.suptitle(title, fontsize=font_size + 2, fontweight='bold')
    axs = axs.flatten()

    # One (data, y-label, color, subplot title) entry per subplot.
    metrics_data = [
        (best_fitness, labels[0], colors[0], "Best Fitness Over Generations"),
        (mean_fitness, labels[1], colors[1], "Mean Fitness Over Generations"),
        (time_elapsed, labels[2], colors[2], "Time Elapsed Per Generation"),
        (diversity, labels[3], colors[3], "Population Diversity Over Generations")
    ]

    for ax, (data, ylabel, color, subplot_title) in zip(axs, metrics_data):
        if plot_type == "plot":
            ax.plot(generations, data, color=color, linewidth=2, marker='o', markersize=4)
        elif plot_type == "scatter":
            ax.scatter(generations, data, color=color, s=30, alpha=0.7)
        else:
            # plot_type == "bar"; other values were rejected above.
            ax.bar(generations, data, color=color, alpha=0.7)

        ax.set_title(subplot_title, fontsize=font_size)
        ax.set_xlabel("Generation", fontsize=font_size - 2)
        ax.set_ylabel(ylabel, fontsize=font_size - 2)
        ax.grid(True, alpha=0.3)
        ax.tick_params(axis='both', labelsize=font_size - 3)

    # Leave room at the top for the main title.
    matplt.tight_layout(rect=[0, 0, 1, 0.96])

    # Add multi-objective note if applicable
    if is_multi_objective:
        fig.text(0.5, 0.01,
                 "Note: Only the first objective is shown for fitness metrics (multi-objective problem).",
                 ha='center', fontsize=font_size - 2, style='italic')

    # Save figure if requested
    if save_dir is not None:
        fig.savefig(fname=save_dir, bbox_inches='tight', dpi=150)

    # Show figure if requested
    if show:
        matplt.show()

    return fig