Source code for quantammsim.core_simulator.result_exporter

import hashlib
import json
import os

import numpy as np
from jax import config

from quantammsim.core_simulator.param_utils import NumpyEncoder, dict_of_jnp_to_np

# Switch JAX to 64-bit mode. NOTE: this only takes effect when it is set at
# startup, so it has to stay at module import level.
config.update("jax_enable_x64", True)

# Raise on every NumPy floating-point error category, except underflow,
# which is only printed.
np.seterr(all="raise", under="print")

# TODO above is all from jax utils, tidy up required


def get_run_location(run_fingerprint):
    """
    Build the identifier under which a run's results are stored.

    The fingerprint is serialised to JSON with sorted keys (so the key order
    of the dictionary does not matter), encoded as UTF-8 and hashed with
    SHA-256. The hex digest is prefixed with ``run_``.

    Parameters
    ----------
    run_fingerprint : dict
        A JSON-serialisable dictionary describing the run.

    Returns
    -------
    str
        ``"run_"`` followed by the 64-character SHA-256 hex digest.
    """
    canonical_json = json.dumps(run_fingerprint, sort_keys=True)
    # The hash is only an identifier, hence usedforsecurity=False.
    digest = hashlib.sha256(
        canonical_json.encode("utf-8"),
        usedforsecurity=False,
    )
    return "run_" + digest.hexdigest()
def append_json(new_data, filename):
    """
    Append a single record to a results JSON file.

    The file holds a JSON *string* whose content is itself a JSON document
    (a list), i.e. the data is double-encoded. The existing list is decoded,
    ``new_data`` is appended to it, and the whole list is written back in the
    same double-encoded form.

    Parameters
    ----------
    new_data : dict
        The record to append to the list stored in the file.
    filename : str
        Path to the JSON file. It must already exist.

    Raises
    ------
    FileNotFoundError
        If the specified file does not exist.
    json.JSONDecodeError
        If the file contains invalid JSON.
    """
    with open(filename, "r+", encoding="utf-8") as file:
        # Decode twice: the outer layer is a JSON string, the inner layer is
        # the actual list of records.
        file_data = json.loads(json.load(file))
        file_data.append(new_data)

        # Re-encode in the same double-encoded layout the file was read in.
        dumped = json.dumps(file_data, cls=NumpyEncoder, sort_keys=True)

        # Rewrite from the start of the file and drop whatever is left of the
        # previous content, so a shorter payload can never leave stale
        # trailing bytes behind and corrupt the file.
        file.seek(0)
        json.dump(dumped, file, indent=4)
        file.truncate()
def append_list_json(new_data, filename):
    """
    Append several records to a results JSON file.

    The file holds a JSON *string* whose content is itself a JSON document
    (a list), i.e. the data is double-encoded. The existing list is decoded,
    extended with every item of ``new_data``, and the whole list is written
    back in the same double-encoded form.

    Parameters
    ----------
    new_data : list
        The records to append to the list stored in the file.
    filename : str
        Path to the JSON file. It must already exist.

    Raises
    ------
    FileNotFoundError
        If the specified file does not exist.
    json.JSONDecodeError
        If the file contains invalid JSON.
    """
    with open(filename, "r+", encoding="utf-8") as file:
        # Decode twice: the outer layer is a JSON string, the inner layer is
        # the actual list of records.
        file_data = json.loads(json.load(file))
        file_data += new_data

        # Re-encode in the same double-encoded layout the file was read in.
        dumped = json.dumps(file_data, cls=NumpyEncoder, sort_keys=True)

        # Rewrite from the start of the file and drop whatever is left of the
        # previous content, so a shorter payload can never leave stale
        # trailing bytes behind and corrupt the file.
        file.seek(0)
        json.dump(dumped, file, indent=4)
        file.truncate()
def save_multi_params(
    run_fingerprint,
    params,
    test_objective,
    train_objective,
    objective,
    local_learning_rate,
    iterations_since_improvement,
    steps,
    continuous_test_metrics=None,
    validation_metrics=None,
    sorted_tokens=True,
):
    """
    Persist a batch of parameter sets, with their metrics, to the run's JSON file.

    Parameters
    ----------
    run_fingerprint : dict
        Run configuration; hashed to obtain the results file name. Mutated in
        place: ``"alphabetic"`` is set to True when ``sorted_tokens`` is true.
    params : list
        Parameter dictionaries to save. The list and its dictionaries are
        modified in place (metrics are attached and each entry is replaced by
        the result of ``dict_of_jnp_to_np``).
    test_objective : list
        Test-set objective for each parameter set.
    train_objective : list
        Training-set objective for each parameter set.
    objective : list
        Overall objective for each parameter set.
    local_learning_rate : list
        Learning rate for each parameter set.
    iterations_since_improvement : list
        Iterations without improvement for each parameter set.
    steps : list
        Step count for each parameter set.
    continuous_test_metrics : list, optional
        Continuous test metrics for each parameter set; attached only if given.
    validation_metrics : list, optional
        Validation metrics for each parameter set; attached only if given.
    sorted_tokens : bool, optional
        Whether tokens are sorted alphabetically, by default True.

    Notes
    -----
    The target is ``./results/run_<sha256_hash>.json``, where the hash is
    computed from ``run_fingerprint`` *before* ``"alphabetic"`` is added.
    A new file starts with the fingerprint followed by the parameter sets;
    an existing file has the parameter sets appended. ``"hessian_trace"`` is
    always stored as 0 here.
    """
    run_location = "./results/" + get_run_location(run_fingerprint) + ".json"

    for idx, entry in enumerate(params):
        nested = entry.get("subsidary_params")
        if nested is not None:
            entry["subsidary_params"] = [dict_of_jnp_to_np(sub) for sub in nested]

        entry["step"] = steps[idx]
        entry["test_objective"] = test_objective[idx]
        entry["train_objective"] = train_objective[idx]
        entry["objective"] = objective[idx]
        entry["hessian_trace"] = 0
        entry["local_learning_rate"] = local_learning_rate[idx]
        entry["iterations_since_improvement"] = iterations_since_improvement[idx]

        if continuous_test_metrics is not None:
            entry["continuous_test_metrics"] = continuous_test_metrics[idx]
        if validation_metrics is not None:
            entry["validation_metrics"] = validation_metrics[idx]

        params[idx] = dict_of_jnp_to_np(entry)

    if sorted_tokens:
        run_fingerprint["alphabetic"] = True

    # Existing run file: just extend it with the new parameter sets.
    if os.path.isfile(run_location):
        append_list_json(params, run_location)
        return

    # First write for this run: the fingerprint leads the list of records.
    # The payload is double-encoded (a JSON string containing JSON), which is
    # the layout append_list_json reads back.
    payload = json.dumps([run_fingerprint] + params, cls=NumpyEncoder, sort_keys=True)
    os.makedirs(os.path.dirname(run_location), exist_ok=True)
    with open(run_location, "w", encoding="utf-8") as json_file:
        json.dump(payload, json_file, indent=4)
def save_optuna_results_sgd_format(
    run_fingerprint,
    study,
    n_assets,
    sorted_tokens=True,
):
    """
    Write the completed trials of an optuna study in the SGD results layout.

    Each completed trial becomes one record carrying the same metadata keys
    that ``save_multi_params`` writes, plus a few optuna-specific metrics, so
    the output can be read by the same tooling.

    Parameters
    ----------
    run_fingerprint : dict
        Run configuration; hashed to obtain the results file name. Mutated in
        place: ``"training_method"`` is set to ``"optuna"`` and, when
        ``sorted_tokens`` is true, ``"alphabetic"`` is set to True.
    study : optuna.Study
        The study whose trials are exported.
    n_assets : int
        Number of assets in the pool, used to rebuild array parameters from
        optuna's flattened ``name_<index>`` entries.
    sorted_tokens : bool, optional
        Whether tokens are sorted alphabetically, by default True.

    Returns
    -------
    str or None
        Path of the results file, or None when the study has no completed
        trials (nothing is written in that case).

    Notes
    -----
    The target is ``./results/run_<sha256_hash>.json``. Metrics missing from
    a trial's ``user_attrs`` (and a missing trial value) are stored as
    negative infinity.
    """
    import optuna

    run_location = "./results/" + get_run_location(run_fingerprint) + ".json"

    finished = [
        trial
        for trial in study.trials
        if trial.state == optuna.trial.TrialState.COMPLETE
    ]
    if len(finished) == 0:
        return  # Nothing to save

    def attr_or_neg_inf(trial, name):
        # Look up a user attribute as a float, defaulting to -inf.
        return float(trial.user_attrs.get(name, float("-inf")))

    # Optuna-specific metrics stored under the same name as the user attribute.
    extra_metrics = (
        "validation_sharpe",
        "validation_return",
        "train_sharpe",
        "train_return",
        "validation_returns_over_hodl",
        "train_returns_over_hodl",
    )

    params_list = []
    for trial in finished:
        # Flattened optuna params (log_k_0, log_k_1) -> arrays (log_k: [v0, v1])
        record = _optuna_params_to_arrays(trial.params, n_assets)

        # Metadata in SGD format
        record["step"] = trial.number
        record["test_objective"] = attr_or_neg_inf(trial, "validation_value")
        record["train_objective"] = attr_or_neg_inf(trial, "train_value")
        trial_value = trial.value
        if trial_value is None:
            record["objective"] = float("-inf")
        else:
            record["objective"] = float(trial_value)
        # The next three fields are not applicable for optuna.
        record["hessian_trace"] = 0
        record["local_learning_rate"] = 0
        record["iterations_since_improvement"] = 0

        # Additional optuna-specific metrics
        record["optuna_trial_number"] = trial.number
        for metric_name in extra_metrics:
            record[metric_name] = attr_or_neg_inf(trial, metric_name)

        params_list.append(dict_of_jnp_to_np(record))

    if sorted_tokens:
        run_fingerprint["alphabetic"] = True

    # Mark as optuna-trained for downstream analysis
    run_fingerprint["training_method"] = "optuna"

    if os.path.isfile(run_location):
        append_list_json(params_list, run_location)
    else:
        # First write for this run: fingerprint first, then the records,
        # double-encoded (a JSON string containing JSON).
        payload = json.dumps(
            [run_fingerprint] + params_list, cls=NumpyEncoder, sort_keys=True
        )
        os.makedirs(os.path.dirname(run_location), exist_ok=True)
        with open(run_location, "w", encoding="utf-8") as json_file:
            json.dump(payload, json_file, indent=4)

    return run_location
def _optuna_params_to_arrays(optuna_params, n_assets): """ Convert flattened optuna params to array format. Optuna stores params as: {'log_k_0': v0, 'log_k_1': v1, ...} This converts to: {'log_k': [v0, v1, ...]} Parameters ---------- optuna_params : dict Flattened parameter dictionary from optuna trial n_assets : int Number of assets to expect Returns ------- dict Parameter dictionary with arrays """ import jax.numpy as jnp import re result = {} # Group params by base name base_names = set() for key in optuna_params.keys(): # Match patterns like "log_k_0", "logit_lamb_1", etc. match = re.match(r"^(.+)_(\d+)$", key) if match: base_names.add(match.group(1)) else: # Scalar param - keep as is result[key] = optuna_params[key] # Convert indexed params to arrays for base_name in base_names: values = [] for i in range(n_assets): key = f"{base_name}_{i}" if key in optuna_params: values.append(float(optuna_params[key])) else: # Missing index - this shouldn't happen but handle gracefully break if values: result[base_name] = jnp.array(values) return result
def save_params(
    run_fingerprint,
    params,
    step,
    test_objective,
    train_objective,
    objective,
    hess,
    local_learning_rate,
    iterations_since_improvement,
    sorted_tokens=True,
):
    """
    Save one set of optimization parameters and its metrics to the run's JSON file.

    Parameters
    ----------
    run_fingerprint : dict
        Run configuration; hashed to obtain the results file name. Mutated in
        place: ``"alphabetic"`` is set to True when ``sorted_tokens`` is true.
    params : dict
        Optimization parameters. If it carries a ``"subsidary_params"`` list,
        that entry is replaced in place by its converted form.
    step : int
        Current optimization step count.
    test_objective : float
        Objective function value on test data.
    train_objective : float
        Objective function value on training data.
    objective : float
        Overall objective function value.
    hess : float
        Trace of the Hessian matrix; stored as ``"hessian_trace"``.
    local_learning_rate : float
        Current learning rate.
    iterations_since_improvement : int
        Number of iterations without improvement.
    sorted_tokens : bool, optional
        Whether tokens are sorted alphabetically, by default True.

    Notes
    -----
    The target is ``./results/run_<sha256_hash>.json``, where the hash is
    computed from ``run_fingerprint`` *before* ``"alphabetic"`` is added.
    A new file holds ``[run_fingerprint, params]``; an existing file has the
    parameter set appended. The results directory is created when missing.
    Parameters are passed through ``dict_of_jnp_to_np`` before saving.
    """
    run_location = "./results/" + get_run_location(run_fingerprint) + ".json"

    # "subsidary_params" is optional, as in save_multi_params: only convert
    # it when it is actually present.
    subsidiary = params.get("subsidary_params")
    if subsidiary is not None:
        params["subsidary_params"] = [dict_of_jnp_to_np(sp) for sp in subsidiary]

    params = dict_of_jnp_to_np(params)
    params["step"] = step
    params["test_objective"] = test_objective
    params["train_objective"] = train_objective
    params["objective"] = objective
    params["hessian_trace"] = hess
    params["local_learning_rate"] = local_learning_rate
    params["iterations_since_improvement"] = iterations_since_improvement

    if sorted_tokens:
        run_fingerprint["alphabetic"] = True

    if not os.path.isfile(run_location):
        # Double-encoded on purpose (a JSON string containing JSON): this is
        # the layout append_json reads back.
        dumped = json.dumps([run_fingerprint, params], cls=NumpyEncoder, sort_keys=True)
        # Create ./results on first use; without this the very first save
        # fails with FileNotFoundError.
        os.makedirs(os.path.dirname(run_location), exist_ok=True)
        with open(run_location, "w", encoding="utf-8") as json_file:
            json.dump(dumped, json_file)
    else:
        append_json(params, run_location)