From 0fc089b3985d1aa7fc4c014eee1a9d652f499373 Mon Sep 17 00:00:00 2001
From: Patrick Ryan
Date: Tue, 27 Oct 2020 00:13:47 -0400
Subject: [PATCH 1/2] comment fixes, README updates, remove extra sympy imports

---
 README.md                                  | 15 +++---
 aifeynman/RPN_to_pytorch.py                |  8 ++--
 aifeynman/S_NN_get_gradients.py            |  2 +-
 aifeynman/S_NN_train.py                    |  4 +-
 aifeynman/S_add_bf_on_numbers_on_pareto.py |  6 +--
 aifeynman/S_add_snap_expr_on_pareto.py     | 22 ++++-----
 aifeynman/S_add_sym_on_pareto.py           |  2 +-
 aifeynman/S_brute_force.py                 |  6 +--
 aifeynman/S_brute_force_comp.py            | 10 ++--
 aifeynman/S_brute_force_gen_sym.py         |  8 ++--
 aifeynman/S_brute_force_number.py          |  6 +--
 aifeynman/S_combine_pareto.py              |  4 +-
 aifeynman/S_final_gd.py                    | 12 ++---
 aifeynman/S_gen_sym.py                     |  6 +--
 aifeynman/S_get_number_DL.py               |  4 +-
 aifeynman/S_get_symbolic_expr_error.py     |  2 +-
 aifeynman/S_gradient_decomposition.py      | 10 ++--
 aifeynman/S_polyfit_utils.py               |  6 +--
 aifeynman/S_remove_input_neuron.py         |  2 +-
 aifeynman/S_run_aifeynman.py               | 16 +++----
 aifeynman/S_run_bf_polyfit.py              | 20 ++++----
 aifeynman/S_separability.py                | 48 +++++++++----------
 aifeynman/S_snap.py                        | 14 +++---
 aifeynman/S_symmetry.py                    | 54 +++++++++++-----------
 aifeynman/__init__.py                      |  3 +-
 aifeynman/dimensionalAnalysis.py           | 16 +++----
 aifeynman/getPowers.py                     |  2 +-
 aifeynman/get_demos.py                     |  2 +-
 aifeynman/get_pareto.py                    | 47 +++++++++++--------
 aifeynman/test_points.py                   |  4 +-
 30 files changed, 187 insertions(+), 174 deletions(-)

diff --git a/README.md b/README.md
index abce905..9035872 100644
--- a/README.md
+++ b/README.md
@@ -16,17 +16,17 @@ Move into a clean directory and run the following Python commands:
 
     import aifeynman
 
    aifeynman.get_demos("example_data") # Download examples from server
-    aifeynman.run_aifeynman("./example_data/", "example1.txt", 60, "14ops.txt", polyfit_deg=3, NN_epochs=500)
+    aifeynman.run_aifeynman("./example_data/", "example1.txt", 30, "14ops.txt", polyfit_deg=3, NN_epochs=500)
 
 This example will get solved in about 10-30 minutes depending on what computer you have and whether you have a GPU.
 
-Here ‘example.txt’ contains the data table to perform symbolic regression on, with columns separated by spaces, commas or tabs. The other parameters control the search: here the brute-force modules tries combinations of the 14 basic operations in ‘14ops.txt’ for up to 30 seconds, polynomial fits are tried up to degree 3, and the interpolating neural network is trained for up to 500 epochs.
+Here ‘example1.txt’ contains the data table to perform symbolic regression on, with columns separated by spaces, commas or tabs. The other parameters control the search: here the brute-force module tries combinations of the 14 basic operations in ‘14ops.txt’ for up to 30 seconds, polynomial fits are tried up to degree 3, and the interpolating neural network is trained for up to 500 epochs.
 
 # AI-Feynman
 
 This code is an improved implementation of AI Feynman: a Physics-Inspired Method for Symbolic Regression, Silviu-Marian Udrescu and Max Tegmark (2019) [[Science Advances](https://advances.sciencemag.org/content/6/16/eaay2631/tab-pdf)] and AI Feynman 2.0: Pareto-optimal symbolic regression exploiting graph modularity, Udrescu S.M. et al. (2020) [[arXiv](https://arxiv.org/abs/2006.10782)].
 
-Please check [this Medium article](https://towardsdatascience.com/ai-feynman-2-0-learning-regression-equations-from-data-3232151bd929) for a more detailed eplanation of how to get the code running.
+Please check [this Medium article](https://towardsdatascience.com/ai-feynman-2-0-learning-regression-equations-from-data-3232151bd929) for a more detailed explanation of how to get the code running.
 
 In order to get started, run compile.sh to compile the fortran files used for the brute force code.
 
@@ -43,10 +43,10 @@ The main function of the code, called by the user, has the following parameters:
 * vars_name - name of the variables appearing in the equation (including the name of the output variable). This should be passed as a list of strings, with the name of the variables appearing in the same order as they are in the file containing the data
 * test_percentage - percentage of the input data to be kept aside and used as the test set
 
-The data file to be analyzed should be a text file with each column containing the numerical values of each (dependent and independent) variable. The solution file will be saved in the directory called "results" under the name solution_{filename}. The solution file will contain several rows (corresponding to each point on the Pareto frontier), each row showing:
+The data file to be analyzed should be a text file with each column containing the numerical values of each (dependent and independent) variable. The solution file will be saved in the directory called "results" under the name solution_{filename}. The solution file will contain several rows (corresponding to each point on the [Pareto frontier](https://en.wikipedia.org/wiki/Pareto_efficiency#Pareto_frontier)), each row showing:
 
-* the mean logarithm in based 2 of the error of the discovered equation applied to the input data (this can be though of as the average error in bits)
-* the cummulative logarithm in based 2 of the error of the discovered equation applied to the input data (this can be though of as the cummulative error in bits)
+* the mean logarithm in base 2 of the error of the discovered equation applied to the input data (this can be thought of as the average error in bits)
+* the cumulative logarithm in base 2 of the error of the discovered equation applied to the input data (this can be thought of as the cumulative error in bits)
 * the complexity of the discovered equation (in bits)
 * the error of the discovered equation applied to the input data
 * the symbolic expression of the discovered equation
@@ -72,3 +72,5 @@ If you compare with, build on, or use aspects of the AI Feynman work, please cit
   publisher={American Association for the Advancement of Science}
 }
 ```
+---
+[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
diff --git a/aifeynman/RPN_to_pytorch.py b/aifeynman/RPN_to_pytorch.py
index 7f61ce1..fa5afa8 100644
--- a/aifeynman/RPN_to_pytorch.py
+++ b/aifeynman/RPN_to_pytorch.py
@@ -21,13 +21,13 @@ from .S_get_number_DL_snapped import get_number_DL_snapped
 from .S_get_symbolic_expr_error import get_symbolic_expr_error
 
-# parameters: path to data, RPN expression (obtained from bf)
+# Parameters: path to data, RPN expression (obtained from bf)
 def RPN_to_pytorch(data, math_expr, lr = 1e-2, N_epochs = 500):
     param_dict = {}
     unsnapped_param_dict = {'p':1}
 
     def unsnap_recur(expr, param_dict, unsnapped_param_dict):
-        """Recursively transform each numerical value into a learnable parameter."""
+        # Recursively transform each numerical value into a learnable parameter.
        import sympy
        from sympy import Symbol
        if isinstance(expr, sympy.numbers.Float) or isinstance(expr, sympy.numbers.Integer) or isinstance(expr, sympy.numbers.Rational) or isinstance(expr, sympy.numbers.Pi):
@@ -47,7 +47,7 @@ def unsnap_recur(expr, param_dict, unsnapped_param_dict):
 
 
     def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=True):
-        """Get the next available key that does not collide with the keys in the dictionary."""
+        # Get the next available key that does not collide with the keys in the dictionary.
         if key + suffix not in iterable:
             return key + suffix
         else:
@@ -95,7 +95,7 @@ def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=Tr
     y = torch.from_numpy(data[:,-1]).float()
 
     for i in range(N_epochs):
-        # this order is fixed i.e. first parameters
+        # This order is fixed i.e. first parameters
         yy = f(*input)
         loss = torch.mean((yy-y)**2)
         loss.backward()
diff --git a/aifeynman/S_NN_get_gradients.py b/aifeynman/S_NN_get_gradients.py
index 51b4e1e..239f1da 100644
--- a/aifeynman/S_NN_get_gradients.py
+++ b/aifeynman/S_NN_get_gradients.py
@@ -1,4 +1,4 @@
-# SAve a file with 2*(n-1) columns contaning the (n-1) independent variables and the (n-1) gradients of the trained NN with respect these variables
+# Save a file with 2*(n-1) columns containing the (n-1) independent variables and the (n-1) gradients of the trained NN with respect to these variables
 
 import matplotlib.pyplot as plt
 import numpy as np
diff --git a/aifeynman/S_NN_train.py b/aifeynman/S_NN_train.py
index 7c8aa1e..de5aa48 100644
--- a/aifeynman/S_NN_train.py
+++ b/aifeynman/S_NN_train.py
@@ -100,8 +100,8 @@ def forward(self, x):
         x = self.linear5(x)
         return x
 
-    my_dataset = utils.TensorDataset(factors,product) # create your datset
-    my_dataloader = utils.DataLoader(my_dataset, batch_size=bs, shuffle=True) # create your dataloader
+    my_dataset = utils.TensorDataset(factors,product) # Create your dataset
+    my_dataloader = utils.DataLoader(my_dataset, batch_size=bs, shuffle=True) # Create your dataloader
 
     if is_cuda:
         model_feynman = SimpleNet(n_variables).cuda()
diff --git a/aifeynman/S_add_bf_on_numbers_on_pareto.py b/aifeynman/S_add_bf_on_numbers_on_pareto.py
index 5753df6..ea3be5c 100644
--- a/aifeynman/S_add_bf_on_numbers_on_pareto.py
+++ b/aifeynman/S_add_bf_on_numbers_on_pareto.py
@@ -1,4 +1,4 @@
-# Adds on the pareto all the snapped versions of a given expression (all paramters are snapped in the end)
+# Adds to the pareto all the snapped versions of a given expression (all parameters are snapped in the end)
 
 import numpy as np
 import matplotlib.pyplot as plt
@@ -28,7 +28,7 @@ from .S_get_number_DL_snapped import get_number_DL_snapped
 
 
-# parameters: path to data, math (not RPN) expression
+# Parameters: path to data, math (not RPN) expression
 def add_bf_on_numbers_on_pareto(pathdir, filename, PA, math_expr):
     input_data = np.loadtxt(pathdir+filename)
     def unsnap_recur(expr, param_dict, unsnapped_param_dict):
@@ -83,7 +83,7 @@ def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=Tr
             bf_numbers = np.loadtxt("results.dat",usecols=(1,),dtype="str")
             new_numbers = copy.deepcopy(eq_numbers)
 
-            # replace the number under consideration by all the proposed bf numbers
+            # Replace the number under consideration by all the proposed bf numbers
             for kk in range(len(bf_numbers)):
                 eq = eq_
                 new_numbers[w] = parse_expr(RPN_to_eq(bf_numbers[kk]))
diff --git a/aifeynman/S_add_snap_expr_on_pareto.py b/aifeynman/S_add_snap_expr_on_pareto.py
index 456427e..6e22340 100644
--- a/aifeynman/S_add_snap_expr_on_pareto.py
+++ b/aifeynman/S_add_snap_expr_on_pareto.py
@@ -1,4 +1,4 @@
-# Adds on the pareto all the snapped versions of a given expression (all paramters are snapped in the end)
+# Adds to the pareto all the snapped versions of a given expression (all parameters are snapped in the end)
 
 import numpy as np
 import matplotlib.pyplot as plt
@@ -32,11 +32,11 @@ def intify(expr):
     ints = [i for i in floats if int(i) == i]
     return expr.xreplace(dict(zip(ints, [int(i) for i in ints])))
 
-# parameters: path to data, math (not RPN) expression
+# Parameters: path to data, math (not RPN) expression
 def add_snap_expr_on_pareto(pathdir, filename, math_expr, PA, DR_file=""):
     input_data = np.loadtxt(pathdir+filename)
     def unsnap_recur(expr, param_dict, unsnapped_param_dict):
-        """Recursively transform each numerical value into a learnable parameter."""
+        # Recursively transform each numerical value into a learnable parameter.
         import sympy
         from sympy import Symbol
         if isinstance(expr, sympy.numbers.Float) or isinstance(expr, sympy.numbers.Integer) or isinstance(expr, sympy.numbers.Rational) or isinstance(expr, sympy.numbers.Pi):
@@ -56,7 +56,7 @@ def unsnap_recur(expr, param_dict, unsnapped_param_dict):
 
 
     def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=True):
-        """Get the next available key that does not collide with the keys in the dictionary."""
+        # Get the next available key that does not collide with the keys in the dictionary.
         if key + suffix not in iterable:
             return key + suffix
         else:
@@ -85,8 +85,8 @@ def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=Tr
                 new_numbers = integerSnap(eq_numbers,w+1)
                 new_numbers = {"pp"+str(k): v for k, v in new_numbers.items()}
                 temp_unsnapped_param_dict.update(new_numbers)
-                #for kk in range(len(new_numbers)):
-                #    eq_numbers[new_numbers[kk][0]] = new_numbers[kk][1]
+                # for kk in range(len(new_numbers)):
+                #     eq_numbers[new_numbers[kk][0]] = new_numbers[kk][1]
                 new_eq = re.sub(r"(pp\d*)",r"{\1}",str(eq))
                 new_eq = new_eq.format_map(temp_unsnapped_param_dict)
                 integer_snapped_expr = integer_snapped_expr + [parse_expr(new_eq)]
@@ -110,8 +110,8 @@ def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=Tr
                 new_numbers = rationalSnap(eq_numbers,w+1)
                 new_numbers = {"pp"+str(k): v for k, v in new_numbers.items()}
                 temp_unsnapped_param_dict.update(new_numbers)
-                #for kk in range(len(new_numbers)):
-                #    eq_numbers_snap[new_numbers[kk][0]] = new_numbers[kk][1][1:3]
+                # for kk in range(len(new_numbers)):
+                #     eq_numbers_snap[new_numbers[kk][0]] = new_numbers[kk][1][1:3]
                 new_eq = re.sub(r"(pp\d*)",r"{\1}",str(eq))
                 new_eq = new_eq.format_map(temp_unsnapped_param_dict)
                 rational_snapped_expr = rational_snapped_expr + [parse_expr(new_eq)]
@@ -125,7 +125,7 @@ def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=Tr
         # Calculate the error of the new, snapped expression
         snapped_error = get_symbolic_expr_error(input_data,str(snapped_expr[i]))
         # Calculate the complexity of the new, snapped expression
-        #expr = simplify(powsimp(snapped_expr[i]))
+        # expr = simplify(powsimp(snapped_expr[i]))
         expr = snapped_expr[i]
         for s in (expr.free_symbols):
             s = symbols(str(s), real = True)
@@ -144,7 +144,7 @@ def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=Tr
             if n_operations!=0 or n_variables!=0:
                 snapped_complexity = snapped_complexity + (n_variables+n_operations)*np.log2((n_variables+n_operations))
 
-        # If a da file is provided, replace the variables with the actual ones before calculating the complexity
+        # If a .dat file is provided, replace the variables with the actual ones before calculating the complexity
         else:
             dr_data = np.loadtxt(DR_file,dtype="str",delimiter=",")
@@ -157,7 +157,7 @@ def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=Tr
             expr = parse_expr(expr)
             for s in (expr.free_symbols):
                 s = symbols(str(s), real = True)
-            #expr = simplify(parse_expr(str(expr),locals()))
+            # expr = simplify(parse_expr(str(expr),locals()))
             expr = parse_expr(str(expr),locals())
             snapped_complexity = 0
             for j in numbers_expr:
diff --git a/aifeynman/S_add_sym_on_pareto.py b/aifeynman/S_add_sym_on_pareto.py
index acf30bf..da70dbf 100644
--- a/aifeynman/S_add_sym_on_pareto.py
+++ b/aifeynman/S_add_sym_on_pareto.py
@@ -1,4 +1,4 @@
-# Combines 2 pareto fromtier obtained from the separability test into a new one.
+# Combines 2 Pareto frontiers obtained from the separability test into a new one.
 
 from .get_pareto import Point, ParetoSet
 from sympy.parsing.sympy_parser import parse_expr
diff --git a/aifeynman/S_brute_force.py b/aifeynman/S_brute_force.py
index 8903fb6..134da4c 100644
--- a/aifeynman/S_brute_force.py
+++ b/aifeynman/S_brute_force.py
@@ -1,6 +1,6 @@
-# runs BF on data and saves the best RPN expressions in results.dat
-# all the .dat files are created after I run this script
-# the .scr are needed to run the fortran code
+# Runs BF on data and saves the best RPN expressions in results.dat
+# All the .dat files are created after this script is run
+# The .scr files are needed to run the Fortran code
 
 import csv
 import os
diff --git a/aifeynman/S_brute_force_comp.py b/aifeynman/S_brute_force_comp.py
index 211b190..e4ce7ad 100644
--- a/aifeynman/S_brute_force_comp.py
+++ b/aifeynman/S_brute_force_comp.py
@@ -1,7 +1,9 @@
-# runs BF on data and saves the best RPN expressions in results.dat. This is used to find function proportional to the desired one using gradients
-# all the .dat files are created after I run this script
-# the .scr are needed to run the fortran code
-
+"""
+Runs BF on data and saves the best RPN expressions in results.dat
+All the .dat files are created after this script is run
+The .scr files are needed to run the Fortran code
+This is used to find a function proportional to the desired one using gradients
+"""
 import csv
 import os
 import shutil
diff --git a/aifeynman/S_brute_force_gen_sym.py b/aifeynman/S_brute_force_gen_sym.py
index 0506d17..df7c5a0 100644
--- a/aifeynman/S_brute_force_gen_sym.py
+++ b/aifeynman/S_brute_force_gen_sym.py
@@ -1,6 +1,8 @@
-# runs BF on data and saves the best RPN expressions in results.dat
-# all the .dat files are created after I run this script
-# the .scr are needed to run the fortran code
+"""
+Runs BF on data and saves the best RPN expressions in results.dat
+All the .dat files are created after this script is run
+The .scr files are needed to run the Fortran code
+"""
 
 import csv
 import os
diff --git a/aifeynman/S_brute_force_number.py b/aifeynman/S_brute_force_number.py
index d4e1a3e..87b5fde 100644
--- a/aifeynman/S_brute_force_number.py
+++ b/aifeynman/S_brute_force_number.py
@@ -1,6 +1,6 @@
-# runs BF on data and saves the best RPN expressions in results.dat
-# all the .dat files are created after I run this script
-# the .scr are needed to run the fortran code
+# Runs BF on data and saves the best RPN expressions in results.dat
+# All the .dat files are created after this script is run
+# The .scr files are needed to run the Fortran code
 
 import csv
 import os
diff --git a/aifeynman/S_combine_pareto.py b/aifeynman/S_combine_pareto.py
index fa048de..4bca51f 100644
--- a/aifeynman/S_combine_pareto.py
+++ b/aifeynman/S_combine_pareto.py
@@ -1,4 +1,4 @@
-# Combines 2 pareto fromtier obtained from the separability test into a new one.
+# Combines 2 Pareto frontiers obtained from the separability test into a new one.
 
 from .get_pareto import Point, ParetoSet
 from .S_get_symbolic_expr_error import get_symbolic_expr_error
@@ -18,7 +18,7 @@ def combine_pareto(input_data,PA1,PA2,idx_list_1,idx_list_2,PA,sep_type = "+"):
     for i in range(len(PA1)):
         for j in range(len(PA2)):
             try:
-                # replace the variables from the separated parts with the variables reflecting the new combined equation
+                # Replace the variables from the separated parts with the variables reflecting the new combined equation
                 exp1 = PA1[i][2]
                 exp2 = PA2[j][2]
                 for k in range(len(idx_list_1)-1,-1,-1):
diff --git a/aifeynman/S_final_gd.py b/aifeynman/S_final_gd.py
index 8a85447..baf27a9 100644
--- a/aifeynman/S_final_gd.py
+++ b/aifeynman/S_final_gd.py
@@ -21,15 +21,13 @@ from .S_get_number_DL_snapped import get_number_DL_snapped
 from .S_get_symbolic_expr_error import get_symbolic_expr_error
 
-# parameters: path to data, RPN expression (obtained from bf)
+# Parameters: path to data, RPN expression (obtained from bf)
 def final_gd(data, math_expr, lr = 1e-2, N_epochs = 5000):
     param_dict = {}
     unsnapped_param_dict = {'p':1}
 
     def unsnap_recur(expr, param_dict, unsnapped_param_dict):
-        """Recursively transform each numerical value into a learnable parameter."""
-        import sympy
-        from sympy import Symbol
+        # Recursively transform each numerical value into a learnable parameter.
if isinstance(expr, sympy.numbers.Float) or isinstance(expr, sympy.numbers.Integer) or isinstance(expr, sympy.numbers.Rational) or isinstance(expr, sympy.numbers.Pi): used_param_names = list(param_dict.keys()) + list(unsnapped_param_dict) unsnapped_param_name = get_next_available_key(used_param_names, "p", is_underscore=False) @@ -93,7 +91,7 @@ def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=Tr for i in range(N_epochs): - # this order is fixed i.e. first parameters + # This order is fixed i.e. first parameters yy = f(*input) loss = torch.mean((yy-y)**2) loss.backward() @@ -105,7 +103,7 @@ def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=Tr break for i in range(N_epochs): - # this order is fixed i.e. first parameters + # This order is fixed i.e. first parameters yy = f(*input) loss = torch.mean((yy-y)**2) loss.backward() @@ -120,7 +118,7 @@ def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=Tr if torch.isnan(trainable_parameters[nan_i])==True or abs(trainable_parameters[nan_i])>1e7: return 1000000, 10000000, "1" - # get the updated symbolic regression + # Get the updated symbolic regression ii = -1 for parm in unsnapped_param_dict: if ii == -1: diff --git a/aifeynman/S_gen_sym.py b/aifeynman/S_gen_sym.py index 08f8d45..e7c6965 100644 --- a/aifeynman/S_gen_sym.py +++ b/aifeynman/S_gen_sym.py @@ -15,7 +15,7 @@ is_cuda = torch.cuda.is_available() -# fix this to work with the other variables constant +# TODO: Fix this to work with the other variables constant def check_gen_sym(pathdir,filename,model,gen_sym_idx,express,mu,sigma,nu=10): gen_sym_idx = np.append(gen_sym_idx,-1) data_all = np.loadtxt(pathdir+filename) @@ -110,7 +110,7 @@ def do_gen_sym(pathdir, filename, gen_sym_idx,express): new_data = f(*np.transpose(data[:,0:-1])) data_all[:,gen_sym_idx[0]]=new_data - #save_data = np.column_stack((new_data,data_all)) + # save_data = np.column_stack((new_data,data_all)) save_data = data_all try: @@ -140,7 +140,7 @@ def add_gen_sym_on_pareto(PA1,PA, gen_sym_idx, express): exp1 = exp1.replace(possible_vars[j],possible_vars[j+1]) temp_list = np.delete(temp_list,-1) - # replace variables in bf_eq + # Replace variables in bf_eq arr_idx = np.flip(np.arange(0,len(gen_sym_idx),1), axis=0) actual_idx = np.flip(gen_sym_idx, axis=0) for k in range(len(gen_sym_idx)): diff --git a/aifeynman/S_get_number_DL.py b/aifeynman/S_get_number_DL.py index a69f813..8f7391a 100644 --- a/aifeynman/S_get_number_DL.py +++ b/aifeynman/S_get_number_DL.py @@ -4,14 +4,14 @@ def get_number_DL(n): epsilon = 1e-10 - # check if integer + # Check if integer if np.isnan(n): return 1000000 elif np.abs(n - int(n)) < epsilon: return np.log2(1+abs(n)) elif np.abs(n - np.pi) < epsilon: return np.log2(1+3) - # check if real + # Check if real else: PrecisionFloorLoss = 1e-14 return np.log2(1 + (float(n) / PrecisionFloorLoss) ** 2) / 2 diff --git a/aifeynman/S_get_symbolic_expr_error.py b/aifeynman/S_get_symbolic_expr_error.py index ab41b3c..3b54a39 100644 --- a/aifeynman/S_get_symbolic_expr_error.py +++ b/aifeynman/S_get_symbolic_expr_error.py @@ -27,7 +27,7 @@ def get_symbolic_expr_error(data,expr): # Remove accidental nan's good_idx = np.where(np.isnan(f(*real_variables))==False) - # use this to get rid of cases where the loss gets complex because of transformations of the output variable + # Use this to get rid of cases where the loss gets complex because of transformations of the output variable if 
isinstance(np.mean((f(*real_variables)-data[:,-1])**2), complex):
         return 1000000
     else:
diff --git a/aifeynman/S_gradient_decomposition.py b/aifeynman/S_gradient_decomposition.py
index 92c8842..7bd9ee1 100644
--- a/aifeynman/S_gradient_decomposition.py
+++ b/aifeynman/S_gradient_decomposition.py
@@ -111,7 +111,7 @@ def score_consistency(grads_tensor):
     evals, evecs = np.linalg.eig(D)
     nv = evals.shape[0]
     assert(nv == A.shape[1])
-    # evecs should be d x (num eigenvectors - probably also d)
+    # evecs should be d x (number of eigenvectors, which is probably also d)
     dots = np.einsum('ij,jk->ik', A, evecs)
     scores = np.einsum('ij,ij->j', dots, dots)
     overall_score = np.max(scores)/n_pts
@@ -130,8 +130,8 @@ def visualize_distribution(hypot, bench):
     X_plot = np.linspace(-8, 0, 1000)[:, np.newaxis]
     log_dens = kde.score_samples(X_plot)
     log_dens2= kde2.score_samples(X_plot)
-    plt.plot(X_plot[:, 0], np.exp(log_dens), color='#FFAA00', label='Hypothesis') #hypot is orange
-    plt.plot(X_plot[:, 0], np.exp(log_dens2), color='#00AAFF', label='Benchmark')#blue is bench
+    plt.plot(X_plot[:, 0], np.exp(log_dens), color='#FFAA00', label='Hypothesis') # Hypot is orange
+    plt.plot(X_plot[:, 0], np.exp(log_dens2), color='#00AAFF', label='Benchmark') # Bench is blue
     plt.legend()
     plt.savefig(f'plot_{plot_counter}.pdf')
     plot_counter += 1
@@ -204,7 +204,7 @@ def filter_decompositions_relative_scoring(X, y, model, max_subset_size=None, vi
             score, _ = score_consistency(evaluate_derivatives_andrew(model, s, samples))
             bench_scores.append(score)
         snr = signal_to_noise(hypot_scores, bench_scores)
-        # penalizes larger decompositions
+        # Penalizes larger decompositions
         snr -= np.log10(2)*len(s)
         results.append((snr, s))
         print((snr, s))
@@ -238,7 +238,7 @@ def extract_gradients(X, y, model, s, num_points):
    for i in range(num_points):
        samples = draw_samples(X, y, model, s, 50, point=idx[i])
        _, gradients[i, :] = score_consistency(evaluate_derivatives_andrew(model, s, samples))
-       # normalize first dimension
+       # Normalize first dimension
        if(gradients[i, 0] < 0):
            gradients[i,:] *= -1
    return np.concatenate((X[idx, :][:, s].cpu().data.numpy(), gradients),axis=1)
diff --git a/aifeynman/S_polyfit_utils.py b/aifeynman/S_polyfit_utils.py
index 1f009d0..1059e38 100644
--- a/aifeynman/S_polyfit_utils.py
+++ b/aifeynman/S_polyfit_utils.py
@@ -36,8 +36,8 @@ def multipolyfit(xs, y, deg):
     # Raise data to specified degree pattern, stack in order
     A = hstack(asarray([as_tall((xs**p).prod(1)) for p in powers]))
 
-    params = lsqr(A, y)[0] # get the best params of the fit
-    rms = lsqr(A, y)[4] # get the rms params of the fit
+    params = lsqr(A, y)[0] # Get the best params of the fit
+    rms = lsqr(A, y)[4] # Get the rms error of the fit
 
     return (params, rms)
 
@@ -47,7 +47,7 @@ def getBest(xs,y,max_deg):
     for i in range(0,max_deg+1):
         results = results + [multipolyfit(xs,y,i)]
     results = np.array(results)
-    # get the parameters and error of the fit with the lowest rms error
+    # Get the parameters and error of the fit with the lowest rms error
     params = results[np.argmin(results[:,1:])][0]
     error = results[np.argmin(results[:,1:])][1]
     deg = np.argmin(results[:,1:])
diff --git a/aifeynman/S_remove_input_neuron.py b/aifeynman/S_remove_input_neuron.py
index 7ac383a..b9afd6d 100644
--- a/aifeynman/S_remove_input_neuron.py
+++ b/aifeynman/S_remove_input_neuron.py
@@ -1,4 +1,4 @@
-# Remove on input neuron from a NN
+# Remove an input neuron from a NN
 
 from __future__ import print_function
 import torch
diff --git a/aifeynman/S_run_aifeynman.py b/aifeynman/S_run_aifeynman.py
index 0069493..7391cbd 100644
--- a/aifeynman/S_run_aifeynman.py
+++ b/aifeynman/S_run_aifeynman.py
@@ -36,7 +36,7 @@ def run_AI_all(pathdir,filename,BF_try_time=60,BF_ops_file_type="14ops", polyfit
     except:
         pass
 
-    # load the data for different checks
+    # Load the data for different checks
     data = np.loadtxt(pathdir+filename)
 
     # Run bf and polyfit
@@ -58,7 +58,7 @@ def run_AI_all(pathdir,filename,BF_try_time=60,BF_ops_file_type="14ops", polyfit
     PA = get_tan(pathdir,"results/mystery_world_tan/",filename,BF_try_time,BF_ops_file_type, PA, polyfit_deg)
#############################################################################################################################
 
-    # check if the NN is trained. If it is not, train it on the data.
+    # Check if the NN is trained. If it is not, train it on the data.
     if len(data[0])<3:
         print("Just one variable!")
         pass
@@ -119,7 +119,7 @@ def run_AI_all(pathdir,filename,BF_try_time=60,BF_ops_file_type="14ops", polyfit
     idx_comp = 0
 
     if succ_grad == 1:
-        #try:
+        # try:
         for qqqq in range(1):
             brute_force_comp("results/","gradients_comp_%s.txt" %filename,600,"14ops.txt")
             bf_all_output = np.loadtxt("results_comp.dat", dtype="str")
@@ -135,8 +135,8 @@ def run_AI_all(pathdir,filename,BF_try_time=60,BF_ops_file_type="14ops", polyfit
                     sigma = new_sigma
                 except:
                     continue
-        #except:
-        #    idx_comp = 0
+        # except:
+        #     idx_comp = 0
     else:
         idx_comp = 0
 
     print("")
@@ -150,7 +150,7 @@ def run_AI_all(pathdir,filename,BF_try_time=60,BF_ops_file_type="14ops", polyfit
     idx_gen_sym = 0
     for kiiii in range(1):
         if len(data[0])>3:
-            # find the best separability indices
+            # Find the best separability indices
             decomp_idx = identify_decompositions(pathdir,filename, model_feynman)
             brute_force_gen_sym("results/","gradients_gen_sym_%s" %filename,600,"14ops.txt")
             bf_all_output = np.loadtxt("results_gen_sym.dat", dtype="str")
@@ -244,7 +244,7 @@ def run_AI_all(pathdir,filename,BF_try_time=60,BF_ops_file_type="14ops", polyfit
             return PA
     else:
         return PA
-# this runs snap on the output of aifeynman
+# This runs snap on the output of aifeynman
 def run_aifeynman(pathdir,filename,BF_try_time,BF_ops_file_type, polyfit_deg=4, NN_epochs=4000, vars_name=[],test_percentage=20):
     # If the variable names are passed, do the dimensional analysis first
     filename_orig = filename
@@ -270,7 +270,7 @@ def run_aifeynman(pathdir,filename,BF_try_time,BF_ops_file_type, polyfit_deg=4,
     np.savetxt(pathdir+filename+"_test",test_data)
 
     PA = ParetoSet()
-    # Run the code on the train data
+    # Run the code on the training data
     PA = run_AI_all(pathdir,filename+"_train",BF_try_time,BF_ops_file_type, polyfit_deg, NN_epochs, PA=PA)
     PA_list = PA.get_pareto_points()
diff --git a/aifeynman/S_run_bf_polyfit.py b/aifeynman/S_run_bf_polyfit.py
index 820b64f..2f0874b 100644
--- a/aifeynman/S_run_bf_polyfit.py
+++ b/aifeynman/S_run_bf_polyfit.py
@@ -1,4 +1,4 @@
-# add a function to compte complexity
+# TODO: Add a function to compute complexity
 
 from .get_pareto import Point, ParetoSet
 from .RPN_to_pytorch import RPN_to_pytorch
@@ -21,12 +21,12 @@ def run_bf_polyfit(pathdir,pathdir_transformed,filename,BF_try_time,BF_ops_file_
     input_data = np.loadtxt(pathdir_transformed+filename)
#############################################################################################################################
     if np.isnan(input_data).any()==False:
-        # run BF on the data (+)
+        # Run BF on the data (+)
         print("Checking for brute force + \n")
         brute_force(pathdir_transformed,filename,BF_try_time,BF_ops_file_type,"+")
 
         try:
-            # load the BF 
output data bf_all_output = np.loadtxt("results.dat", dtype="str") express = bf_all_output[:,2] prefactors = bf_all_output[:,1] @@ -88,7 +88,7 @@ def run_bf_polyfit(pathdir,pathdir_transformed,filename,BF_try_time,BF_ops_file_ for i in range(len(complexity)): PA.add(Point(x=complexity[i], y=errors[i], data=eqns[i])) - # run gradient descent of BF output parameters and add the results to the Pareto plot + # Run gradient descent of BF output parameters and add the results to the Pareto plot for i in range(len(express)): try: bf_gd_update = RPN_to_pytorch(input_data,eqns[i]) @@ -99,12 +99,12 @@ def run_bf_polyfit(pathdir,pathdir_transformed,filename,BF_try_time,BF_ops_file_ pass ############################################################################################################################# - # run BF on the data (*) + # Run BF on the data (*) print("Checking for brute force * \n") brute_force(pathdir_transformed,filename,BF_try_time,BF_ops_file_type,"*") try: - # load the BF output data + # Load the BF output data bf_all_output = np.loadtxt("results.dat", dtype="str") express = bf_all_output[:,2] prefactors = bf_all_output[:,1] @@ -163,11 +163,11 @@ def run_bf_polyfit(pathdir,pathdir_transformed,filename,BF_try_time,BF_ops_file_ except: continue - # add the BF output to the Pareto plot + # Add the BF output to the Pareto plot for i in range(len(complexity)): PA.add(Point(x=complexity[i], y=errors[i], data=eqns[i])) - # run gradient descent of BF output parameters and add the results to the Pareto plot + # Run gradient descent of BF output parameters and add the results to the Pareto plot for i in range(len(express)): try: bf_gd_update = RPN_to_pytorch(input_data,eqns[i]) @@ -178,7 +178,7 @@ def run_bf_polyfit(pathdir,pathdir_transformed,filename,BF_try_time,BF_ops_file_ pass ############################################################################################################################# - # run polyfit on the data + # Run polyfit on the data print("Checking polyfit \n") try: polyfit_result = polyfit(polyfit_deg, pathdir_transformed+filename) @@ -226,7 +226,7 @@ def run_bf_polyfit(pathdir,pathdir_transformed,filename,BF_try_time,BF_ops_file_ except: pass - #run zero snap on polyfit output + # Run zero snap on polyfit output PA_poly = ParetoSet() PA_poly.add(Point(x=complexity, y=polyfit_err, data=str(eqn))) PA_poly = add_snap_expr_on_pareto(pathdir, filename, str(eqn), PA_poly) diff --git a/aifeynman/S_separability.py b/aifeynman/S_separability.py index 47a5016..115fd9b 100644 --- a/aifeynman/S_separability.py +++ b/aifeynman/S_separability.py @@ -42,13 +42,13 @@ def check_separability_plus(pathdir, filename): try: pathdir_weights = "results/NN_trained_models/models/" - # load the data + # Load the data n_variables = np.loadtxt(pathdir+filename, dtype='str').shape[1]-1 variables = np.loadtxt(pathdir+filename, usecols=(0,)) if n_variables==1: print(filename, "just one variable for ADD") - # if there is just one variable you have nothing to separate + # If there is just one variable you have nothing to separate return (-1,-1,-1) else: for j in range(1,n_variables): @@ -73,7 +73,7 @@ def check_separability_plus(pathdir, filename): product = product product = product.float() - # load the trained model and put it in evaluation mode + # Load the trained model and put it in evaluation mode if is_cuda: model = SimpleNet(n_variables).cuda() else: @@ -81,7 +81,7 @@ def check_separability_plus(pathdir, filename): model.load_state_dict(torch.load(pathdir_weights+filename+".h5")) 
model.eval()
 
-    # make some variables at the time equal to the median of factors
+    # Set some of the variables equal to the median of the factors, one subset at a time
     models_one = []
     models_rest = []
@@ -90,7 +90,7 @@ def check_separability_plus(pathdir, filename):
             for k in range(len(factors[0])):
                 fact_vary[:,k] = torch.full((len(factors),),torch.median(factors[:,k]))
 
-            # loop through all indices combinations
+            # Loop through all indices combinations
             var_indices_list = np.arange(0,n_variables,1)
             min_error = 1000
             best_i = []
@@ -107,14 +107,14 @@ def check_separability_plus(pathdir, filename):
                     fact_vary_one[:,t1] = torch.full((len(factors),),torch.median(factors[:,t1]))
                 for t2 in j:
                     fact_vary_rest[:,t2] = torch.full((len(factors),),torch.median(factors[:,t2]))
-                # check if the equation is separable
+                # Check if the equation is separable
                 sm = model(fact_vary_one)+model(fact_vary_rest)
-                #error = torch.sqrt(torch.mean((product-sm+model(fact_vary))**2))/torch.sqrt(torch.mean(product**2))
+                # error = torch.sqrt(torch.mean((product-sm+model(fact_vary))**2))/torch.sqrt(torch.mean(product**2))
                 list_errs = 2*abs(product-sm+model(fact_vary))
                 error = torch.median(list_errs)
                 mu = torch.mean(torch.log2(1+list_errs*2**30))
                 sigma = torch.std(torch.log2(1+list_errs*2**30))
-                #error = 2*torch.median(abs(product-sm+model(fact_vary)))
+                # error = 2*torch.median(abs(product-sm+model(fact_vary)))
                 if errorx+a for 2 variables at a time (different variables)
     min_error = 1000
     best_i = -1
     best_j = -1
@@ -121,7 +121,7 @@ def do_translational_symmetry_minus(pathdir, filename, i,j):
     try:
         pathdir_weights = "results/NN_trained_models/models/"
 
-        # load the data
+        # Load the data
         n_variables = np.loadtxt(pathdir+"/%s" %filename, dtype='str').shape[1]-1
         variables = np.loadtxt(pathdir+"/%s" %filename, usecols=(0,))
 
@@ -146,7 +146,7 @@ def do_translational_symmetry_minus(pathdir, filename, i,j):
         product = product
         product = product.float()
 
-        # load the trained model and put it in evaluation mode
+        # Load the trained model and put it in evaluation mode
         if is_cuda:
             model = SimpleNet(n_variables).cuda()
         else:
@@ -177,18 +177,18 @@ def do_translational_symmetry_minus(pathdir, filename, i,j):
         return (-1,-1)
 
-# checks if f(x,y)=f(x/y)
+# Checks if f(x,y)=f(x/y)
 def check_translational_symmetry_divide(pathdir, filename):
     try:
         pathdir_weights = "results/NN_trained_models/models/"
 
-        # load the data
+        # Load the data
         n_variables = np.loadtxt(pathdir+"/%s" %filename, dtype='str').shape[1]-1
         variables = np.loadtxt(pathdir+"/%s" %filename, usecols=(0,))
 
         if n_variables==1:
             print(filename, "just one variable for ADD \n")
-            # if there is just one variable you have nothing to separate
+            # If there is just one variable you have nothing to separate
             return (-1,-1,-1)
         else:
             for j in range(1,n_variables):
@@ -213,7 +213,7 @@ def check_translational_symmetry_divide(pathdir, filename):
         product = product
         product = product.float()
 
-        # load the trained model and put it in evaluation mode
+        # Load the trained model and put it in evaluation mode
         if is_cuda:
             model = SimpleNet(n_variables).cuda()
         else:
@@ -231,7 +231,7 @@ def check_translational_symmetry_divide(pathdir, filename):
         best_j = -1
         best_mu = 0
         best_sigma = 0
-        # make the shift x->x*a and y->y*a for 2 variables at a time (different variables)
+        # Make the shift x->x*a and y->y*a for 2 variables at a time (different variables)
         for i in range(0,n_variables,1):
             for j in range(0,n_variables,1):
                 if ix*a and y->y/a for 2 variables at a time 
(different variables) + # Make the shift x->x*a and y->y/a for 2 variables at a time (different variables) for i in range(0,n_variables,1): for j in range(0,n_variables,1): if i.*\.txt<') filelist = map(lambda x: x[1:-1], pattern.findall(string)) diff --git a/aifeynman/get_pareto.py b/aifeynman/get_pareto.py index 145a2de..5559df7 100644 --- a/aifeynman/get_pareto.py +++ b/aifeynman/get_pareto.py @@ -14,7 +14,7 @@ def __init__(self, x, y, data=None, id=None): def __getitem__(self, index): - """Indexing: get item according to index.""" + # Indexing: get item according to index. if index == 0: return self.x elif index == 1: @@ -28,7 +28,7 @@ def __getitem__(self, index): def __setitem__(self, index, value): - """Indexing: set item according to index.""" + # Indexing: set item according to index. if index == 0: self.x = value elif index == 1: @@ -45,14 +45,15 @@ def __setitem__(self, index, value): class ParetoSet(SortedKeyList): - """Maintained maximal set with efficient insertion. Note that we use the convention of smaller the better.""" + # Maintained maximal set with efficient insertion. Note that we use the convention of smaller the better. def __init__(self): super().__init__(key=lambda p: p.x) def _input_check(self, p): - """Check that input is in the correct format. + """ + Check that input is in the correct format. Args: p: input @@ -80,7 +81,8 @@ def get_id_list(self): def add(self, p): - """Insert Point into set if minimal in first two indices. + """ + Insert Point into set if minimal in first two indices. Args: p (Point): Point to insert @@ -92,20 +94,20 @@ def add(self, p): p = self._input_check(p) is_pareto = False - # check right for dominated points: + # Check right for dominated points: right = self.bisect_left(p) while len(self) > right and self[right].y >= p.y and not (self[right].x == p.x and self[right].y == p.y): self.pop(right) is_pareto = True - # check left for dominating points: + # Check left for dominating points: left = self.bisect_right(p) - 1 if left == -1 or self[left][1] > p[1]: is_pareto = True - # if it's the only point it's maximal + # If it's the only point it's maximal if len(self) == 0: is_pareto = True @@ -130,7 +132,8 @@ def __contains__(self, p): def __add__(self, other): - """Merge another pareto set into self. + """ + Merge another pareto set into self. Args: other (ParetoSet): set to merge into self @@ -147,7 +150,8 @@ def __add__(self, other): def distance(self, p): - """Given a Point, calculate the minimum Euclidean distance to pareto + """ + Given a Point, calculate the minimum Euclidean distance to pareto frontier (in first two indices). Args: @@ -162,16 +166,16 @@ def distance(self, p): point = np.array((p.x, p.y)) dom = self.dominant_array(p) - # distance is zero if pareto optimal + # Distance is zero if pareto optimal if dom.shape[0] == 0: return 0. - # add corners of all adjacent pairs + # Add corners of all adjacent pairs candidates = np.zeros((dom.shape[0] + 1, 2)) for i in range(dom.shape[0] - 1): candidates[i, :] = np.max(dom[[i, i+1], :], axis=0) - # add top and right bounds + # Add top and right bounds candidates[-1, :] = (p.x, np.min(dom[:, 1])) candidates[-2, :] = (np.min(dom[:, 0]), p.y) @@ -179,7 +183,8 @@ def distance(self, p): def dominant_array(self, p): - """Given a Point, return the set of dominating points in the set (in + """ + Given a Point, return the set of dominating points in the set (in the first two indices). 
Args: @@ -203,7 +208,8 @@ def dominant_array(self, p): def to_array(self): - """Convert first two indices to numpy.ndarray + """ + Convert first two indices to numpy.ndarray Args: None @@ -219,8 +225,8 @@ def to_array(self): return A def get_pareto_points(self): - """Returns the x, y and data for each point in the pareto frontier - + """ + Returns the x, y and data for each point in the pareto frontier """ pareto_points = [] for i, p in enumerate(self): @@ -230,7 +236,8 @@ def get_pareto_points(self): def from_list(self, A): - """Convert iterable of Points into ParetoSet. + """ + Convert iterable of Points into ParetoSet. Args: A (iterator): iterator of Points @@ -244,7 +251,9 @@ def from_list(self, A): def plot(self): - """Plotting the Pareto frontier.""" + """ + Plotting the Pareto frontier. + """ array = self.to_array() plt.figure(figsize=(8, 6)) plt.plot(array[:, 0], array[:, 1], 'r.') diff --git a/aifeynman/test_points.py b/aifeynman/test_points.py index 76853c0..68cbc5f 100644 --- a/aifeynman/test_points.py +++ b/aifeynman/test_points.py @@ -154,8 +154,8 @@ def analyze_reference_point(self, pt, opt, disp): max_distance = np.linalg.norm(self.high_range - self.low_range) reference_point_rel_distance = reference_point_distance / max_distance self.log.append((reference_point_rel_error, reference_point_rel_distance)) -# if disp: -# print(f'{pt} : found {opt}, err: {reference_point_rel_error}, distance: {reference_point_rel_distance}') + # if disp: + # print(f'{pt} : found {opt}, err: {reference_point_rel_error}, distance: {reference_point_rel_distance}') def score_pt(self, model, full_pt, disp=False, log=False): pt = full_pt[self.bitmask] From 804c1a6d3afd042ec6377bacfa7738acbcd9331f Mon Sep 17 00:00:00 2001 From: Patrick Ryan Date: Tue, 27 Oct 2020 18:53:37 -0400 Subject: [PATCH 2/2] remove remaining extra sympy imports, multiline comment -> single line --- aifeynman/RPN_to_pytorch.py | 2 -- aifeynman/S_add_bf_on_numbers_on_pareto.py | 6 ++---- aifeynman/S_add_snap_expr_on_pareto.py | 2 -- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/aifeynman/RPN_to_pytorch.py b/aifeynman/RPN_to_pytorch.py index fa5afa8..c007266 100644 --- a/aifeynman/RPN_to_pytorch.py +++ b/aifeynman/RPN_to_pytorch.py @@ -28,8 +28,6 @@ def RPN_to_pytorch(data, math_expr, lr = 1e-2, N_epochs = 500): def unsnap_recur(expr, param_dict, unsnapped_param_dict): # Recursively transform each numerical value into a learnable parameter. - import sympy - from sympy import Symbol if isinstance(expr, sympy.numbers.Float) or isinstance(expr, sympy.numbers.Integer) or isinstance(expr, sympy.numbers.Rational) or isinstance(expr, sympy.numbers.Pi): used_param_names = list(param_dict.keys()) + list(unsnapped_param_dict) unsnapped_param_name = get_next_available_key(used_param_names, "p", is_underscore=False) diff --git a/aifeynman/S_add_bf_on_numbers_on_pareto.py b/aifeynman/S_add_bf_on_numbers_on_pareto.py index ea3be5c..a7dbe85 100644 --- a/aifeynman/S_add_bf_on_numbers_on_pareto.py +++ b/aifeynman/S_add_bf_on_numbers_on_pareto.py @@ -32,9 +32,7 @@ def add_bf_on_numbers_on_pareto(pathdir, filename, PA, math_expr): input_data = np.loadtxt(pathdir+filename) def unsnap_recur(expr, param_dict, unsnapped_param_dict): - """Recursively transform each numerical value into a learnable parameter.""" - import sympy - from sympy import Symbol + # Recursively transform each numerical value into a learnable parameter. 
if isinstance(expr, sympy.numbers.Float) or isinstance(expr, sympy.numbers.Integer) or isinstance(expr, sympy.numbers.Rational) or isinstance(expr, sympy.numbers.Pi): used_param_names = list(param_dict.keys()) + list(unsnapped_param_dict) unsnapped_param_name = get_next_available_key(used_param_names, "p", is_underscore=False) @@ -52,7 +50,7 @@ def unsnap_recur(expr, param_dict, unsnapped_param_dict): def get_next_available_key(iterable, key, midfix="", suffix="", is_underscore=True): - """Get the next available key that does not collide with the keys in the dictionary.""" + # Get the next available key that does not collide with the keys in the dictionary. if key + suffix not in iterable: return key + suffix else: diff --git a/aifeynman/S_add_snap_expr_on_pareto.py b/aifeynman/S_add_snap_expr_on_pareto.py index 6e22340..90cf6ae 100644 --- a/aifeynman/S_add_snap_expr_on_pareto.py +++ b/aifeynman/S_add_snap_expr_on_pareto.py @@ -37,8 +37,6 @@ def add_snap_expr_on_pareto(pathdir, filename, math_expr, PA, DR_file=""): input_data = np.loadtxt(pathdir+filename) def unsnap_recur(expr, param_dict, unsnapped_param_dict): # Recursively transform each numerical value into a learnable parameter. - import sympy - from sympy import Symbol if isinstance(expr, sympy.numbers.Float) or isinstance(expr, sympy.numbers.Integer) or isinstance(expr, sympy.numbers.Rational) or isinstance(expr, sympy.numbers.Pi): used_param_names = list(param_dict.keys()) + list(unsnapped_param_dict) unsnapped_param_name = get_next_available_key(used_param_names, "pp", is_underscore=False)
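
For quick reference, below is a minimal sketch of the end-to-end workflow that the README text updated in this patch describes. It assumes the package is installed and compile.sh has been run; the call mirrors the README example, and the optional keyword arguments are quoted from the run_aifeynman signature touched above.

```python
# Minimal usage sketch of the API documented in the updated README.
import aifeynman

aifeynman.get_demos("example_data")  # Download examples from server

# Brute force tries the 14 operations in 14ops.txt for up to 30 seconds,
# polynomial fits go up to degree 3, and the interpolating neural network
# trains for up to 500 epochs.
aifeynman.run_aifeynman("./example_data/", "example1.txt", 30, "14ops.txt",
                        polyfit_deg=3, NN_epochs=500)

# Optional keyword arguments (defaults taken from the signature above):
#   vars_name=[]        - variable names as a list of strings, output
#                         variable included, in the same order as the data
#                         columns; when passed, dimensional analysis runs
#                         before the search
#   test_percentage=20  - percentage of rows held aside as the test set
#
# The Pareto-frontier solutions are saved in results/solution_{filename}.
```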