plot_eval_acc.py
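"""Plot SAT/UNSAT evaluation accuracy against the number of variables.

Reads per-model eval files from a directory, computes per-file accuracy,
prints average accuracies within the training regime for each
(train dataset, test dataset) pair, and plots accuracy curves over the
number of variables, optionally alongside compiled-model results.
"""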
import argparse
import os

import pandas as pd
import matplotlib.pyplot as plt

# Import the parse_compiled_eval_files function
from theory.plot_exactness import parse_compiled_eval_files
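
# Eval filenames are assumed to follow a pattern like
#   <model>_<train_type>_..._<num_vars>_<test_type>.txt
# where <train_type> and <test_type> are one of marginal/random/skewed;
# this convention is inferred from the parsing logic below.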
def parse_filename(filename):
    filename_lower = filename.lower().replace('.txt', '')
    dataset_types = ['marginal', 'random', 'skewed']
    parts = filename_lower.split('_')
    # Find the indices of the dataset types in the parts
    dataset_indices = []
    for idx, part in enumerate(parts):
        for dataset_type in dataset_types:
            if dataset_type == part:
                dataset_indices.append((idx, dataset_type))
                break  # Stop checking other dataset types
    if len(dataset_indices) < 2:
        return None
    # Model name is everything up to and including the first dataset type
    first_idx, first_dataset_type = dataset_indices[0]
    model_name = '_'.join(parts[:first_idx + 1])
    train_dataset_type = first_dataset_type.capitalize()
    # Test dataset type is the second occurrence
    second_idx, second_dataset_type = dataset_indices[1]
    test_dataset_type = second_dataset_type.capitalize()
    # The number of variables in the test set is taken from the numeric part
    # between the two dataset types, falling back to the part before the
    # second-to-last '_'
    num_vars = None
    # Try to find the number between first_idx and second_idx
    for idx in range(first_idx, second_idx):
        if parts[idx].isdigit():
            num_vars = int(parts[idx])
            break
    if num_vars is None:
        # Fall back to the number before the second-to-last '_'
        if len(parts) >= 3 and parts[-3].isdigit():
            num_vars = int(parts[-3])
        else:
            print(f"Skipping {filename}, couldn't find number of variables.")
            return None
    return model_name, train_dataset_type, test_dataset_type, num_vars
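
# Example of the expected behavior on a hypothetical filename:
#   parse_filename('6_10_marginal_eval_12_random.txt')
#   -> ('6_10_marginal', 'Marginal', 'Random', 12)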

def main():
    parser = argparse.ArgumentParser(description="Process dataset files and compute accuracies.")
    parser.add_argument('directory', type=str, help="Directory containing dataset files")
    parser.add_argument('-l', '--train_var_min', type=int, default=6,
                        help="Minimum number of variables in the training regime")
    parser.add_argument('-r', '--train_var_max', type=int, default=10,
                        help="Maximum number of variables in the training regime")
    parser.add_argument('-b', '--beta', type=float, default=20,
                        help="Mean Exactness beta value for compiled models")
    parser.add_argument('-c', '--compiled_dir', type=str, default=None,
                        help="Directory containing compiled model evaluation results")
    parser.add_argument('-t', '--eval_type', type=str, choices=['Marginal', 'Random', 'Skewed'],
                        default='Marginal', help="Evaluation dataset type to plot")
    args = parser.parse_args()

    directory = args.directory
    train_var_min = args.train_var_min
    train_var_max = args.train_var_max

    data = []
    # Iterate over files in the directory
    for filename in os.listdir(directory):
        if not filename.endswith('.txt'):
            continue
        parsed = parse_filename(filename)
        if parsed is None:
            continue
        model_name, train_dataset_type, test_dataset_type, num_vars = parsed
        # Now, read the file and compute accuracy
        file_path = os.path.join(directory, filename)
        with open(file_path, 'r') as f:
            lines = f.readlines()
        total_samples = len(lines)
        correct_predictions = 0
        for idx, line in enumerate(lines):
            tokens = line.strip().split()
            if not tokens:
                continue
            last_token = tokens[-1]
            # Ground truth label: odd lines -> SAT, even lines -> UNSAT
            if (idx + 1) % 2 == 1:
                ground_truth = 'SAT'
            else:
                ground_truth = 'UNSAT'
            # Prediction
            if last_token in ['SAT', 'UNSAT']:
                prediction = last_token
            else:
                prediction = 'Wrong'
            if prediction == ground_truth:
                correct_predictions += 1
        accuracy = correct_predictions / total_samples if total_samples > 0 else 0.0
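        # Note: blank lines are skipped by the loop above but still counted in
        # total_samples, and malformed last tokens are mapped to 'Wrong', so
        # both count against accuracy; an empty file yields accuracy 0.0.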
        # Store the data
        data.append({
            'Model': model_name,
            'Train Dataset': train_dataset_type,
            'Test Dataset': test_dataset_type,
            'Num Vars': num_vars,
            'Accuracy': accuracy
        })

    # Create DataFrame
    df = pd.DataFrame(data)

    # Filter to variable counts within the training regime
    df_training_regime = df[(df['Num Vars'] >= train_var_min) & (df['Num Vars'] <= train_var_max)]

    # Compute the 9 accuracy results, one per (train, test) dataset pair
    train_datasets = ['Marginal', 'Skewed', 'Random']
    test_datasets = ['Marginal', 'Skewed', 'Random']
    accuracy_results = []
    for train_dataset in train_datasets:
        for test_dataset in test_datasets:
            df_subset = df_training_regime[(df_training_regime['Train Dataset'] == train_dataset) &
                                           (df_training_regime['Test Dataset'] == test_dataset)]
            avg_accuracy = df_subset['Accuracy'].mean()
            accuracy_results.append({
                'Train Dataset': train_dataset,
                'Test Dataset': test_dataset,
                'Average Accuracy': avg_accuracy
            })
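
    # Note: if no files match a given (train, test) pair, the mean over the
    # empty subset is NaN, which prints as 'nan%' below.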
    # Output the accuracy results
    print("Average Accuracies within Training Regime (Variables {} to {}):".format(train_var_min, train_var_max))
    for result in accuracy_results:
        print("Train Dataset: {}, Test Dataset: {}, Average Accuracy: {:.2%}".format(
            result['Train Dataset'], result['Test Dataset'], result['Average Accuracy']))

    # Keep only rows whose Test Dataset matches args.eval_type
    df_eval = df[df['Test Dataset'] == args.eval_type]

    # Plotting
    plt.figure(figsize=(10, 6))
    # Assign colors based on training dataset
    color_map = {
        'Random': 'blue',
        'Marginal': 'orange',
        'Skewed': 'green',
        'Compiled': 'purple'
    }
    # For each model, plot accuracy over variable numbers
    models = df_eval['Model'].unique()
    for model in models:
        # Model names are assumed to start with the training range,
        # e.g. '6_10_...'; skip models from a different regime.
        split_name = model.split('_')
        model_l, model_r = split_name[0], split_name[1]
        if int(model_l) != train_var_min:
            print(f"Skipping model {model} that does not match training regime.")
            continue
        df_model = df_eval[df_eval['Model'] == model]
        df_model = df_model.sort_values('Num Vars')
        train_dataset = df_model['Train Dataset'].iloc[0]  # Assuming all entries have the same train dataset
        color = color_map.get(train_dataset, 'black')  # Default to black if not found
        plt.plot(df_model['Num Vars'], df_model['Accuracy'] * 100, marker='o', label=model, color=color)
    # If compiled_dir is provided, load compiled model data
    if args.compiled_dir:
        df_compiled = parse_compiled_eval_files(args.compiled_dir)
        if df_compiled is not None:
            # Filter data for the specified beta and eval_type
            df_compiled_beta = df_compiled[(df_compiled['beta'] == args.beta) &
                                           (df_compiled['Test Dataset'] == args.eval_type)]
            if not df_compiled_beta.empty:
                # Sort by num_vars
                df_compiled_beta = df_compiled_beta.sort_values('num_vars')
                # Plot the data; the compiled 'accuracy' column is assumed to
                # already be a percentage (the per-file accuracies above are
                # fractions, scaled by 100 when plotted)
                plt.plot(df_compiled_beta['num_vars'], df_compiled_beta['accuracy'],
                         marker='o', label='Compiled', color=color_map['Compiled'])
            else:
                print(f"No compiled data found for beta={args.beta} and Test Dataset={args.eval_type}")
    # Add shaded region between x=train_var_min and x=train_var_max
    plt.axvspan(train_var_min, train_var_max, color='gray', alpha=0.2)
    # Get current axes and y-limits
    ax = plt.gca()
    ymin, ymax = ax.get_ylim()
    ypos = (ymin + ymax) / 2  # Middle of the y-axis
    # Add label for the shaded region
    plt.text((train_var_min + train_var_max) / 2, ypos, 'Training Regime',
             ha='center', va='center', color='gray', fontsize=12)

    plt.xlabel('Number of Variables')
    plt.ylabel('Accuracy (%)')
    plt.title(f'Accuracy on {args.eval_type} Dataset')
    plt.legend()
    plt.grid(True, linestyle='--')  # Dashed grid lines
    plt.show()


if __name__ == '__main__':
    main()
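
# Example usage (directory names are hypothetical):
#   python plot_eval_acc.py evals/ -l 6 -r 10 -t Random -c compiled_evals/ -b 20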