saving and loading added to datasets.py, frobenius norm updated, added some files from experiments

Signed-off-by: 1andrin <[email protected]>

scoring (updated version of Frobenius norm)

Signed-off-by: 1andrin <[email protected]>

updates

Signed-off-by: 1andrin <[email protected]>

several updates to Frobenius norm, policy risk, Prob ERUPT, and the new BITE score

Signed-off-by: 1andrin <[email protected]>

cleaned notebooks folder

Signed-off-by: 1andrin <[email protected]>

linter issues

Signed-off-by: 1andrin <[email protected]>

delete old cluster version of scoring.py

Signed-off-by: 1andrin <[email protected]>

linter issues

Signed-off-by: 1andrin <[email protected]>

linter issues

Signed-off-by: 1andrin <[email protected]>

updates to Prob ERUPT and fixes in BITE (according to tests)

Signed-off-by: 1andrin <[email protected]>

fixing issues with weight computation in ERUPT

Signed-off-by: 1andrin <[email protected]>

linter

Signed-off-by: 1andrin <[email protected]>

fix rounding issues

Signed-off-by: 1andrin <[email protected]>

add and organize current results (plots)

Signed-off-by: 1andrin <[email protected]>

fix outstanding test on Andrin's branch

Cleanup of ERUPT logic, ERUPT-specific tests pass
1andrin authored and EgorKraevTransferwise committed Nov 28, 2024
1 parent 0ec25ee commit c128bb1
Showing 130 changed files with 2,793 additions and 1,648 deletions.
2 changes: 1 addition & 1 deletion causaltune/__init__.py
@@ -1,5 +1,5 @@
 from causaltune.optimiser import CausalTune
 from causaltune.visualizer import Visualizer
-from causaltune.scoring import Scorer
+from causaltune.score.scoring import Scorer

 __all__ = ["CausalTune", "Visualizer", "Scorer"]
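
The only substantive change here is the import path: Scorer now lives in a score subpackage. A minimal migration sketch for downstream code, assuming only the path change shown in this diff; the package-level re-export through causaltune/__init__.py keeps working:

# Old import path, removed in this commit:
# from causaltune.scoring import Scorer

# New import path, per the diff above:
from causaltune.score.scoring import Scorer

# Still valid, since __init__.py re-exports Scorer:
from causaltune import Scorer
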
1 change: 1 addition & 0 deletions causaltune/dataset_processor.py
@@ -16,6 +16,7 @@ class CausalityDatasetProcessor(BaseEstimator, TransformerMixin):
         outcome (str): The target variable used for encoding.
         encoder: Encoder object used during feature transformations.
     """
+
     def __init__(self):
         """
         Initializes CausalityDatasetProcessor with default attributes for encoder_type, outcome, and encoder.
112 changes: 62 additions & 50 deletions causaltune/datasets.py
@@ -1,5 +1,7 @@
 import pandas as pd
 import numpy as np
+import pickle
+import os
 from scipy import special

 # from scipy.stats import betabinom
@@ -12,10 +14,8 @@


 def linear_multi_dataset(
-    n_points=10000,
-    impact=None,
-    include_propensity=False,
-    include_control=False) -> CausalityDataset:
+    n_points=10000, impact=None, include_propensity=False, include_control=False
+) -> CausalityDataset:
     if impact is None:
         impact = {0: 0.0, 1: 2.0, 2: 1.0}
     df = pd.DataFrame(
@@ -80,8 +80,9 @@ def nhefs() -> CausalityDataset:
     df = df.loc[~missing]

     df = df[covariates + ["qsmk"] + ["wt82_71"]]
-    df.rename(columns={c: "x" + str(i + 1)
-                       for i, c in enumerate(covariates)}, inplace=True)
+    df.rename(
+        columns={c: "x" + str(i + 1) for i, c in enumerate(covariates)}, inplace=True
+    )

     return CausalityDataset(df, treatment="qsmk", outcomes=["wt82_71"])

@@ -172,8 +173,7 @@ def amazon_reviews(rating="pos") -> CausalityDataset:
         gdown.download(url, "amazon_" + rating + ".csv", fuzzy=True)
         df = pd.read_csv("amazon_" + rating + ".csv")
         df.drop(df.columns[[2, 3, 4]], axis=1, inplace=True)
-        df.columns = ["treatment", "y_factual"] + \
-            ["x" + str(i) for i in range(1, 301)]
+        df.columns = ["treatment", "y_factual"] + ["x" + str(i) for i in range(1, 301)]
         return CausalityDataset(df, "treatment", ["y_factual"])
     else:
         print(
@@ -226,14 +226,10 @@ def synth_ihdp(return_df=False) -> CausalityDataset:
     data.columns = col
     # drop the columns we don't care about
     ignore_patterns = ["y_cfactual", "mu"]
-    ignore_cols = [c for c in data.columns if any(
-        [s in c for s in ignore_patterns])]
+    ignore_cols = [c for c in data.columns if any([s in c for s in ignore_patterns])]
     data = data.drop(columns=ignore_cols)

-    return CausalityDataset(
-        data,
-        "treatment",
-        ["y_factual"]) if not return_df else data
+    return CausalityDataset(data, "treatment", ["y_factual"]) if not return_df else data


 def synth_acic(condition=1) -> CausalityDataset:
@@ -347,7 +343,7 @@ def generate_synthetic_data(
     noisy_outcomes: bool = False,
     effect_size: Union[int, None] = None,
     add_instrument: bool = False,
-    known_propensity: bool = False
+    known_propensity: bool = False,
 ) -> CausalityDataset:
     """Generates synthetic dataset with conditional treatment effect (CATE) and optional instrumental variable.
     Supports RCT (unconfounded) and observational (confounded) data.
@@ -421,18 +417,11 @@ def mu(X):
     Y = tau * T + Y_base

     features = [f"X{i+1}" for i in range(n_covariates)]
-    df = pd.DataFrame(np.array([*X.T,
-                                T,
-                                Y,
-                                tau,
-                                known_p,
-                                Y_base]).T,
-                      columns=features + ["treatment",
-                                          "outcome",
-                                          "true_effect",
-                                          "propensity",
-                                          "base_outcome"],
-                      )
+    df = pd.DataFrame(
+        np.array([*X.T, T, Y, tau, known_p, Y_base]).T,
+        columns=features
+        + ["treatment", "outcome", "true_effect", "propensity", "base_outcome"],
+    )
     data = CausalityDataset(
         data=df,
         treatment="treatment",
@@ -455,7 +444,7 @@ def generate_linear_synthetic_data(
     noisy_outcomes: bool = False,
     effect_size: Union[int, None] = None,
     add_instrument: bool = False,
-    known_propensity: bool = False
+    known_propensity: bool = False,
 ) -> CausalityDataset:
     """Generates synthetic dataset with linear treatment effect (CATE) and optional instrumental variable.
     Supports RCT (unconfounded) and observational (confounded) data.
@@ -531,18 +520,11 @@ def mu(X):
     Y = tau * T + Y_base

     features = [f"X{i+1}" for i in range(n_covariates)]
-    df = pd.DataFrame(np.array([*X.T,
-                                T,
-                                Y,
-                                tau,
-                                known_p,
-                                Y_base]).T,
-                      columns=features + ["treatment",
-                                          "outcome",
-                                          "true_effect",
-                                          "propensity",
-                                          "base_outcome"],
-                      )
+    df = pd.DataFrame(
+        np.array([*X.T, T, Y, tau, known_p, Y_base]).T,
+        columns=features
+        + ["treatment", "outcome", "true_effect", "propensity", "base_outcome"],
+    )
     data = CausalityDataset(
         data=df,
         treatment="treatment",
@@ -652,16 +634,8 @@ def generate_non_random_dataset(num_samples=1000):
     )
     treatment = np.random.binomial(1, propensity)
     outcome = (
-        0.2
-        * treatment
-        + 0.5
-        * x1
-        - 0.2
-        * x2
-        + np.random.normal(
-            0,
-            1,
-            num_samples))
+        0.2 * treatment + 0.5 * x1 - 0.2 * x2 + np.random.normal(0, 1, num_samples)
+    )

     dataset = {
         "T": treatment,
@@ -740,3 +714,41 @@ def mlrate_experiment_synth_dgp(
     cd = CausalityDataset(data=df, outcomes=["Y"], treatment="T")

     return cd
+
+
+def save_dataset(dataset: CausalityDataset, filename: str):
+    """
+    Save a CausalityDataset object to a file using pickle.
+    Args:
+        dataset (CausalityDataset): The dataset to save.
+        filename (str): The name of the file to save the dataset to.
+    """
+    with open(filename, "wb") as f:
+        pickle.dump(dataset, f)
+    print(f"Dataset saved to {filename}")
+
+
+def load_dataset(filename: str) -> CausalityDataset:
+    """
+    Load a CausalityDataset object from a file using pickle.
+    Args:
+        filename (str): The name of the file to load the dataset from.
+    Returns:
+        CausalityDataset: The loaded dataset.
+    """
+    if not os.path.exists(filename):
+        raise FileNotFoundError(f"File {filename} not found.")
+
+    with open(filename, "rb") as f:
+        dataset = pickle.load(f)
+
+    if not isinstance(dataset, CausalityDataset):
+        raise ValueError(
+            f"The file {filename} does not contain a valid CausalityDataset object."
+        )
+
+    print(f"Dataset loaded from {filename}")
+    return dataset
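
The new save_dataset/load_dataset pair is a plain pickle round-trip plus a type check on load. A minimal usage sketch, reusing the synth_ihdp loader from this same module; the .data attribute is an assumption inferred from the CausalityDataset(data=...) constructor calls above. Note that pickle.load should only be run on files from a trusted source.

from causaltune.datasets import synth_ihdp, save_dataset, load_dataset

cd = synth_ihdp()               # build a CausalityDataset
save_dataset(cd, "ihdp.pkl")    # prints: Dataset saved to ihdp.pkl
cd2 = load_dataset("ihdp.pkl")  # raises ValueError if the pickle isn't a CausalityDataset

# Assumed attribute: the constructor takes data=..., so the frame should round-trip intact.
assert cd2.data.equals(cd.data)
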
174 changes: 0 additions & 174 deletions causaltune/erupt.py

This file was deleted.

2 changes: 1 addition & 1 deletion causaltune/models/dummy.py
@@ -4,7 +4,7 @@
 import pandas as pd

 from causaltune.models.wrapper import DoWhyMethods, DoWhyWrapper
-from causaltune.scoring import Scorer
+from causaltune.score.scoring import Scorer

 from dowhy.causal_estimators.instrumental_variable_estimator import (
     InstrumentalVariableEstimator,