Skip to content

Commit

Permalink
64 rename na limit (#72)
Browse files Browse the repository at this point in the history
* wip: pandas 2, pydantic 2 compatibility

* fixed: tests with migration to pydantic v2

* fixed: generate_v2 write files

* fixed: removed py3.7 from matrix, added py3.10, py3.11

* fixed: removed py3.7 from matrix, added py3.10, py3.11

* fixed: python 3.10 issue

* fixed: formatted with black

* adding tomli

* fixed: added tomli as dev dependency

* fixed: added tomli as dev dependency

* fixed: added tomli as dev dependency

* fixed: added tomli as dev dependency

* fixed: added tomli as dev dependency

* fixed: issue#64 - correct na_pct_below

* fixed: na_below_limit
  • Loading branch information
Casyfill authored Aug 13, 2024
1 parent c9ab2d8 commit 5e7fc07
Show file tree
Hide file tree
Showing 4 changed files with 20 additions and 13 deletions.
2 changes: 1 addition & 1 deletion dfschema/core/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,7 @@ def to_file(self, path: Union[str, Path]) -> None:
import yaml

with path.open("w") as f:
yaml.dump(schema_dict, f)
yaml.dump(schema_dict, f, default_flow_style=False)
except ImportError:
raise ImportError("PyYaml is required to load yaml files")
else:
Expand Down
4 changes: 3 additions & 1 deletion dfschema/core/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ def generate_schema_dict_from_df(df: pd.DataFrame) -> dict:
cd["dtype"] = (
"string" if pd.api.types.is_string_dtype(df[col]) else str(df[col].dtype)
)
cd["na_pct_below"] = max(0.01, (df[col].isnull().mean() + 0.1)) # +10%
cd["na_pct_below"] = min(
max(0.01, float(df[col].isnull().mean() + 0.1)), 1.0
) # +10%

if pd.api.types.is_numeric_dtype(df[col]):
add_range = 0.05 * df[col].std()
Expand Down
20 changes: 11 additions & 9 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,18 +53,20 @@ def test_cli_validate_error():


def test_cli_update():
from tempfile import TemporaryDirectory
from dfschema.cli import app
from dfschema.core.config import CURRENT_PROTOCOL_VERSION

output_path = "active_sales_v2.json"
result = runner.invoke(
app,
[
"update",
"tests/test_schemas/v1/good/active_sales.json",
output_path,
],
)
with TemporaryDirectory() as tmpdirname:
output_path = f"{tmpdirname}/active_sales_v2.json"
result = runner.invoke(
app,
[
"update",
"tests/test_schemas/v1/good/active_sales.json",
output_path,
],
)

assert result.exit_code == 0, result.stdout

Expand Down
7 changes: 5 additions & 2 deletions tests/test_read_write.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ def test_write_schema_file(format, sample_df):
from dfschema import DfSchema
from tempfile import TemporaryDirectory

schema = DfSchema.from_df(sample_df)
schema: DfSchema = DfSchema.from_df(sample_df) # type: ignore

# create a temporary directory using the context manager
with TemporaryDirectory() as tmpdirname:
Expand All @@ -44,7 +44,10 @@ def test_write_schema_file(format, sample_df):
if format == "yml":
import yaml

schema_structure = yaml.safe_load(txt)
try:
schema_structure = yaml.safe_load(txt)
except Exception as e:
raise Exception(txt) from e
elif format == "json":
import json

Expand Down

0 comments on commit 5e7fc07

Please sign in to comment.