switch to ruff #2156

Merged · 4 commits · Apr 30, 2024
58 changes: 0 additions & 58 deletions .github/workflows/autoblack.yml

This file was deleted.

58 changes: 23 additions & 35 deletions .github/workflows/lint.yml
@@ -1,44 +1,32 @@
-name: Lint
-on: [pull_request]
-
-env:
-  PYTHON_VERSION: "3.12.1"
+name: autoruff
+on:
+  pull_request:
+    branches:
+      - master
+      - "**dev"
+    paths:
+      - "**/*.py"
+      - "pyproject.toml"
 
 jobs:
-  build:
+  lint:
     runs-on: ubuntu-latest
 
-    name: Lint with Ruff using Python 3.12.1
     steps:
-      - uses: actions/checkout@v3
-      - name: Set up Python ${{ env.PYTHON_VERSION }}
-        uses: actions/setup-python@v4
-        with:
-          python-version: ${{ env.PYTHON_VERSION }}
+      - uses: actions/checkout@v4
+
-      - name: Load cached Poetry installation
-        uses: actions/cache@v2
-        with:
-          path: ~/.local # the path depends on the OS
-          key: poetry-0 # increment to reset cache
-      - name: Install Poetry
-        uses: snok/install-poetry@v1
-        with:
-          version: '1.7.0'
-          virtualenvs-create: true
-          virtualenvs-in-project: true
-          installer-parallel: true
+      - name: Install poetry using pipx
+        run: pipx install poetry && pipx ensurepath
 
-      - name: Load cached venv
-        id: cached-poetry-dependencies
-        uses: actions/cache@v2
+      - uses: actions/setup-python@v5
         with:
-          path: .venv
-          key: venv-lint-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-00
-      - name: Install linting dependencies
-        if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
-        run: poetry install --no-interaction --no-root --only linting
+          python-version: "3.12.1"
+          cache: "poetry"
 
-      - name: Lint Python files with ruff
+      - name: Install dependencies
         run: |
-          source .venv/bin/activate
-          ruff --show-source .
+          poetry config virtualenvs.in-project true
+          poetry install --no-interaction --no-root --only linting
+
+      - name: Run Ruff formatting
+        run: poetry run ruff check --output-format=full
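To reproduce the new lint job locally, here is a minimal sketch using Python's subprocess module (assuming poetry is on PATH and the working directory is the repository root):

import subprocess

# Mirror the workflow's three commands, failing fast like CI (check=True).
for cmd in (
    ["poetry", "config", "virtualenvs.in-project", "true"],
    ["poetry", "install", "--no-interaction", "--no-root", "--only", "linting"],
    ["poetry", "run", "ruff", "check", "--output-format=full"],
):
    subprocess.run(cmd, check=True)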
1 change: 1 addition & 0 deletions docs/hooks/__init__.py
@@ -77,6 +77,7 @@ def re_route_links(markdown: str, page_title: str) -> str | None:
 
 # hooks for use by mkdocs
 
+
 # priority last - run this after any other such hooks
 # this ensures we are overwriting mknotebooks config,
 # not the other way round
723 changes: 347 additions & 376 deletions poetry.lock

Large diffs are not rendered by default.

7 changes: 3 additions & 4 deletions pyproject.toml
@@ -42,8 +42,7 @@ igraph = ">=0.11.2"
 
 [tool.poetry.group.linting]
 [tool.poetry.group.linting.dependencies]
-black = "22.6.0"
-ruff = "0.0.257"
+ruff = "^0.4.2"
 
 [tool.poetry.group.testing]
 [tool.poetry.group.testing.dependencies]
@@ -73,7 +72,7 @@ profile = "black"
 
 [tool.ruff]
 line-length = 88
-select = [
+lint.select = [
     # Pyflakes
     "F",
     # Pycodestyle
@@ -86,7 +85,7 @@ select = [
     # flake8-print
     "T20"
 ]
-ignore = [
+lint.ignore = [
     "B905", # `zip()` without an explicit `strict=` parameter
     "B006", # Do not use mutable data structures for argument defaults"
 ]
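As a rough illustration of the selected rule families (the codes below are ruff's standard ones; the file itself is hypothetical), ruff check would flag each commented line, while B905 and B006 stay suppressed via lint.ignore:

import os  # F401 (Pyflakes "F"): `os` imported but unused


def summarise(values):
    print(values)  # T201 (flake8-print "T20"): `print` found
    try:
        return sum(values)
    except:  # E722 (pycodestyle "E"): do not use bare `except`
        return None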
2 changes: 1 addition & 1 deletion splink/blocking_rule_creator_utils.py
@@ -8,7 +8,7 @@
 
 
 def to_blocking_rule_creator(
-    blocking_rule_creator: Union[dict[str, Any], str, BlockingRuleCreator]
+    blocking_rule_creator: Union[dict[str, Any], str, BlockingRuleCreator],
 ):
     if isinstance(blocking_rule_creator, dict):
        return CustomRule(**blocking_rule_creator)
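The only change here is formatter style: when a parameter list stays split across lines, ruff format appends a trailing comma after the last parameter, where black 22.6.0 left none. A minimal sketch with a hypothetical function (the same one-character change appears in splink/column_expression.py and splink/comparison_level_composition.py below):

# before:
# def normalise(
#     raw_rule: str
# ) -> str: ...

# after: trailing comma added, keeping the list exploded
def normalise(
    raw_rule: str,
) -> str:
    return raw_rule.strip()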
40 changes: 20 additions & 20 deletions splink/charts.py
@@ -268,7 +268,7 @@ def accuracy_chart(records, width=400, height=400, as_dict=False, add_metrics=[]
         "f2": "F2",
         "f0_5": "F0.5",
         "p4": "P4",
-        "phi": "\u03C6 (MCC)",
+        "phi": "\u03c6 (MCC)",
     }
     chart["transform"][2]["calculate"] = chart["transform"][2]["calculate"].replace(
         "__mapping__", str(mapping)
@@ -313,7 +313,7 @@ def threshold_selection_tool(records, as_dict=False, add_metrics=[]):
         "f2": "F2",
         "f0_5": "F0.5",
         "p4": "P4",
-        "phi": "\u03C6 (MCC)",
+        "phi": "\u03c6 (MCC)",
     }
     chart["hconcat"][1]["transform"][2]["calculate"] = chart["hconcat"][1]["transform"][
         2
@@ -380,32 +380,32 @@ def unlinkables_chart(
     unlinkables_chart_def["data"]["values"] = records
 
     if x_col == "match_probability":
-        unlinkables_chart_def["layer"][0]["encoding"]["x"][
-            "field"
-        ] = "match_probability"
-        unlinkables_chart_def["layer"][0]["encoding"]["x"]["axis"][
-            "title"
-        ] = "Threshold match probability"
+        unlinkables_chart_def["layer"][0]["encoding"]["x"]["field"] = (
+            "match_probability"
+        )
+        unlinkables_chart_def["layer"][0]["encoding"]["x"]["axis"]["title"] = (
+            "Threshold match probability"
+        )
         unlinkables_chart_def["layer"][0]["encoding"]["x"]["axis"]["format"] = ".2"
 
-        unlinkables_chart_def["layer"][1]["encoding"]["x"][
-            "field"
-        ] = "match_probability"
+        unlinkables_chart_def["layer"][1]["encoding"]["x"]["field"] = (
+            "match_probability"
+        )
         unlinkables_chart_def["layer"][1]["selection"]["selector112"]["fields"] = [
             "match_probability",
             "cum_prop",
         ]
 
-        unlinkables_chart_def["layer"][2]["encoding"]["x"][
-            "field"
-        ] = "match_probability"
-        unlinkables_chart_def["layer"][2]["encoding"]["x"]["axis"][
-            "title"
-        ] = "Threshold match probability"
+        unlinkables_chart_def["layer"][2]["encoding"]["x"]["field"] = (
+            "match_probability"
+        )
+        unlinkables_chart_def["layer"][2]["encoding"]["x"]["axis"]["title"] = (
+            "Threshold match probability"
+        )
 
-        unlinkables_chart_def["layer"][3]["encoding"]["x"][
-            "field"
-        ] = "match_probability"
+        unlinkables_chart_def["layer"][3]["encoding"]["x"]["field"] = (
+            "match_probability"
+        )
 
     if source_dataset:
         unlinkables_chart_def["title"]["text"] += f" - {source_dataset}"
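All of the churn in this file comes from two formatter-style shifts: the hex digits in unicode escapes are lowercased (\u03C6 becomes \u03c6), and long assignments are wrapped by parenthesising the right-hand side rather than breaking inside the subscripted target. A sketch of the wrapping change on a hypothetical chart spec (the same pattern recurs in splink/em_training_session.py and splink/expectation_maximisation.py below):

chart = {"encoding": {"x": {}}}  # hypothetical stand-in for a chart spec

# old (black 22.x) line-wrapping broke inside the subscript:
chart["encoding"]["x"][
    "field"
] = "match_probability"

# new (ruff format) keeps the target intact and parenthesises the value:
chart["encoding"]["x"]["field"] = (
    "match_probability"
)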
2 changes: 1 addition & 1 deletion splink/column_expression.py
@@ -51,7 +51,7 @@ def _clone(self) -> "ColumnExpression":
 
     @staticmethod
     def instantiate_if_str(
-        str_or_column_expression: Union[str, "ColumnExpression"]
+        str_or_column_expression: Union[str, "ColumnExpression"],
     ) -> "ColumnExpression":
         if isinstance(str_or_column_expression, ColumnExpression):
             return str_or_column_expression
1 change: 0 additions & 1 deletion splink/comparison.py
@@ -62,7 +62,6 @@ def __init__(
         comparison_description: str = None,
         column_info_settings: ColumnInfoSettings = None,
     ):
-
         self.comparison_levels: list[ComparisonLevel] = comparison_levels
 
         self._column_info_settings: Optional[ColumnInfoSettings] = column_info_settings
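This deletion, repeated in several files below, is another formatter-style change: the blank line that black 22.6.0 tolerated at the top of a function body is now stripped. A minimal before/after sketch with a hypothetical function:

# before: a blank line directly after the signature survived formatting
# def total(values):
#
#     return sum(values)

# after: the leading blank line is removed
def total(values):
    return sum(values)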
1 change: 0 additions & 1 deletion splink/comparison_level.py
@@ -494,7 +494,6 @@ def _exact_match_colnames(self):
     def _u_probability_corresponding_to_exact_match(
         self, comparison_levels: list[ComparisonLevel]
     ):
-
         if self.disable_tf_exact_match_detection:
             return self.u_probability
 
2 changes: 1 addition & 1 deletion splink/comparison_level_composition.py
@@ -9,7 +9,7 @@
 
 
 def _ensure_is_comparison_level_creator(
-    cl: Union[ComparisonLevelCreator, dict[str, Any]]
+    cl: Union[ComparisonLevelCreator, dict[str, Any]],
 ) -> ComparisonLevelCreator:
     if isinstance(cl, dict):
         from .comparison_level_library import CustomLevel
1 change: 0 additions & 1 deletion splink/comparison_level_library.py
@@ -584,7 +584,6 @@ def __init__(
         self.input_is_string = input_is_string
 
     def convert_time_metric_to_seconds(self, threshold: float, metric: str) -> float:
-
         conversion_factors = {
             "second": 1,
             "minute": 60,
3 changes: 0 additions & 3 deletions splink/comparison_template_library.py
@@ -54,7 +54,6 @@ def __init__(
         separate_1st_january: bool = False,
         use_damerau_levenshtein: bool = True,
     ):
-
         date_thresholds_as_iterable = ensure_is_iterable(datetime_thresholds)
         self.datetime_thresholds = [*date_thresholds_as_iterable]
         date_metrics_as_iterable = ensure_is_iterable(datetime_metrics)
@@ -88,7+87,6 @@ def datetime_parse_function(self):
         return self.col_expression.try_parse_date
 
     def create_comparison_levels(self) -> List[ComparisonLevelCreator]:
-
         if self.invalid_dates_as_null:
             null_col = self.datetime_parse_function(self.datetime_format)
         else:
@@ -153,7 +151,6 @@ def create_comparison_levels(self) -> List[ComparisonLevelCreator]:
         return levels
 
     def create_description(self) -> str:
-
         comparison_desc = "Exact match "
         if self.separate_1st_january:
             comparison_desc += "(with separate 1st Jan) "
1 change: 0 additions & 1 deletion splink/database_api.py
@@ -275,7 +275,6 @@ def register_multiple_tables(
     def register_table(
         self, input_table, table_name, overwrite=False
     ) -> SplinkDataFrame:
-
         tables_dict = self.register_multiple_tables(
             [input_table], [table_name], overwrite=overwrite
         )
12 changes: 6 additions & 6 deletions splink/em_training_session.py
@@ -115,9 +115,9 @@ def __init__(
             cc_names_to_deactivate = [
                 cc.output_column_name for cc in comparisons_to_deactivate
             ]
-            self._comparisons_that_cannot_be_estimated: list[
-                Comparison
-            ] = comparisons_to_deactivate
+            self._comparisons_that_cannot_be_estimated: list[Comparison] = (
+                comparisons_to_deactivate
+            )
 
             filtered_ccs = [
                 cc
@@ -339,9 +339,9 @@ def _iteration_history_records(self):
         for r in records:
             r["iteration"] = iteration
             # TODO: why lambda from current settings, not history?
-            r[
-                "probability_two_random_records_match"
-            ] = self.core_model_settings.probability_two_random_records_match
+            r["probability_two_random_records_match"] = (
+                self.core_model_settings.probability_two_random_records_match
+            )
 
         output_records.extend(records)
         return output_records
6 changes: 3 additions & 3 deletions splink/expectation_maximisation.py
@@ -408,9 +408,9 @@ def _max_change_in_parameters_comparison_levels(
     max_change_levels["prev_comparison_level"] = None
     max_change_levels["current_comparison_level"] = None
     max_change_levels["max_change_type"] = "probability_two_random_records_match"
-    max_change_levels[
-        "max_change_value"
-    ] = change_probability_two_random_records_match
+    max_change_levels["max_change_value"] = (
+        change_probability_two_random_records_match
+    )
     max_change_levels["max_abs_change_value"] = abs(
         change_probability_two_random_records_match
     )
1 change: 0 additions & 1 deletion splink/find_brs_with_comparison_counts_below_threshold.py
@@ -74,7 +74,6 @@ def _generate_blocking_rule(
     if len(cols_as_string) == 0:
         br: BlockingRuleCreator = CustomRule("1=1", linker._sql_dialect)
     else:
-
         br = block_on(*cols_as_string)
 
     return br.get_blocking_rule(linker._sql_dialect)
1 change: 0 additions & 1 deletion splink/find_matches_to_new_records.py
@@ -19,7 +19,6 @@ def add_unique_id_and_source_dataset_cols_if_needed(
     # Add source dataset column to new records if required and not exists
     sds_sel_sql = ""
     if sds_col := linker._settings_obj.column_info_settings.source_dataset_column_name:
-
         if sds_col not in cols:
             sds_sel_sql = f", 'new_record' as {sds_col}"
 