Skip to content

Commit

Permalink
Merge pull request #2156 from moj-analytical-services/switch_to_ruff_2
Browse files Browse the repository at this point in the history
switch to ruff
  • Loading branch information
RobinL authored Apr 30, 2024
2 parents 1b9a3ea + 9843d8b commit 8e121e2
Show file tree
Hide file tree
Showing 36 changed files with 447 additions and 572 deletions.
58 changes: 0 additions & 58 deletions .github/workflows/autoblack.yml

This file was deleted.

58 changes: 23 additions & 35 deletions .github/workflows/lint.yml
Original file line number Diff line number Diff line change
@@ -1,44 +1,32 @@
name: Lint
on: [pull_request]

env:
PYTHON_VERSION: "3.12.1"
name: autoruff
on:
pull_request:
branches:
- master
- "**dev"
paths:
- "**/*.py"
- "pyproject.toml"

jobs:
build:
lint:
runs-on: ubuntu-latest

name: Lint with Ruff using Python 3.12.1
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ env.PYTHON_VERSION }}
uses: actions/setup-python@v4
with:
python-version: ${{ env.PYTHON_VERSION }}
- uses: actions/checkout@v4

- name: Load cached Poetry installation
uses: actions/cache@v2
with:
path: ~/.local # the path depends on the OS
key: poetry-0 # increment to reset cache
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: '1.7.0'
virtualenvs-create: true
virtualenvs-in-project: true
installer-parallel: true
- name: Install poetry using pipx
run: pipx install poetry && pipx ensurepath

- name: Load cached venv
id: cached-poetry-dependencies
uses: actions/cache@v2
- uses: actions/setup-python@v5
with:
path: .venv
key: venv-lint-${{ runner.os }}-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}-00
- name: Install linting dependencies
if: steps.cached-poetry-dependencies.outputs.cache-hit != 'true'
run: poetry install --no-interaction --no-root --only linting
python-version: "3.12.1"
cache: "poetry"

- name: Lint Python files with ruff
- name: Install dependencies
run: |
source .venv/bin/activate
ruff --show-source .
poetry config virtualenvs.in-project true
poetry install --no-interaction --no-root --only linting
- name: Run Ruff lint check
run: poetry run ruff check --output-format=full
1 change: 1 addition & 0 deletions docs/hooks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,7 @@ def re_route_links(markdown: str, page_title: str) -> str | None:

# hooks for use by mkdocs


# priority last - run this after any other such hooks
# this ensures we are overwriting mknotebooks config,
# not the other way round
Expand Down
723 changes: 347 additions & 376 deletions poetry.lock

Large diffs are not rendered by default.

7 changes: 3 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,7 @@ igraph = ">=0.11.2"

[tool.poetry.group.linting]
[tool.poetry.group.linting.dependencies]
black = "22.6.0"
ruff = "0.0.257"
ruff = "^0.4.2"

[tool.poetry.group.testing]
[tool.poetry.group.testing.dependencies]
Expand Down Expand Up @@ -73,7 +72,7 @@ profile = "black"

[tool.ruff]
line-length = 88
select = [
lint.select = [
# Pyflakes
"F",
# Pycodestyle
Expand All @@ -86,7 +85,7 @@ select = [
# flake8-print
"T20"
]
ignore = [
lint.ignore = [
"B905", # `zip()` without an explicit `strict=` parameter
"B006", # Do not use mutable data structures for argument defaults"
]
Expand Down
2 changes: 1 addition & 1 deletion splink/blocking_rule_creator_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@


def to_blocking_rule_creator(
blocking_rule_creator: Union[dict[str, Any], str, BlockingRuleCreator]
blocking_rule_creator: Union[dict[str, Any], str, BlockingRuleCreator],
):
if isinstance(blocking_rule_creator, dict):
return CustomRule(**blocking_rule_creator)
Expand Down
40 changes: 20 additions & 20 deletions splink/charts.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def accuracy_chart(records, width=400, height=400, as_dict=False, add_metrics=[]
"f2": "F2",
"f0_5": "F0.5",
"p4": "P4",
"phi": "\u03C6 (MCC)",
"phi": "\u03c6 (MCC)",
}
chart["transform"][2]["calculate"] = chart["transform"][2]["calculate"].replace(
"__mapping__", str(mapping)
Expand Down Expand Up @@ -313,7 +313,7 @@ def threshold_selection_tool(records, as_dict=False, add_metrics=[]):
"f2": "F2",
"f0_5": "F0.5",
"p4": "P4",
"phi": "\u03C6 (MCC)",
"phi": "\u03c6 (MCC)",
}
chart["hconcat"][1]["transform"][2]["calculate"] = chart["hconcat"][1]["transform"][
2
Expand Down Expand Up @@ -380,32 +380,32 @@ def unlinkables_chart(
unlinkables_chart_def["data"]["values"] = records

if x_col == "match_probability":
unlinkables_chart_def["layer"][0]["encoding"]["x"][
"field"
] = "match_probability"
unlinkables_chart_def["layer"][0]["encoding"]["x"]["axis"][
"title"
] = "Threshold match probability"
unlinkables_chart_def["layer"][0]["encoding"]["x"]["field"] = (
"match_probability"
)
unlinkables_chart_def["layer"][0]["encoding"]["x"]["axis"]["title"] = (
"Threshold match probability"
)
unlinkables_chart_def["layer"][0]["encoding"]["x"]["axis"]["format"] = ".2"

unlinkables_chart_def["layer"][1]["encoding"]["x"][
"field"
] = "match_probability"
unlinkables_chart_def["layer"][1]["encoding"]["x"]["field"] = (
"match_probability"
)
unlinkables_chart_def["layer"][1]["selection"]["selector112"]["fields"] = [
"match_probability",
"cum_prop",
]

unlinkables_chart_def["layer"][2]["encoding"]["x"][
"field"
] = "match_probability"
unlinkables_chart_def["layer"][2]["encoding"]["x"]["axis"][
"title"
] = "Threshold match probability"
unlinkables_chart_def["layer"][2]["encoding"]["x"]["field"] = (
"match_probability"
)
unlinkables_chart_def["layer"][2]["encoding"]["x"]["axis"]["title"] = (
"Threshold match probability"
)

unlinkables_chart_def["layer"][3]["encoding"]["x"][
"field"
] = "match_probability"
unlinkables_chart_def["layer"][3]["encoding"]["x"]["field"] = (
"match_probability"
)

if source_dataset:
unlinkables_chart_def["title"]["text"] += f" - {source_dataset}"
Expand Down
2 changes: 1 addition & 1 deletion splink/column_expression.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def _clone(self) -> "ColumnExpression":

@staticmethod
def instantiate_if_str(
str_or_column_expression: Union[str, "ColumnExpression"]
str_or_column_expression: Union[str, "ColumnExpression"],
) -> "ColumnExpression":
if isinstance(str_or_column_expression, ColumnExpression):
return str_or_column_expression
Expand Down
1 change: 0 additions & 1 deletion splink/comparison.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ def __init__(
comparison_description: str = None,
column_info_settings: ColumnInfoSettings = None,
):

self.comparison_levels: list[ComparisonLevel] = comparison_levels

self._column_info_settings: Optional[ColumnInfoSettings] = column_info_settings
Expand Down
1 change: 0 additions & 1 deletion splink/comparison_level.py
Original file line number Diff line number Diff line change
Expand Up @@ -494,7 +494,6 @@ def _exact_match_colnames(self):
def _u_probability_corresponding_to_exact_match(
self, comparison_levels: list[ComparisonLevel]
):

if self.disable_tf_exact_match_detection:
return self.u_probability

Expand Down
2 changes: 1 addition & 1 deletion splink/comparison_level_composition.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@


def _ensure_is_comparison_level_creator(
cl: Union[ComparisonLevelCreator, dict[str, Any]]
cl: Union[ComparisonLevelCreator, dict[str, Any]],
) -> ComparisonLevelCreator:
if isinstance(cl, dict):
from .comparison_level_library import CustomLevel
Expand Down
1 change: 0 additions & 1 deletion splink/comparison_level_library.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,6 @@ def __init__(
self.input_is_string = input_is_string

def convert_time_metric_to_seconds(self, threshold: float, metric: str) -> float:

conversion_factors = {
"second": 1,
"minute": 60,
Expand Down
3 changes: 0 additions & 3 deletions splink/comparison_template_library.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ def __init__(
separate_1st_january: bool = False,
use_damerau_levenshtein: bool = True,
):

date_thresholds_as_iterable = ensure_is_iterable(datetime_thresholds)
self.datetime_thresholds = [*date_thresholds_as_iterable]
date_metrics_as_iterable = ensure_is_iterable(datetime_metrics)
Expand Down Expand Up @@ -88,7 +87,6 @@ def datetime_parse_function(self):
return self.col_expression.try_parse_date

def create_comparison_levels(self) -> List[ComparisonLevelCreator]:

if self.invalid_dates_as_null:
null_col = self.datetime_parse_function(self.datetime_format)
else:
Expand Down Expand Up @@ -153,7 +151,6 @@ def create_comparison_levels(self) -> List[ComparisonLevelCreator]:
return levels

def create_description(self) -> str:

comparison_desc = "Exact match "
if self.separate_1st_january:
comparison_desc += "(with separate 1st Jan) "
Expand Down
1 change: 0 additions & 1 deletion splink/database_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,6 @@ def register_multiple_tables(
def register_table(
self, input_table, table_name, overwrite=False
) -> SplinkDataFrame:

tables_dict = self.register_multiple_tables(
[input_table], [table_name], overwrite=overwrite
)
Expand Down
12 changes: 6 additions & 6 deletions splink/em_training_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,9 +115,9 @@ def __init__(
cc_names_to_deactivate = [
cc.output_column_name for cc in comparisons_to_deactivate
]
self._comparisons_that_cannot_be_estimated: list[
Comparison
] = comparisons_to_deactivate
self._comparisons_that_cannot_be_estimated: list[Comparison] = (
comparisons_to_deactivate
)

filtered_ccs = [
cc
Expand Down Expand Up @@ -339,9 +339,9 @@ def _iteration_history_records(self):
for r in records:
r["iteration"] = iteration
# TODO: why lambda from current settings, not history?
r[
"probability_two_random_records_match"
] = self.core_model_settings.probability_two_random_records_match
r["probability_two_random_records_match"] = (
self.core_model_settings.probability_two_random_records_match
)

output_records.extend(records)
return output_records
Expand Down
6 changes: 3 additions & 3 deletions splink/expectation_maximisation.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,9 +408,9 @@ def _max_change_in_parameters_comparison_levels(
max_change_levels["prev_comparison_level"] = None
max_change_levels["current_comparison_level"] = None
max_change_levels["max_change_type"] = "probability_two_random_records_match"
max_change_levels[
"max_change_value"
] = change_probability_two_random_records_match
max_change_levels["max_change_value"] = (
change_probability_two_random_records_match
)
max_change_levels["max_abs_change_value"] = abs(
change_probability_two_random_records_match
)
Expand Down
1 change: 0 additions & 1 deletion splink/find_brs_with_comparison_counts_below_threshold.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ def _generate_blocking_rule(
if len(cols_as_string) == 0:
br: BlockingRuleCreator = CustomRule("1=1", linker._sql_dialect)
else:

br = block_on(*cols_as_string)

return br.get_blocking_rule(linker._sql_dialect)
Expand Down
1 change: 0 additions & 1 deletion splink/find_matches_to_new_records.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@ def add_unique_id_and_source_dataset_cols_if_needed(
# Add source dataset column to new records if required and not exists
sds_sel_sql = ""
if sds_col := linker._settings_obj.column_info_settings.source_dataset_column_name:

if sds_col not in cols:
sds_sel_sql = f", 'new_record' as {sds_col}"

Expand Down
Loading

0 comments on commit 8e121e2

Please sign in to comment.