Skip to content

Commit

Permalink
Fix hashed linting and backward compatibility issues.
Browse files Browse the repository at this point in the history
  • Loading branch information
matthewwardrop committed Dec 24, 2023
1 parent 45b929c commit 7f6af7d
Showing 1 changed file with 15 additions and 7 deletions.
22 changes: 15 additions & 7 deletions formulaic/transforms/hashed.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
from __future__ import annotations

import sys
from hashlib import md5
from numbers import Number
from typing import Any, Callable, Dict, Iterable, List, Optional, Union
from typing import Any, Callable, Dict, Iterable, List, Optional, Union, TYPE_CHECKING

import numpy as np

from formulaic.materializers.types import FactorValues

from .contrasts import Contrasts, encode_contrasts

if TYPE_CHECKING:
from formulaic.model_spec import ModelSpec


def md5_to_int(s: str) -> int:
    """
    Hash a string with MD5 and return the digest as a non-negative integer.

    The string is encoded with `str.encode()` (UTF-8 by default), hashed with
    MD5, and the resulting hexadecimal digest is parsed as a base-16 integer.
    The result is deterministic for a given input string.

    Args:
        s: The string to hash.

    Returns:
        The 128-bit MD5 digest of `s` interpreted as an integer.
    """
    data = s.encode()
    if sys.version_info >= (3, 9):
        # `usedforsecurity` was only added to the hashlib constructors in
        # Python 3.9; passing it on older interpreters raises `TypeError`.
        # MD5 is used here purely for bucketing, not for security.
        digest = md5(data, usedforsecurity=False)
    else:
        digest = md5(data)
    return int(digest.hexdigest(), 16)


def hashed(
Expand All @@ -22,7 +32,7 @@ def hashed(
*,
hash_func: Callable[[str], int] = md5_to_int,
spans_intercept: bool = False,
):
) -> FactorValues:
"""
Deterministically hashes the values of a factor into a fixed number of levels.
If `levels` is large, you will likely want to use this transform in conjunction
Expand All @@ -48,16 +58,14 @@ def encoder(
reduced_rank: bool,
drop_rows: List[int],
encoder_state: Dict[str, Any],
model_spec,
):
model_spec: ModelSpec,
) -> FactorValues:
values = np.array(values)
empty_state = {}
return encode_contrasts(
values,
contrasts=contrasts,
levels=np.arange(levels),
reduced_rank=reduced_rank,
_state=empty_state,
_spec=model_spec,
)

Expand Down

0 comments on commit 7f6af7d

Please sign in to comment.