Skip to content

Commit

Permalink
by_symbol_match : improve heuristic
Browse files Browse the repository at this point in the history
  • Loading branch information
Roxane committed Oct 29, 2024
1 parent 6645610 commit 1c22425
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 6 deletions.
13 changes: 8 additions & 5 deletions iso4217parse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -199,14 +199,17 @@ def by_symbol_match(value, country_code=None):
List[Currency]: Currency objects found in `value`; filter by country_code.
"""
res = None
for s, group in _symbols():
if s.lower() in value.lower():
for symbol, group in _symbols():
symbol_pattern = symbol
symbol_pattern = symbol_pattern.replace("$", r"\$") # $ means endline
symbol_pattern = symbol_pattern.replace(".", r"\.") # . means anything
if re.search(rf"(^|\b|\d|\s){symbol_pattern}([^A-Z]|$)", value, re.I):
if group == 'symbol':
res = by_symbol(s, country_code)
res = by_symbol(symbol, country_code)
if group == 'alpha3':
res = [by_alpha3(s)]
res = [by_alpha3(symbol)]
if group == 'name':
res = [_data()['name'][s]]
res = [_data()['name'][symbol]]
if res and country_code is not None:
res = [
currency
Expand Down
30 changes: 29 additions & 1 deletion tests/test_by_symbol.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,4 +55,32 @@ def test_by_symbol_match_filters_country_code(
def test_parse_by_symbol_value_is_case_insensitive(text, expected_alpha3):
res = iso4217parse.by_symbol_match(text)
assert len(res) == 1
assert res[0].alpha3 == expected
assert res[0].alpha3 == expected_alpha3


@pytest.mark.parametrize(
    'text, ambiguous_alpha3, wanted_alpha3',
    (
        # ambiguous words + symbol
        ('cost $100', 'WST', 'USD'),  # st$ => WST
        # only letters found in words
        ('durée 100', 'SZL', None),  # e => SZL / no currency
        ('maximum 100', 'MRO', None),  # um => MRO / no currency
        ('flowers', 'NPR', None),  # Re => NPR / no currency
        ('flowers', 'LKR', None),  # Re => LKR / no currency
        ('flowers', 'PKR', None),  # Re => PKR / no currency
        ('amount : 100 currency : Ks', 'NPR', 'MMK'),  # Re => NPR / Ks is MMK
        ('yes: 100l', 'SOS', 'ALL'),  # s gives SOS / l is ALL or LSL
        # alpha 3 codes found in words
        ('course', 'COU', None),  # COU => COU
        ('finance', 'ANG', None),  # ANG => ANG
    ),
)
def test_parse_by_symbol_value_disambiguation(text, ambiguous_alpha3, wanted_alpha3):
    """Check that currency look-alikes embedded in ordinary words are ignored.

    For each case, the currency identified by ``ambiguous_alpha3`` must not be
    part of what ``by_symbol_match(text)`` returns. When ``wanted_alpha3`` is
    given, that currency must be part of the result; otherwise the result
    must be ``None``.
    """
    rejected = iso4217parse.by_alpha3(ambiguous_alpha3)
    matches = iso4217parse.by_symbol_match(text)

    # A ``None`` result is treated as "nothing matched" for the membership check.
    assert rejected not in (matches or [])

    if not wanted_alpha3:
        assert matches is None
        return

    assert iso4217parse.by_alpha3(wanted_alpha3) in matches

0 comments on commit 1c22425

Please sign in to comment.