From 66456106e8ba909199aea36d6db230e1eb769d40 Mon Sep 17 00:00:00 2001 From: Roxane Date: Mon, 28 Oct 2024 17:51:09 +0100 Subject: [PATCH 1/2] by_symbol_match : test function is case insensitive --- tests/test_by_symbol.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/test_by_symbol.py b/tests/test_by_symbol.py index 4738cd5..8685fa7 100644 --- a/tests/test_by_symbol.py +++ b/tests/test_by_symbol.py @@ -37,3 +37,22 @@ def test_by_symbol_match_filters_country_code( res = iso4217parse.by_symbol_match(example_string, country_code) assert len(res) == 1 assert res[0].alpha3 == expected + + +@pytest.mark.parametrize( + 'text, expected_alpha3', + ( + # symbol should be lowercase + ('lek', 'ALL'), + ('Lek', 'ALL'), + ('LEK', 'ALL'), + # symbol should be uppercase + ('DH', 'AED'), + ('Dh', 'AED'), + ('dh', 'AED'), + ), +) +def test_parse_by_symbol_value_is_case_insensitive(text, expected_alpha3): + res = iso4217parse.by_symbol_match(text) + assert len(res) == 1 + assert res[0].alpha3 == expected From 6a1547d91033bee7409dcb8079762d2b04363f3f Mon Sep 17 00:00:00 2001 From: Roxane Date: Mon, 28 Oct 2024 17:51:22 +0100 Subject: [PATCH 2/2] by_symbol_match : improve heuristic --- iso4217parse/__init__.py | 11 ++++++----- tests/test_by_symbol.py | 30 +++++++++++++++++++++++++++++- 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/iso4217parse/__init__.py b/iso4217parse/__init__.py index 1ab66ce..78130b4 100644 --- a/iso4217parse/__init__.py +++ b/iso4217parse/__init__.py @@ -199,14 +199,15 @@ def by_symbol_match(value, country_code=None): List[Currency]: Currency objects found in `value`; filter by country_code. 
""" res = None - for s, group in _symbols(): - if s.lower() in value.lower(): + for symbol, group in _symbols(): + symbol_pattern = re.escape(symbol) + if re.search(rf"(^|\b|\d|\s){symbol_pattern}([^A-Z]|$)", value, re.I): if group == 'symbol': - res = by_symbol(s, country_code) + res = by_symbol(symbol, country_code) if group == 'alpha3': - res = [by_alpha3(s)] + res = [by_alpha3(symbol)] if group == 'name': - res = [_data()['name'][s]] + res = [_data()['name'][symbol]] if res and country_code is not None: res = [ currency diff --git a/tests/test_by_symbol.py b/tests/test_by_symbol.py index 8685fa7..30b3caa 100644 --- a/tests/test_by_symbol.py +++ b/tests/test_by_symbol.py @@ -55,4 +55,32 @@ def test_by_symbol_match_filters_country_code( def test_parse_by_symbol_value_is_case_insensitive(text, expected_alpha3): res = iso4217parse.by_symbol_match(text) assert len(res) == 1 - assert res[0].alpha3 == expected + assert res[0].alpha3 == expected_alpha3 + + +@pytest.mark.parametrize( + 'text, ambiguous_alpha3, wanted_alpha3', + ( + # ambiguous words + symbol + ('cost $100', 'WST', 'USD'), # st$ => WST + # only letters found in words + ('durée 100', 'SZL', None), # e => SZL / no currency + ('maximum 100', 'MRO', None), # um => MRO / no currency + ('flowers', 'NPR', None), # Re => NPR / no currency + ('flowers', 'LKR', None), # Re => LKR / no currency + ('flowers', 'PKR', None), # Re => PKR / no currency + ('amount : 100 currency : Ks', 'NPR', 'MMK'), # Re => NPR / Ks is MMK + ('yes: 100l', 'SOS', 'ALL'), # s gives SOS / l is ALL or LSL + # alpha 3 codes found in words + ('course', 'COU', None), # COU => COU + ('finance', 'ANG', None), # ANG => ANG + ), +) +def test_parse_by_symbol_value_disambiguation(text, ambiguous_alpha3, wanted_alpha3): + assert iso4217parse.by_alpha3(ambiguous_alpha3) not in (iso4217parse.by_symbol_match(text) or []) + + if wanted_alpha3: + assert iso4217parse.by_alpha3(wanted_alpha3) in iso4217parse.by_symbol_match(text) + else: + assert 
iso4217parse.by_symbol_match(text) is None +