From 11fae0a396723f87250e3ef98d0a4e6f4a03c1c5 Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Sun, 15 Mar 2020 00:02:41 -0700 Subject: [PATCH 1/5] fix #88: multiple decimal places * extract_number(), extract_numbers(), and all helper functions gain a keyword parameter `decimal_places` (for helpers, just `places`) which does what it sounds like, using builtin round(). * avoid capturing non-adjacent numbers as decimal places * avoid capturing already-used decimal places as separate numbers in extract_numbers() * add a few tests for the above --- lingua_franca/lang/parse_en.py | 80 ++++++++++++++++++++++++---------- lingua_franca/parse.py | 17 ++++++-- test/test_parse.py | 17 ++++++-- 3 files changed, 84 insertions(+), 30 deletions(-) diff --git a/lingua_franca/lang/parse_en.py b/lingua_franca/lang/parse_en.py index 8054bf07..864a0f56 100644 --- a/lingua_franca/lang/parse_en.py +++ b/lingua_franca/lang/parse_en.py @@ -77,7 +77,7 @@ def generate_plurals_en(originals): _STRING_LONG_ORDINAL_EN = invert_dict(_LONG_ORDINAL_EN) -def _convert_words_to_numbers_en(text, short_scale=True, ordinals=False): +def _convert_words_to_numbers_en(text, short_scale=True, ordinals=False, places=None): """ Convert words in a string into their equivalent numbers. Args: @@ -94,7 +94,8 @@ def _convert_words_to_numbers_en(text, short_scale=True, ordinals=False): text = text.lower() tokens = tokenize(text) numbers_to_replace = \ - _extract_numbers_with_text_en(tokens, short_scale, ordinals) + _extract_numbers_with_text_en( + tokens, short_scale, ordinals, places=places) numbers_to_replace.sort(key=lambda number: number.start_index) results = [] @@ -114,7 +115,8 @@ def _convert_words_to_numbers_en(text, short_scale=True, ordinals=False): def _extract_numbers_with_text_en(tokens, short_scale=True, - ordinals=False, fractional_numbers=True): + ordinals=False, fractional_numbers=True, + places=None): """ Extract all numbers from a list of Tokens, with the words that represent them. @@ -138,7 +140,8 @@ def _extract_numbers_with_text_en(tokens, short_scale=True, while True: to_replace = \ _extract_number_with_text_en(tokens, short_scale, - ordinals, fractional_numbers) + ordinals, fractional_numbers, + places=places) if not to_replace: break @@ -156,7 +159,8 @@ def _extract_numbers_with_text_en(tokens, short_scale=True, def _extract_number_with_text_en(tokens, short_scale=True, - ordinals=False, fractional_numbers=True): + ordinals=False, fractional_numbers=True, + places=None): """ This function extracts a number from a list of Tokens. @@ -172,7 +176,8 @@ def _extract_number_with_text_en(tokens, short_scale=True, """ number, tokens = \ _extract_number_with_text_en_helper(tokens, short_scale, - ordinals, fractional_numbers) + ordinals, fractional_numbers, + places=places) while tokens and tokens[0].word in _ARTICLES_EN: tokens.pop(0) return ReplaceableNumber(number, tokens) @@ -180,7 +185,8 @@ def _extract_number_with_text_en(tokens, short_scale=True, def _extract_number_with_text_en_helper(tokens, short_scale=True, ordinals=False, - fractional_numbers=True): + fractional_numbers=True, + places=None): """ Helper for _extract_number_with_text_en. @@ -205,7 +211,8 @@ def _extract_number_with_text_en_helper(tokens, return fraction, fraction_text decimal, decimal_text = \ - _extract_decimal_with_text_en(tokens, short_scale, ordinals) + _extract_decimal_with_text_en( + tokens, short_scale, ordinals, places=places) if decimal: return decimal, decimal_text @@ -254,7 +261,7 @@ def _extract_fraction_with_text_en(tokens, short_scale, ordinals): return None, None -def _extract_decimal_with_text_en(tokens, short_scale, ordinals): +def _extract_decimal_with_text_en(tokens, short_scale, ordinals, places=None): """ Extract decimal numbers from a string. @@ -271,6 +278,7 @@ def _extract_decimal_with_text_en(tokens, short_scale, ordinals): tokens [Token]: The text to parse. short_scale boolean: ordinals boolean: + places [int]: Number of decimal places to return Returns: (float, [Token]) @@ -284,21 +292,46 @@ def _extract_decimal_with_text_en(tokens, short_scale, ordinals): if len(partitions) == 3: numbers1 = \ _extract_numbers_with_text_en(partitions[0], short_scale, - ordinals, fractional_numbers=False) + ordinals, fractional_numbers=False, + places=places) numbers2 = \ _extract_numbers_with_text_en(partitions[2], short_scale, - ordinals, fractional_numbers=False) - + ordinals, fractional_numbers=False, + places=places) if not numbers1 or not numbers2: return None, None + token_idx = numbers2[0].tokens[0].index + idx = 1 + stop = False + while idx < len(numbers2) and not stop: + if numbers2[idx].tokens[0].index != numbers2[idx-1].tokens[0].index + 1 or \ + numbers2[idx].value is None: + stop = True + else: + idx += 1 + numbers2 = numbers2[:idx] + number = numbers1[-1] - decimal = numbers2[0] + # decimal = numbers2[0] # TODO handle number dot number number number - if "." not in str(decimal.text): - return number.value + float('0.' + str(decimal.value)), \ - number.tokens + partitions[1] + decimal.tokens + if "." not in str(numbers2[0].text): + return_value = float('0.' + "".join([str( + decimal.value) for decimal in numbers2])) + return_value = number.value + return_value + if return_value == int(return_value): + return_value = int(return_value) + + # out_part2 = partitions[2] + # for n in numbers2: + # out_part2[n.index] = n.value + + return_tokens = number.tokens + partitions[1] + for n in numbers2: + return_tokens += n.tokens + + return (round(return_value, places) if places else return_value), return_tokens return None, None @@ -319,8 +352,8 @@ def _extract_whole_number_with_text_en(tokens, short_scale, ordinals): The value parsed, and tokens that it corresponds to. """ - multiplies, string_num_ordinal, string_num_scale = \ - _initialize_number_data(short_scale) + multiplies, string_num_ordinal, string_num_scale = _initialize_number_data( + short_scale) number_words = [] # type: [Token] val = False @@ -560,7 +593,7 @@ def _initialize_number_data(short_scale): return multiplies, string_num_ordinal_en, string_num_scale_en -def extractnumber_en(text, short_scale=True, ordinals=False): +def extractnumber_en(text, short_scale=True, ordinals=False, decimal_places=None): """ This function extracts a number from a text string, handles pronunciations in long scale and short scale @@ -571,13 +604,15 @@ def extractnumber_en(text, short_scale=True, ordinals=False): text (str): the string to normalize short_scale (bool): use short scale if True, long scale if False ordinals (bool): consider ordinal numbers, third=3 instead of 1/3 + decimal_places (int or False): rounds to # decimal places. uses builtin round() Returns: (int) or (float) or False: The extracted number or False if no number was found """ return _extract_number_with_text_en(tokenize(text.lower()), - short_scale, ordinals).value + short_scale, ordinals, + places=decimal_places).value def extract_duration_en(text): @@ -1476,7 +1511,7 @@ def isFractional_en(input_str, short_scale=True): return False -def extract_numbers_en(text, short_scale=True, ordinals=False): +def extract_numbers_en(text, short_scale=True, ordinals=False, decimal_places=None): """ Takes in a string and extracts a list of numbers. @@ -1487,11 +1522,12 @@ def extract_numbers_en(text, short_scale=True, ordinals=False): is now common in most English speaking countries. See https://en.wikipedia.org/wiki/Names_of_large_numbers ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3 + decimal_places (int or False): rounds to # decimal places. uses builtin round() Returns: list: list of extracted numbers as floats """ results = _extract_numbers_with_text_en(tokenize(text), - short_scale, ordinals) + short_scale, ordinals, places=decimal_places) return [float(result.value) for result in results] diff --git a/lingua_franca/parse.py b/lingua_franca/parse.py index 303baedd..547d1a9e 100644 --- a/lingua_franca/parse.py +++ b/lingua_franca/parse.py @@ -77,8 +77,12 @@ def match_one(query, choices): else: return best +# TODO update these docstrings when decimal_places has been implemented +# in all parsers -def extract_numbers(text, short_scale=True, ordinals=False, lang=None): + +def extract_numbers(text, short_scale=True, ordinals=False, lang=None, + decimal_places=False): """ Takes in a string and extracts a list of numbers. @@ -90,12 +94,14 @@ def extract_numbers(text, short_scale=True, ordinals=False, lang=None): See https://en.wikipedia.org/wiki/Names_of_large_numbers ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3 lang (str): the BCP-47 code for the language to use, None uses default + decimal_places (int or False): rounds to # decimal places. Not yet implemented + in all languages. False performs no rounding. Uses builtin round() Returns: list: list of extracted numbers as floats, or empty list if none found """ lang_code = get_primary_lang_code(lang) if lang_code == "en": - return extract_numbers_en(text, short_scale, ordinals) + return extract_numbers_en(text, short_scale, ordinals, decimal_places) elif lang_code == "de": return extract_numbers_de(text, short_scale, ordinals) elif lang_code == "fr": @@ -112,7 +118,8 @@ def extract_numbers(text, short_scale=True, ordinals=False, lang=None): return [] -def extract_number(text, short_scale=True, ordinals=False, lang=None): +def extract_number(text, short_scale=True, ordinals=False, lang=None, + decimal_places=False): """Takes in a string and extracts a number. Args: @@ -123,6 +130,8 @@ def extract_number(text, short_scale=True, ordinals=False, lang=None): See https://en.wikipedia.org/wiki/Names_of_large_numbers ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3 lang (str): the BCP-47 code for the language to use, None uses default + decimal_places (int or False): rounds to # decimal places. Not yet implemented + in all languages. False performs no rounding. Uses builtin round() Returns: (int, float or False): The number extracted or False if the input text contains no numbers @@ -130,7 +139,7 @@ def extract_number(text, short_scale=True, ordinals=False, lang=None): lang_code = get_primary_lang_code(lang) if lang_code == "en": return extractnumber_en(text, short_scale=short_scale, - ordinals=ordinals) + ordinals=ordinals, decimal_places=decimal_places) elif lang_code == "es": return extractnumber_es(text) elif lang_code == "pt": diff --git a/test/test_parse.py b/test/test_parse.py index 5046f42e..bcb90038 100644 --- a/test/test_parse.py +++ b/test/test_parse.py @@ -151,10 +151,12 @@ def test_extract_number(self): self.assertEqual(extract_number("eight hundred trillion two hundred \ fifty seven"), 800000000000257.0) - # TODO handle this case - # self.assertEqual( - # extract_number("6 dot six six six"), - # 6.666) + self.assertEqual(extract_number("6 dot six six six"), 6.666) + self.assertEqual(extract_number( + "6 dot six six six", decimal_places=2), round(6.666, 2)) + self.assertEqual(extract_number( + "6 point seventy", decimal_places=2), 6.7) + self.assertTrue(extract_number("The tennis player is fast") is False) self.assertTrue(extract_number("fraggle") is False) @@ -735,6 +737,13 @@ def test_multiple_numbers(self): self.assertEqual(extract_numbers("this is a seven eight nine and a" " half test"), [7.0, 8.0, 9.5]) + self.assertEqual(extract_numbers("this is a six point five seven nine" + " bingo ten nancy forty six test"), + [6.579, 10.0, 46.0]) + self.assertEqual(extract_numbers("this is a six point five seven nine" + " bingo ten nancy forty six test" + " with decimal rounding", decimal_places=2), + [round(6.579, 2), 10, 46]) def test_contractions(self): self.assertEqual(normalize("ain't"), "is not") From 715fdda2713fe76d1233bf457fd2d032b4d5dd3c Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Tue, 24 Mar 2020 00:17:05 -0700 Subject: [PATCH 2/5] fix #93 multiple decimals break extract_number --- lingua_franca/lang/parse_en.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lingua_franca/lang/parse_en.py b/lingua_franca/lang/parse_en.py index 864a0f56..5932d80d 100644 --- a/lingua_franca/lang/parse_en.py +++ b/lingua_franca/lang/parse_en.py @@ -289,7 +289,7 @@ def _extract_decimal_with_text_en(tokens, short_scale, ordinals, places=None): for c in _DECIMAL_MARKER: partitions = partition_list(tokens, lambda t: t.word == c) - if len(partitions) == 3: + if len(partitions) >= 3: numbers1 = \ _extract_numbers_with_text_en(partitions[0], short_scale, ordinals, fractional_numbers=False, @@ -301,7 +301,6 @@ def _extract_decimal_with_text_en(tokens, short_scale, ordinals, places=None): if not numbers1 or not numbers2: return None, None - token_idx = numbers2[0].tokens[0].index idx = 1 stop = False while idx < len(numbers2) and not stop: @@ -604,7 +603,7 @@ def extractnumber_en(text, short_scale=True, ordinals=False, decimal_places=None text (str): the string to normalize short_scale (bool): use short scale if True, long scale if False ordinals (bool): consider ordinal numbers, third=3 instead of 1/3 - decimal_places (int or False): rounds to # decimal places. uses builtin round() + decimal_places (int or None): rounds to # decimal places. uses builtin round() Returns: (int) or (float) or False: The extracted number or False if no number was found From 951b8df83e2b427293a29c5b50679533cf58d02d Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Mon, 30 Mar 2020 14:09:43 -0700 Subject: [PATCH 3/5] address feedback from PR, add granularity - more visible funcs now have more explicitly-named parameter for places - `extract_number(decimal_places=...)` now has several options: - `decimal_places=n` will round to `n` places - `decimal_places=0` will round up to nearest int, equiv. ceil(result) - `decimal_places=-1` will round down to int, equiv. floor(result) - expanded comments and docstrings - remove old commented-out code --- lingua_franca/lang/parse_en.py | 59 ++++++++++++++++++++++------------ lingua_franca/parse.py | 11 +++++-- 2 files changed, 47 insertions(+), 23 deletions(-) diff --git a/lingua_franca/lang/parse_en.py b/lingua_franca/lang/parse_en.py index 5932d80d..214e57ba 100644 --- a/lingua_franca/lang/parse_en.py +++ b/lingua_franca/lang/parse_en.py @@ -14,16 +14,17 @@ # limitations under the License. # from datetime import datetime, timedelta - from dateutil.relativedelta import relativedelta +from math import ceil, floor + +import json +import re from lingua_franca.lang.parse_common import is_numeric, look_for_fractions, \ invert_dict, ReplaceableNumber, partition_list, tokenize, Token, Normalizer from lingua_franca.lang.common_data_en import _ARTICLES_EN, _NUM_STRING_EN, \ _LONG_ORDINAL_EN, _LONG_SCALE_EN, _SHORT_SCALE_EN, _SHORT_ORDINAL_EN -import re -import json from lingua_franca import resolve_resource_file from lingua_franca.time import now_local @@ -77,14 +78,22 @@ def generate_plurals_en(originals): _STRING_LONG_ORDINAL_EN = invert_dict(_LONG_ORDINAL_EN) -def _convert_words_to_numbers_en(text, short_scale=True, ordinals=False, places=None): +def _convert_words_to_numbers_en(text, short_scale=True, ordinals=False, + decimal_places=None): """ Convert words in a string into their equivalent numbers. Args: - text str: - short_scale boolean: True if short scale numbers should be used. - ordinals boolean: True if ordinals (e.g. first, second, third) should + text (str): + short_scale (bool): True if short scale numbers should be used. + ordinals (bool): True if ordinals (e.g. first, second, third) should be parsed to their number values (1, 2, 3...) + decimal_places (int or None): Positive value will round to X places. + Val of 0 will round up to nearest int, + equivalent to `math.ceil(result)` + Val of -1 will round down to nearest int, + equivalent to `math.floor(result)` + Val of None will perform no rounding, + potentially returning a very long string. Returns: str @@ -95,7 +104,7 @@ def _convert_words_to_numbers_en(text, short_scale=True, ordinals=False, places= tokens = tokenize(text) numbers_to_replace = \ _extract_numbers_with_text_en( - tokens, short_scale, ordinals, places=places) + tokens, short_scale, ordinals, places=decimal_places) numbers_to_replace.sort(key=lambda number: number.start_index) results = [] @@ -271,14 +280,16 @@ def _extract_decimal_with_text_en(tokens, short_scale, ordinals, places=None): While this is a helper for extractnumber_en, it also depends on extractnumber_en, to parse out the components of the decimal. - This does not currently handle things like: - number dot number number number - Args: tokens [Token]: The text to parse. short_scale boolean: ordinals boolean: - places [int]: Number of decimal places to return + places [int] or None: Number of decimal places to return + None performs no rounding + Positive int rounds to so many places + 0 value rounds up to nearest int + -1 value rounds down to nearest int + other values throw error Returns: (float, [Token]) @@ -301,6 +312,14 @@ def _extract_decimal_with_text_en(tokens, short_scale, ordinals, places=None): if not numbers1 or not numbers2: return None, None + # `numbers2` may have caught numbers which are part of the + # input string, but which are not part of *this* number. + # For example, for the input string: + # "a ratio of one point five to one" + # `numbers2` might read, `numbers2 == [5, 1]` + # + # truncate `numbers2` to contain only those tokens which were + # adjacent in the input string. idx = 1 stop = False while idx < len(numbers2) and not stop: @@ -312,23 +331,23 @@ def _extract_decimal_with_text_en(tokens, short_scale, ordinals, places=None): numbers2 = numbers2[:idx] number = numbers1[-1] - # decimal = numbers2[0] - # TODO handle number dot number number number + if "." not in str(numbers2[0].text): return_value = float('0.' + "".join([str( decimal.value) for decimal in numbers2])) return_value = number.value + return_value - if return_value == int(return_value): - return_value = int(return_value) - - # out_part2 = partitions[2] - # for n in numbers2: - # out_part2[n.index] = n.value + if places: + if places == 0: + return_value = ceil(return_value) + elif places == -1: + return_value = floor(return_value) return_tokens = number.tokens + partitions[1] for n in numbers2: return_tokens += n.tokens + if not places: + return return_value, return_tokens return (round(return_value, places) if places else return_value), return_tokens return None, None diff --git a/lingua_franca/parse.py b/lingua_franca/parse.py index 547d1a9e..34139165 100644 --- a/lingua_franca/parse.py +++ b/lingua_franca/parse.py @@ -119,7 +119,7 @@ def extract_numbers(text, short_scale=True, ordinals=False, lang=None, def extract_number(text, short_scale=True, ordinals=False, lang=None, - decimal_places=False): + decimal_places=None): """Takes in a string and extracts a number. Args: @@ -130,8 +130,13 @@ def extract_number(text, short_scale=True, ordinals=False, lang=None, See https://en.wikipedia.org/wiki/Names_of_large_numbers ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3 lang (str): the BCP-47 code for the language to use, None uses default - decimal_places (int or False): rounds to # decimal places. Not yet implemented - in all languages. False performs no rounding. Uses builtin round() + decimal_places (int or None): Positive value will round to X places. + Val of 0 will round up to nearest int, + equivalent to `math.ceil(result)` + Val of -1 will round down to nearest int, + equivalent to `math.floor(result)` + Val of None will perform no rounding, + potentially returning a very long string. Returns: (int, float or False): The number extracted or False if the input text contains no numbers From 260e32aa0e0a8a2205fc9443c4a7ca0ecd9d1b3c Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Mon, 30 Mar 2020 14:39:34 -0700 Subject: [PATCH 4/5] add tests, fix introduced bugs --- lingua_franca/lang/parse_en.py | 8 +++++--- lingua_franca/parse.py | 11 ++++++++--- test/test_parse.py | 11 +++++++++++ 3 files changed, 24 insertions(+), 6 deletions(-) diff --git a/lingua_franca/lang/parse_en.py b/lingua_franca/lang/parse_en.py index 214e57ba..bc5d1095 100644 --- a/lingua_franca/lang/parse_en.py +++ b/lingua_franca/lang/parse_en.py @@ -337,19 +337,21 @@ def _extract_decimal_with_text_en(tokens, short_scale, ordinals, places=None): return_value = float('0.' + "".join([str( decimal.value) for decimal in numbers2])) return_value = number.value + return_value - if places: + if places is not None: if places == 0: return_value = ceil(return_value) elif places == -1: return_value = floor(return_value) - + if places < 1: + return_value = int(return_value) return_tokens = number.tokens + partitions[1] for n in numbers2: return_tokens += n.tokens if not places: return return_value, return_tokens - return (round(return_value, places) if places else return_value), return_tokens + return (round(return_value, places) if places > 0 + else return_value), return_tokens return None, None diff --git a/lingua_franca/parse.py b/lingua_franca/parse.py index 34139165..357a4f8d 100644 --- a/lingua_franca/parse.py +++ b/lingua_franca/parse.py @@ -82,7 +82,7 @@ def match_one(query, choices): def extract_numbers(text, short_scale=True, ordinals=False, lang=None, - decimal_places=False): + decimal_places=None): """ Takes in a string and extracts a list of numbers. @@ -94,8 +94,13 @@ def extract_numbers(text, short_scale=True, ordinals=False, lang=None, See https://en.wikipedia.org/wiki/Names_of_large_numbers ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3 lang (str): the BCP-47 code for the language to use, None uses default - decimal_places (int or False): rounds to # decimal places. Not yet implemented - in all languages. False performs no rounding. Uses builtin round() + decimal_places (int or None): Positive value will round to X places. + Val of 0 will round up to nearest int, + equivalent to `math.ceil(result)` + Val of -1 will round down to nearest int, + equivalent to `math.floor(result)` + Val of None will perform no rounding, + potentially returning a very long string. Returns: list: list of extracted numbers as floats, or empty list if none found """ diff --git a/test/test_parse.py b/test/test_parse.py index bcb90038..1c6a83eb 100644 --- a/test/test_parse.py +++ b/test/test_parse.py @@ -744,6 +744,17 @@ def test_multiple_numbers(self): " bingo ten nancy forty six test" " with decimal rounding", decimal_places=2), [round(6.579, 2), 10, 46]) + # test integer rounding, multiple decimals in string + self.assertEqual(extract_numbers( + "five hundred seventy point seven two and thirty one point eight"), + [570.72, 31.8]) + self.assertEqual(extract_numbers( + "five hundred seventy point seven two and thirty one point eight", + decimal_places=0), [571, 32]) + self.assertEqual(extract_numbers( + "five hundred seventy point seven two and thirty one point eight", + decimal_places=-1), [570, 31]) + def test_contractions(self): self.assertEqual(normalize("ain't"), "is not") From d1d5a04c40d1017b2f5dcd6e227c0602a67b486a Mon Sep 17 00:00:00 2001 From: ChanceNCounter Date: Mon, 6 Apr 2020 15:16:05 -0700 Subject: [PATCH 5/5] simplify behavior of decimal_places, update tests --- lingua_franca/lang/parse_en.py | 27 ++++++++++----------------- lingua_franca/parse.py | 13 ++++++------- test/test_parse.py | 4 ++-- 3 files changed, 18 insertions(+), 26 deletions(-) diff --git a/lingua_franca/lang/parse_en.py b/lingua_franca/lang/parse_en.py index bc5d1095..a0238372 100644 --- a/lingua_franca/lang/parse_en.py +++ b/lingua_franca/lang/parse_en.py @@ -15,7 +15,6 @@ # from datetime import datetime, timedelta from dateutil.relativedelta import relativedelta -from math import ceil, floor import json import re @@ -284,12 +283,9 @@ def _extract_decimal_with_text_en(tokens, short_scale, ordinals, places=None): tokens [Token]: The text to parse. short_scale boolean: ordinals boolean: - places [int] or None: Number of decimal places to return + places [int] or None: Number of decimal places to return. + 0 truncates the decimal part None performs no rounding - Positive int rounds to so many places - 0 value rounds up to nearest int - -1 value rounds down to nearest int - other values throw error Returns: (float, [Token]) @@ -336,22 +332,19 @@ def _extract_decimal_with_text_en(tokens, short_scale, ordinals, places=None): if "." not in str(numbers2[0].text): return_value = float('0.' + "".join([str( decimal.value) for decimal in numbers2])) - return_value = number.value + return_value - if places is not None: - if places == 0: - return_value = ceil(return_value) - elif places == -1: - return_value = floor(return_value) - if places < 1: - return_value = int(return_value) return_tokens = number.tokens + partitions[1] for n in numbers2: return_tokens += n.tokens - if not places: - return return_value, return_tokens + if places is not None: + if places > 0: + return_value = number.value + return_value + else: + return_value = number.value + else: + return return_value + number.value, return_tokens return (round(return_value, places) if places > 0 - else return_value), return_tokens + else str(return_value).split('.')[0]), return_tokens return None, None diff --git a/lingua_franca/parse.py b/lingua_franca/parse.py index 357a4f8d..fcc543ef 100644 --- a/lingua_franca/parse.py +++ b/lingua_franca/parse.py @@ -94,13 +94,12 @@ def extract_numbers(text, short_scale=True, ordinals=False, lang=None, See https://en.wikipedia.org/wiki/Names_of_large_numbers ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3 lang (str): the BCP-47 code for the language to use, None uses default - decimal_places (int or None): Positive value will round to X places. - Val of 0 will round up to nearest int, - equivalent to `math.ceil(result)` - Val of -1 will round down to nearest int, - equivalent to `math.floor(result)` - Val of None will perform no rounding, - potentially returning a very long string. + decimal_places (int) or None: Number of decimal places to return. + None performs no rounding + 0 truncates the decimal part + ("one point two six one" becomes 1) + 1+ rounds to that many places + (decimal_places=1 turns "one point two six one" into 1.2) Returns: list: list of extracted numbers as floats, or empty list if none found """ diff --git a/test/test_parse.py b/test/test_parse.py index 1c6a83eb..ebd97036 100644 --- a/test/test_parse.py +++ b/test/test_parse.py @@ -750,10 +750,10 @@ def test_multiple_numbers(self): [570.72, 31.8]) self.assertEqual(extract_numbers( "five hundred seventy point seven two and thirty one point eight", - decimal_places=0), [571, 32]) + decimal_places=1), [570.7, 31.8]) self.assertEqual(extract_numbers( "five hundred seventy point seven two and thirty one point eight", - decimal_places=-1), [570, 31]) + decimal_places=0), [570, 31]) def test_contractions(self):