Skip to content

Commit

Permalink
address feedback from PR, add granularity
Browse files Browse the repository at this point in the history
- the more visible funcs now have a more explicitly named parameter for decimal places (`decimal_places`)
- `extract_number(decimal_places=...)` now has several options:
  - `decimal_places=n` will round to `n` places
  - `decimal_places=0` will round up to nearest int, equiv. ceil(result)
  - `decimal_places=-1` will round down to int, equiv. floor(result)
- expanded comments and docstrings
- remove old commented-out code
  • Loading branch information
ChanceNCounter committed Mar 30, 2020
1 parent 715fdda commit 951b8df
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 23 deletions.
59 changes: 39 additions & 20 deletions lingua_franca/lang/parse_en.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,17 @@
# limitations under the License.
#
from datetime import datetime, timedelta

from dateutil.relativedelta import relativedelta
from math import ceil, floor

import json
import re

from lingua_franca.lang.parse_common import is_numeric, look_for_fractions, \
invert_dict, ReplaceableNumber, partition_list, tokenize, Token, Normalizer
from lingua_franca.lang.common_data_en import _ARTICLES_EN, _NUM_STRING_EN, \
_LONG_ORDINAL_EN, _LONG_SCALE_EN, _SHORT_SCALE_EN, _SHORT_ORDINAL_EN

import re
import json
from lingua_franca import resolve_resource_file
from lingua_franca.time import now_local

Expand Down Expand Up @@ -77,14 +78,22 @@ def generate_plurals_en(originals):
_STRING_LONG_ORDINAL_EN = invert_dict(_LONG_ORDINAL_EN)


def _convert_words_to_numbers_en(text, short_scale=True, ordinals=False, places=None):
def _convert_words_to_numbers_en(text, short_scale=True, ordinals=False,
decimal_places=None):
"""
Convert words in a string into their equivalent numbers.
Args:
text str:
short_scale boolean: True if short scale numbers should be used.
ordinals boolean: True if ordinals (e.g. first, second, third) should
text (str):
short_scale (bool): True if short scale numbers should be used.
ordinals (bool): True if ordinals (e.g. first, second, third) should
be parsed to their number values (1, 2, 3...)
decimal_places (int or None): Positive value will round to X places.
Val of 0 will round up to nearest int,
equivalent to `math.ceil(result)`
Val of -1 will round down to nearest int,
equivalent to `math.floor(result)`
Val of None will perform no rounding,
potentially returning a very long string.
Returns:
str
Expand All @@ -95,7 +104,7 @@ def _convert_words_to_numbers_en(text, short_scale=True, ordinals=False, places=
tokens = tokenize(text)
numbers_to_replace = \
_extract_numbers_with_text_en(
tokens, short_scale, ordinals, places=places)
tokens, short_scale, ordinals, places=decimal_places)
numbers_to_replace.sort(key=lambda number: number.start_index)

results = []
Expand Down Expand Up @@ -271,14 +280,16 @@ def _extract_decimal_with_text_en(tokens, short_scale, ordinals, places=None):
While this is a helper for extractnumber_en, it also depends on
extractnumber_en, to parse out the components of the decimal.
This does not currently handle things like:
number dot number number number
Args:
tokens [Token]: The text to parse.
short_scale boolean:
ordinals boolean:
places [int]: Number of decimal places to return
places [int] or None: Number of decimal places to return
None performs no rounding
Positive int rounds to so many places
0 value rounds up to nearest int
-1 value rounds down to nearest int
other values throw error
Returns:
(float, [Token])
Expand All @@ -301,6 +312,14 @@ def _extract_decimal_with_text_en(tokens, short_scale, ordinals, places=None):
if not numbers1 or not numbers2:
return None, None

# `numbers2` may have caught numbers which are part of the
# input string, but which are not part of *this* number.
# For example, for the input string:
# "a ratio of one point five to one"
# `numbers2` might read, `numbers2 == [5, 1]`
#
# truncate `numbers2` to contain only those tokens which were
# adjacent in the input string.
idx = 1
stop = False
while idx < len(numbers2) and not stop:
Expand All @@ -312,23 +331,23 @@ def _extract_decimal_with_text_en(tokens, short_scale, ordinals, places=None):
numbers2 = numbers2[:idx]

number = numbers1[-1]
# decimal = numbers2[0]

# TODO handle number dot number number number

if "." not in str(numbers2[0].text):
return_value = float('0.' + "".join([str(
decimal.value) for decimal in numbers2]))
return_value = number.value + return_value
if return_value == int(return_value):
return_value = int(return_value)

# out_part2 = partitions[2]
# for n in numbers2:
# out_part2[n.index] = n.value
if places:
if places == 0:
return_value = ceil(return_value)
elif places == -1:
return_value = floor(return_value)

return_tokens = number.tokens + partitions[1]
for n in numbers2:
return_tokens += n.tokens
if not places:
return return_value, return_tokens

return (round(return_value, places) if places else return_value), return_tokens
return None, None
Expand Down
11 changes: 8 additions & 3 deletions lingua_franca/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def extract_numbers(text, short_scale=True, ordinals=False, lang=None,


def extract_number(text, short_scale=True, ordinals=False, lang=None,
decimal_places=False):
decimal_places=None):
"""Takes in a string and extracts a number.
Args:
Expand All @@ -130,8 +130,13 @@ def extract_number(text, short_scale=True, ordinals=False, lang=None,
See https://en.wikipedia.org/wiki/Names_of_large_numbers
ordinals (bool): consider ordinal numbers, e.g. third=3 instead of 1/3
lang (str): the BCP-47 code for the language to use, None uses default
decimal_places (int or False): rounds to # decimal places. Not yet implemented
in all languages. False performs no rounding. Uses builtin round()
decimal_places (int or None): Positive value will round to X places.
Val of 0 will round up to nearest int,
equivalent to `math.ceil(result)`
Val of -1 will round down to nearest int,
equivalent to `math.floor(result)`
Val of None will perform no rounding,
potentially returning a very long string.
Returns:
(int, float or False): The number extracted or False if the input
text contains no numbers
Expand Down

0 comments on commit 951b8df

Please sign in to comment.