diff --git a/lingua_franca/lang/parse_fa.py b/lingua_franca/lang/parse_fa.py index ad856b95..753ac8eb 100644 --- a/lingua_franca/lang/parse_fa.py +++ b/lingua_franca/lang/parse_fa.py @@ -13,14 +13,11 @@ # See the License for the specific language governing permissions and # limitations under the License. # -import json -from datetime import timedelta +from datetime import datetime, timedelta -from lingua_franca.internal import resolve_resource_file from lingua_franca.lang.common_data_fa import (_FARSI_BIG, _FARSI_HUNDREDS, _FARSI_ONES, _FARSI_TENS, _FORMAL_VARIANT) -from lingua_franca.lang.parse_common import Normalizer from lingua_franca.time import now_local @@ -31,6 +28,7 @@ def _is_number(s): except ValueError: return False + def _parse_sentence(text): for key, value in _FORMAL_VARIANT.items(): text = text.replace(key, value) @@ -39,8 +37,8 @@ def _parse_sentence(text): current_number = 0 current_words = [] s = 0 - step = 10 mode = 'init' + def finish_num(): nonlocal current_number nonlocal s @@ -54,13 +52,14 @@ def finish_num(): current_number = 0 current_words = [] mode = 'init' + for x in ar: if x == "و": if mode == 'num_ten' or mode == 'num_hundred' or mode == 'num_one': mode += '_va' current_words.append(x) elif mode == 'num': - current_words.append(x) + current_words.append(x) else: finish_num() result.append(x) @@ -71,7 +70,7 @@ def finish_num(): elif x in _FARSI_ONES: t = _FARSI_ONES.index(x) if mode != 'init' and mode != 'num_hundred_va' and mode != 'num': - if not(t < 10 and mode == 'num_ten_va'): + if not (t < 10 and mode == 'num_ten_va'): finish_num() current_words.append(x) s += t @@ -80,20 +79,20 @@ def finish_num(): if mode != 'init' and mode != 'num_hundred_va' and mode != 'num': finish_num() current_words.append(x) - s += _FARSI_TENS.index(x)*10 + s += _FARSI_TENS.index(x) * 10 mode = 'num_ten' elif x in _FARSI_HUNDREDS: if mode != 'init' and mode != 'num': finish_num() current_words.append(x) - s += _FARSI_HUNDREDS.index(x)*100 + s += 
_FARSI_HUNDREDS.index(x) * 100 mode = 'num_hundred' elif x in _FARSI_BIG: current_words.append(x) d = _FARSI_BIG.index(x) if mode == 'init' and d == 1: s = 1 - s *= 10**(3*d) + s *= 10 ** (3 * d) current_number += s s = 0 mode = 'num' @@ -120,6 +119,7 @@ def finish_num(): 'هفته': timedelta(weeks=1), } + def extract_duration_fa(text): """ Convert an english phrase into a number of seconds @@ -208,9 +208,8 @@ def extract_datetime_fa(text, anchorDate=None, default_time=None): .replace('سه شنبه', 'سهشنبه') \ .replace('چهار شنبه', 'چهارشنبه') \ .replace('پنج شنبه', 'پنجشنبه') \ - .replace('بعد از ظهر', 'بعدازظهر') \ - - + .replace('بعد از ظهر', 'بعدازظهر') + if not anchorDate: anchorDate = now_local() today = anchorDate.replace(hour=0, minute=0, second=0, microsecond=0) @@ -225,11 +224,11 @@ def extract_datetime_fa(text, anchorDate=None, default_time=None): 'یکشنبه', ] daysDict = { - 'پریروز': today + timedelta(days= -2), - 'دیروز': today + timedelta(days= -1), + 'پریروز': today + timedelta(days=-2), + 'دیروز': today + timedelta(days=-1), 'امروز': today, - 'فردا': today + timedelta(days= 1), - 'پسفردا': today + timedelta(days= 2), + 'فردا': today + timedelta(days=1), + 'پسفردا': today + timedelta(days=2), } timesDict = { 'صبح': timedelta(hours=8), @@ -307,34 +306,6 @@ def extract_datetime_fa(text, anchorDate=None, default_time=None): remainder.append(x) return (result, " ".join(remainder)) -def is_fractional_fa(input_str, short_scale=True): - """ - This function takes the given text and checks if it is a fraction. - - Args: - input_str (str): the string to check if fractional - short_scale (bool): use short scale if True, long scale if False - Returns: - (bool) or (float): False if not a fraction, otherwise the fraction - - """ - if input_str.endswith('s', -1): - input_str = input_str[:len(input_str) - 1] # e.g. 
"fifths" - - fracts = {"whole": 1, "half": 2, "halve": 2, "quarter": 4} - if short_scale: - for num in _SHORT_ORDINAL_FA: - if num > 2: - fracts[_SHORT_ORDINAL_FA[num]] = num - else: - for num in _LONG_ORDINAL_FA: - if num > 2: - fracts[_LONG_ORDINAL_FA[num]] = num - - if input_str.lower() in fracts: - return 1.0 / fracts[input_str.lower()] - return False - def extract_numbers_fa(text, short_scale=True, ordinals=False): """ diff --git a/lingua_franca/lang/parse_fr.py b/lingua_franca/lang/parse_fr.py index 19561829..9728653f 100644 --- a/lingua_franca/lang/parse_fr.py +++ b/lingua_franca/lang/parse_fr.py @@ -944,8 +944,7 @@ def date_found(): if not hasYear: temp = datetime.strptime(datestr, "%B %d") if extractedDate.tzinfo: - temp = temp.replace(tzinfo=gettz("UTC")) - temp = temp.astimezone(extractedDate.tzinfo) + temp = temp.replace(tzinfo=extractedDate.tzinfo) temp = temp.replace(year=extractedDate.year) if extractedDate < temp: extractedDate = extractedDate.replace(year=int(currentYear), diff --git a/lingua_franca/time.py b/lingua_franca/time.py index 17f46d01..aeda93ee 100644 --- a/lingua_franca/time.py +++ b/lingua_franca/time.py @@ -46,7 +46,7 @@ def now_utc(): Returns: (datetime): The current time in Universal Time, aka GMT """ - return to_utc(datetime.utcnow()) + return datetime.utcnow().replace(tzinfo=gettz("UTC")) def now_local(tz=None): @@ -58,8 +58,7 @@ def now_local(tz=None): Returns: (datetime): The current time """ - if not tz: - tz = default_timezone() + tz = tz or default_timezone() return datetime.now(tz) @@ -71,11 +70,10 @@ def to_utc(dt): Returns: (datetime): time converted to UTC """ - tzUTC = gettz("UTC") - if dt.tzinfo: - return dt.astimezone(tzUTC) - else: - return dt.replace(tzinfo=gettz("UTC")).astimezone(tzUTC) + tz = gettz("UTC") + if not dt.tzinfo: + dt = dt.replace(tzinfo=default_timezone()) + return dt.astimezone(tz) def to_local(dt): @@ -87,8 +85,20 @@ def to_local(dt): (datetime): time converted to the local timezone """ tz = 
default_timezone() - if dt.tzinfo: - return dt.astimezone(tz) - else: - return dt.replace(tzinfo=gettz("UTC")).astimezone(tz) + if not dt.tzinfo: + dt = dt.replace(tzinfo=default_timezone()) + return dt.astimezone(tz) + +def to_system(dt): + """Convert a datetime to the system's local timezone + + Args: + dt (datetime): A datetime (if no timezone, assumed to be UTC) + Returns: + (datetime): time converted to the operation system's timezone + """ + tz = tzlocal() + if not dt.tzinfo: + dt = dt.replace(tzinfo=default_timezone()) + return dt.astimezone(tz) diff --git a/test/test_format.py b/test/test_format.py index 2a3800b9..07b50505 100644 --- a/test/test_format.py +++ b/test/test_format.py @@ -13,110 +13,42 @@ # See the License for the specific language governing permissions and # limitations under the License. # +import ast +import datetime import json import unittest -import datetime -import ast -import warnings -import sys from pathlib import Path +from dateutil import tz # TODO either write a getter for lingua_franca.internal._SUPPORTED_LANGUAGES, # or make it public somehow -from lingua_franca import load_languages, unload_languages, set_default_lang, \ - get_primary_lang_code, get_active_langs, get_supported_langs -from lingua_franca.internal import UnsupportedLanguageError -from lingua_franca.format import nice_number -from lingua_franca.format import nice_time +from lingua_franca import load_language, unload_language, set_default_lang +from lingua_franca.format import date_time_format from lingua_franca.format import nice_date from lingua_franca.format import nice_date_time +from lingua_franca.format import nice_number +from lingua_franca.format import nice_time from lingua_franca.format import nice_year -from lingua_franca.format import nice_duration -from lingua_franca.format import pronounce_number -from lingua_franca.format import date_time_format -from lingua_franca.format import join_list -from lingua_franca.time import default_timezone +from 
lingua_franca.time import default_timezone, set_default_tz, now_local, \ + to_local def setUpModule(): - load_languages(get_supported_langs()) - # TODO spin English tests off into another file, like other languages, so we - # don't have to do this confusing thing in the "master" test_format.py + load_language("en") set_default_lang('en-us') def tearDownModule(): - unload_languages(get_active_langs()) - - -NUMBERS_FIXTURE_EN = { - 1.435634: '1.436', - 2: '2', - 5.0: '5', - 0.027: '0.027', - 0.5: 'a half', - 1.333: '1 and a third', - 2.666: '2 and 2 thirds', - 0.25: 'a forth', - 1.25: '1 and a forth', - 0.75: '3 forths', - 1.75: '1 and 3 forths', - 3.4: '3 and 2 fifths', - 16.8333: '16 and 5 sixths', - 12.5714: '12 and 4 sevenths', - 9.625: '9 and 5 eigths', - 6.777: '6 and 7 ninths', - 3.1: '3 and a tenth', - 2.272: '2 and 3 elevenths', - 5.583: '5 and 7 twelveths', - 8.384: '8 and 5 thirteenths', - 0.071: 'a fourteenth', - 6.466: '6 and 7 fifteenths', - 8.312: '8 and 5 sixteenths', - 2.176: '2 and 3 seventeenths', - 200.722: '200 and 13 eighteenths', - 7.421: '7 and 8 nineteenths', - 0.05: 'a twentyith' -} + unload_language("en") class TestNiceNumberFormat(unittest.TestCase): - tmp_var = None - - def set_tmp_var(self, val): - self.tmp_var = val - - def test_convert_float_to_nice_number(self): - for number, number_str in NUMBERS_FIXTURE_EN.items(): - self.assertEqual(nice_number(number), number_str, - 'should format {} as {} and not {}'.format( - number, number_str, nice_number(number))) - - def test_specify_denominator(self): - self.assertEqual(nice_number(5.5, denominators=[1, 2, 3]), - '5 and a half', - 'should format 5.5 as 5 and a half not {}'.format( - nice_number(5.5, denominators=[1, 2, 3]))) - self.assertEqual(nice_number(2.333, denominators=[1, 2]), - '2.333', - 'should format 2.333 as 2.333 not {}'.format( - nice_number(2.333, denominators=[1, 2]))) - - def test_no_speech(self): - self.assertEqual(nice_number(6.777, speech=False), - '6 7/9', - 'should 
format 6.777 as 6 7/9 not {}'.format( - nice_number(6.777, speech=False))) - self.assertEqual(nice_number(6.0, speech=False), - '6', - 'should format 6.0 as 6 not {}'.format( - nice_number(6.0, speech=False))) - def test_unknown_language(self): """ An unknown / unhandled language should return the string representation of the input number. """ + def bypass_warning(): self.assertEqual( nice_number(5.5, lang='as-df'), '5.5', @@ -130,263 +62,37 @@ def bypass_warning(): self.assertWarns(UserWarning, bypass_warning) -class TestPronounceNumber(unittest.TestCase): - def test_convert_int(self): - self.assertEqual(pronounce_number(0), "zero") - self.assertEqual(pronounce_number(1), "one") - self.assertEqual(pronounce_number(10), "ten") - self.assertEqual(pronounce_number(15), "fifteen") - self.assertEqual(pronounce_number(20), "twenty") - self.assertEqual(pronounce_number(27), "twenty seven") - self.assertEqual(pronounce_number(30), "thirty") - self.assertEqual(pronounce_number(33), "thirty three") - - def test_convert_negative_int(self): - self.assertEqual(pronounce_number(-1), "minus one") - self.assertEqual(pronounce_number(-10), "minus ten") - self.assertEqual(pronounce_number(-15), "minus fifteen") - self.assertEqual(pronounce_number(-20), "minus twenty") - self.assertEqual(pronounce_number(-27), "minus twenty seven") - self.assertEqual(pronounce_number(-30), "minus thirty") - self.assertEqual(pronounce_number(-33), "minus thirty three") - - def test_convert_decimals(self): - self.assertEqual(pronounce_number(0.05), "zero point zero five") - self.assertEqual(pronounce_number(-0.05), "minus zero point zero five") - self.assertEqual(pronounce_number(1.234), - "one point two three") - self.assertEqual(pronounce_number(21.234), - "twenty one point two three") - self.assertEqual(pronounce_number(21.234, places=1), - "twenty one point two") - self.assertEqual(pronounce_number(21.234, places=0), - "twenty one") - self.assertEqual(pronounce_number(21.234, places=3), - "twenty 
one point two three four") - self.assertEqual(pronounce_number(21.234, places=4), - "twenty one point two three four") - self.assertEqual(pronounce_number(21.234, places=5), - "twenty one point two three four") - self.assertEqual(pronounce_number(-1.234), - "minus one point two three") - self.assertEqual(pronounce_number(-21.234), - "minus twenty one point two three") - self.assertEqual(pronounce_number(-21.234, places=1), - "minus twenty one point two") - self.assertEqual(pronounce_number(-21.234, places=0), - "minus twenty one") - self.assertEqual(pronounce_number(-21.234, places=3), - "minus twenty one point two three four") - self.assertEqual(pronounce_number(-21.234, places=4), - "minus twenty one point two three four") - self.assertEqual(pronounce_number(-21.234, places=5), - "minus twenty one point two three four") - - def test_convert_hundreds(self): - self.assertEqual(pronounce_number(100), "one hundred") - self.assertEqual(pronounce_number(666), "six hundred and sixty six") - self.assertEqual(pronounce_number(1456), "fourteen fifty six") - self.assertEqual(pronounce_number(103254654), "one hundred and three " - "million, two hundred " - "and fifty four " - "thousand, six hundred " - "and fifty four") - self.assertEqual(pronounce_number(1512457), "one million, five hundred" - " and twelve thousand, " - "four hundred and fifty " - "seven") - self.assertEqual(pronounce_number(209996), "two hundred and nine " - "thousand, nine hundred " - "and ninety six") - - def test_convert_scientific_notation(self): - self.assertEqual(pronounce_number(0, scientific=True), "zero") - self.assertEqual(pronounce_number(33, scientific=True), - "three point three times ten to the power of one") - self.assertEqual(pronounce_number(299792458, scientific=True), - "two point nine nine times ten to the power of eight") - self.assertEqual(pronounce_number(299792458, places=6, - scientific=True), - "two point nine nine seven nine two five times " - "ten to the power of eight") - 
self.assertEqual(pronounce_number(1.672e-27, places=3, - scientific=True), - "one point six seven two times ten to the power of " - "negative twenty seven") - - def test_auto_scientific_notation(self): - self.assertEqual( - pronounce_number(1.1e-150), "one point one times ten to the " - "power of negative one hundred " - "and fifty") - # value is platform dependent so better not use in tests? - # self.assertEqual( - # pronounce_number(sys.float_info.min), "two point two two times " - # "ten to the power of " - # "negative three hundred " - # "and eight") - # self.assertEqual( - # pronounce_number(sys.float_info.max), "one point seven nine " - # "times ten to the power of" - # " three hundred and eight") - - def test_large_numbers(self): - self.assertEqual( - pronounce_number(299792458, short_scale=True), - "two hundred and ninety nine million, seven hundred " - "and ninety two thousand, four hundred and fifty eight") - self.assertEqual( - pronounce_number(299792458, short_scale=False), - "two hundred and ninety nine million, seven hundred " - "and ninety two thousand, four hundred and fifty eight") - self.assertEqual( - pronounce_number(100034000000299792458, short_scale=True), - "one hundred quintillion, thirty four quadrillion, " - "two hundred and ninety nine million, seven hundred " - "and ninety two thousand, four hundred and fifty eight") - self.assertEqual( - pronounce_number(100034000000299792458, short_scale=False), - "one hundred trillion, thirty four thousand billion, " - "two hundred and ninety nine million, seven hundred " - "and ninety two thousand, four hundred and fifty eight") - self.assertEqual( - pronounce_number(10000000000, short_scale=True), - "ten billion") - self.assertEqual( - pronounce_number(1000000000000, short_scale=True), - "one trillion") - # TODO maybe beautify this - self.assertEqual( - pronounce_number(1000001, short_scale=True), - "one million, one") - self.assertEqual(pronounce_number(95505896639631893), - "ninety five 
quadrillion, five hundred and five " - "trillion, eight hundred and ninety six billion, six " - "hundred and thirty nine million, six hundred and " - "thirty one thousand, eight hundred and ninety three") - self.assertEqual(pronounce_number(95505896639631893, - short_scale=False), - "ninety five thousand five hundred and five billion, " - "eight hundred and ninety six thousand six hundred " - "and thirty nine million, six hundred and thirty one " - "thousand, eight hundred and ninety three") - self.assertEqual(pronounce_number(10e80, places=1), - "one qesvigintillion") - # TODO floating point rounding issues might happen - self.assertEqual(pronounce_number(1.9874522571e80, places=9), - "one hundred and ninety eight quinquavigintillion, " - "seven hundred and forty five quattuorvigintillion, " - "two hundred and twenty five tresvigintillion, " - "seven hundred and nine uuovigintillion, " - "nine hundred and ninety nine unvigintillion, " - "nine hundred and eighty nine vigintillion, " - "seven hundred and thirty novendecillion, nine " - "hundred and nineteen octodecillion, nine hundred " - "and ninety nine septendecillion, nine hundred " - "and fifty five sedecillion, four hundred and " - "ninety eight quinquadecillion, two hundred and " - "fourteen quattuordecillion, eight hundred and " - "forty five tredecillion, four hundred and " - "twenty nine duodecillion, four hundred and " - "forty four undecillion, three hundred and " - "thirty six decillion, seven hundred and twenty " - "four nonillion, five hundred and sixty nine " - "octillion, three hundred and seventy five " - "septillion, two hundred and thirty nine sextillion," - " six hundred and seventy quintillion, five hundred " - "and seventy four quadrillion, seven hundred and " - "thirty nine trillion, seven hundred and forty " - "eight billion, four hundred and seventy million, " - "nine hundred and fifteen thousand, seventy two") - self.assertEqual(pronounce_number(1.00000000000000001e150), - "nine hundred 
and ninety nine millinillion, nine " - "hundred and ninety nine uncentillion, nine hundred " - "and ninety nine centillion, nine hundred and ninety" - " nine nonagintillion, nine hundred and ninety nine" - " octogintillion, nine hundred and eighty" - " septuagintillion, eight hundred and thirty five " - "sexagintillion, five hundred and ninety six " - "quinquagintillion, one hundred and seventy two" - " quadragintillion, four hundred and thirty seven" - " noventrigintillion, three hundred and seventy four" - " octotrigintillion, five hundred and ninety" - " septentrigintillion, five hundred and seventy" - " three sestrigintillion, one hundred and twenty " - "quinquatrigintillion, fourteen quattuortrigintillion" - ", thirty trestrigintillion, three hundred and " - "eighteen duotrigintillion, seven hundred and ninety" - " three untrigintillion, ninety one trigintillion," - " one hundred and sixty four novemvigintillion, eight" - " hundred and ten octovigintillion, one hundred and" - " fifty four septemvigintillion, one hundred " - "qesvigintillion, one hundred and twelve " - "quinquavigintillion, two hundred and three " - "quattuorvigintillion, six hundred and seventy " - "eight tresvigintillion, five hundred and eighty " - "two uuovigintillion, nine hundred and seventy six" - " unvigintillion, two hundred and ninety eight " - "vigintillion, two hundred and sixty eight " - "novendecillion, six hundred and sixteen " - "octodecillion, two hundred and twenty one " - "septendecillion, one hundred and fifty one" - " sedecillion, nine hundred and sixty two " - "quinquadecillion, seven hundred and two" - " quattuordecillion, sixty tredecillion, two hundred" - " and sixty six duodecillion, one hundred and " - "seventy six undecillion, five decillion, four " - "hundred and forty nonillion, five hundred and" - " sixty seven octillion, thirty two septillion, " - "three hundred and thirty one sextillion, " - "two hundred and eight quintillion, four hundred and " - "three 
quadrillion, nine hundred and forty eight " - "trillion, two hundred and thirty three billion, " - "three hundred and seventy three million, five " - "hundred and fifteen thousand, seven hundred and " - "seventy six") - - # infinity - self.assertEqual( - pronounce_number(sys.float_info.max * 2), "infinity") - self.assertEqual( - pronounce_number(float("inf")), - "infinity") - self.assertEqual( - pronounce_number(float("-inf")), - "negative infinity") - - def test_ordinals(self): - self.assertEqual(pronounce_number(1, ordinals=True), "first") - self.assertEqual(pronounce_number(10, ordinals=True), "tenth") - self.assertEqual(pronounce_number(15, ordinals=True), "fifteenth") - self.assertEqual(pronounce_number(20, ordinals=True), "twentieth") - self.assertEqual(pronounce_number(27, ordinals=True), "twenty seventh") - self.assertEqual(pronounce_number(30, ordinals=True), "thirtieth") - self.assertEqual(pronounce_number(33, ordinals=True), "thirty third") - self.assertEqual(pronounce_number(100, ordinals=True), "hundredth") - self.assertEqual(pronounce_number(1000, ordinals=True), "thousandth") - self.assertEqual(pronounce_number(10000, ordinals=True), - "ten thousandth") - self.assertEqual(pronounce_number(18691, ordinals=True), - "eighteen thousand, six hundred and ninety first") - self.assertEqual(pronounce_number(1567, ordinals=True), - "one thousand, five hundred and sixty seventh") - self.assertEqual(pronounce_number(1.672e-27, places=3, - scientific=True, ordinals=True), - "one point six seven two times ten to the negative " - "twenty seventh power") - self.assertEqual(pronounce_number(18e6, ordinals=True), - "eighteen millionth") - self.assertEqual(pronounce_number(18e12, ordinals=True, - short_scale=False), - "eighteen billionth") - self.assertEqual(pronounce_number(18e12, ordinals=True), - "eighteen trillionth") - self.assertEqual(pronounce_number(18e18, ordinals=True, - short_scale=False), "eighteen " - "trillionth") - -# def nice_time(dt, lang="en-us", 
speech=True, use_24hour=False, -# use_ampm=False): +class TestTimezones(unittest.TestCase): + def test_default_tz(self): + default = default_timezone() + set_default_tz("America/Chicago") + + local_time = now_local() + local_tz = default_timezone() + us_time = datetime.datetime.now(tz=tz.gettz("America/Chicago")) + self.assertEqual(nice_date_time(local_time), + nice_date_time(us_time)) + self.assertEqual(local_time.tzinfo, local_tz) + + # naive datetimes assumed to be in default timezone already! + # in the case of datetime.now this corresponds to tzlocal() + # otherwise timezone is undefined and can not be guessed, we assume + # the user means "my timezone" and that LF was configured to use it + # beforehand, if unconfigured default == tzlocal() + dt = datetime.datetime(2021, 6, 23, 00, 43, 39) + dt_local = to_local(dt) + self.assertEqual(nice_time(dt), nice_time(dt_local)) + + set_default_tz(default) # undo changes to default tz after test + + def test_tz_conversion(self): + naive = datetime.datetime.now() + system_time = datetime.datetime.now(tz.tzlocal()) + # naive == datetime.now() == tzlocal() internally + # NOTE nice_date_time is not a localized function, it just formats + # the datetime object directly + self.assertEqual(nice_date_time(naive), + nice_date_time(system_time)) class TestNiceDateFormat(unittest.TestCase): @@ -402,132 +108,11 @@ def setUpClass(cls): with (sub_dir / 'date_time_test.json').open() as f: cls.test_config[sub_dir.parts[-1]] = json.loads(f.read()) - def test_convert_times(self): - dt = datetime.datetime(2017, 1, 31, - 13, 22, 3, tzinfo=default_timezone()) - - # Verify defaults haven't changed - self.assertEqual(nice_time(dt), - nice_time(dt, "en-us", True, False, False)) - - self.assertEqual(nice_time(dt), - "one twenty two") - - self.assertEqual(nice_time(dt, use_ampm=True), - "one twenty two p.m.") - self.assertEqual(nice_time(dt, speech=False), - "1:22") - self.assertEqual(nice_time(dt, speech=False, use_ampm=True), - "1:22 PM") - 
self.assertEqual(nice_time(dt, speech=False, use_24hour=True), - "13:22") - self.assertEqual(nice_time(dt, speech=False, use_24hour=True, - use_ampm=True), - "13:22") - self.assertEqual(nice_time(dt, use_24hour=True, use_ampm=True), - "thirteen twenty two") - self.assertEqual(nice_time(dt, use_24hour=True, use_ampm=False), - "thirteen twenty two") - - dt = datetime.datetime(2017, 1, 31, - 13, 0, 3, tzinfo=default_timezone()) - self.assertEqual(nice_time(dt), - "one o'clock") - self.assertEqual(nice_time(dt, use_ampm=True), - "one p.m.") - self.assertEqual(nice_time(dt, speech=False), - "1:00") - self.assertEqual(nice_time(dt, speech=False, use_ampm=True), - "1:00 PM") - self.assertEqual(nice_time(dt, speech=False, use_24hour=True), - "13:00") - self.assertEqual(nice_time(dt, speech=False, use_24hour=True, - use_ampm=True), - "13:00") - self.assertEqual(nice_time(dt, use_24hour=True, use_ampm=True), - "thirteen hundred") - self.assertEqual(nice_time(dt, use_24hour=True, use_ampm=False), - "thirteen hundred") - - dt = datetime.datetime(2017, 1, 31, - 13, 2, 3, tzinfo=default_timezone()) - self.assertEqual(nice_time(dt), - "one oh two") - self.assertEqual(nice_time(dt, use_ampm=True), - "one oh two p.m.") - self.assertEqual(nice_time(dt, speech=False), - "1:02") - self.assertEqual(nice_time(dt, speech=False, use_ampm=True), - "1:02 PM") - self.assertEqual(nice_time(dt, speech=False, use_24hour=True), - "13:02") - self.assertEqual(nice_time(dt, speech=False, use_24hour=True, - use_ampm=True), - "13:02") - self.assertEqual(nice_time(dt, use_24hour=True, use_ampm=True), - "thirteen zero two") - self.assertEqual(nice_time(dt, use_24hour=True, use_ampm=False), - "thirteen zero two") - - dt = datetime.datetime(2017, 1, 31, - 0, 2, 3, tzinfo=default_timezone()) - self.assertEqual(nice_time(dt), - "twelve oh two") - self.assertEqual(nice_time(dt, use_ampm=True), - "twelve oh two a.m.") - self.assertEqual(nice_time(dt, speech=False), - "12:02") - self.assertEqual(nice_time(dt, 
speech=False, use_ampm=True), - "12:02 AM") - self.assertEqual(nice_time(dt, speech=False, use_24hour=True), - "00:02") - self.assertEqual(nice_time(dt, speech=False, use_24hour=True, - use_ampm=True), - "00:02") - self.assertEqual(nice_time(dt, use_24hour=True, use_ampm=True), - "zero zero zero two") - self.assertEqual(nice_time(dt, use_24hour=True, use_ampm=False), - "zero zero zero two") - - dt = datetime.datetime(2018, 2, 8, - 1, 2, 33, tzinfo=default_timezone()) - self.assertEqual(nice_time(dt), - "one oh two") - self.assertEqual(nice_time(dt, use_ampm=True), - "one oh two a.m.") - self.assertEqual(nice_time(dt, speech=False), - "1:02") - self.assertEqual(nice_time(dt, speech=False, use_ampm=True), - "1:02 AM") - self.assertEqual(nice_time(dt, speech=False, use_24hour=True), - "01:02") - self.assertEqual(nice_time(dt, speech=False, use_24hour=True, - use_ampm=True), - "01:02") - self.assertEqual(nice_time(dt, use_24hour=True, use_ampm=True), - "zero one zero two") - self.assertEqual(nice_time(dt, use_24hour=True, use_ampm=False), - "zero one zero two") - - dt = datetime.datetime(2017, 1, 31, - 12, 15, 9, tzinfo=default_timezone()) - self.assertEqual(nice_time(dt), - "quarter past twelve") - self.assertEqual(nice_time(dt, use_ampm=True), - "quarter past twelve p.m.") - - dt = datetime.datetime(2017, 1, 31, - 5, 30, 00, tzinfo=default_timezone()) - self.assertEqual(nice_time(dt, use_ampm=True), - "half past five a.m.") - - dt = datetime.datetime(2017, 1, 31, - 1, 45, 00, tzinfo=default_timezone()) - self.assertEqual(nice_time(dt), - "quarter to two") - def test_nice_date(self): for lang in self.test_config: + load_language(lang) + set_default_lang(lang) + i = 1 while (self.test_config[lang].get('test_nice_date') and self.test_config[lang]['test_nice_date'].get(str(i))): @@ -544,18 +129,28 @@ def test_nice_date(self): nice_date(dt, lang=lang, now=now)) i = i + 1 + unload_language(lang) + # test all days in a year for all languages, # that some output is produced 
for lang in self.test_config: + load_language(lang) + set_default_lang(lang) + for dt in (datetime.datetime(2017, 12, 30, 0, 2, 3) + datetime.timedelta(n) for n in range(368)): self.assertTrue(len(nice_date(dt, lang=lang)) > 0) + unload_language(lang) + + set_default_lang('en') + def test_nice_date_time(self): # TODO: migrate these tests (in res files) to respect the new # language loading features. Right now, some of them break if # their languages are not default. for lang in self.test_config: + load_language(lang) set_default_lang(lang) i = 1 while (self.test_config[lang].get('test_nice_date_time') and @@ -578,10 +173,14 @@ def test_nice_date_time(self): use_24hour=ast.literal_eval(p['use_24hour']), use_ampm=ast.literal_eval(p['use_ampm']))) i = i + 1 + unload_language(lang) set_default_lang('en') def test_nice_year(self): for lang in self.test_config: + load_language(lang) + set_default_lang(lang) + i = 1 while (self.test_config[lang].get('test_nice_year') and self.test_config[lang]['test_nice_year'].get(str(i))): @@ -595,50 +194,23 @@ def test_nice_year(self): dt, lang=lang, bc=ast.literal_eval(p['bc']))) i = i + 1 + unload_language(lang) + # Test all years from 0 to 9999 for all languages, # that some output is produced for lang in self.test_config: + load_language(lang) + set_default_lang(lang) + print("Test all years in " + lang) for i in range(1, 9999): dt = datetime.datetime(i, 1, 31, 13, 2, 3, tzinfo=default_timezone()) self.assertTrue(len(nice_year(dt, lang=lang)) > 0) # Looking through the date sequence can be helpful -# print(nice_year(dt, lang=lang)) - - def test_nice_duration(self): - self.assertEqual(nice_duration(1), "one second") - self.assertEqual(nice_duration(3), "three seconds") - self.assertEqual(nice_duration(1, speech=False), "0:01") - self.assertEqual(nice_duration(61), "one minute one second") - self.assertEqual(nice_duration(61, speech=False), "1:01") - self.assertEqual(nice_duration(5000), - "one hour twenty three minutes twenty 
seconds") - self.assertEqual(nice_duration(5000, speech=False), "1:23:20") - self.assertEqual(nice_duration(50000), - "thirteen hours fifty three minutes twenty seconds") - self.assertEqual(nice_duration(50000, speech=False), "13:53:20") - self.assertEqual(nice_duration(500000), - "five days eighteen hours fifty three minutes twenty seconds") # nopep8 - self.assertEqual(nice_duration(500000, speech=False), "5d 18:53:20") - self.assertEqual(nice_duration(datetime.timedelta(seconds=500000), - speech=False), - "5d 18:53:20") - - def test_join(self): - self.assertEqual(join_list(None, "and"), "") - self.assertEqual(join_list([], "and"), "") - - self.assertEqual(join_list(["a"], "and"), "a") - self.assertEqual(join_list(["a", "b"], "and"), "a and b") - self.assertEqual(join_list(["a", "b"], "or"), "a or b") - - self.assertEqual(join_list(["a", "b", "c"], "and"), "a, b and c") - self.assertEqual(join_list(["a", "b", "c"], "or"), "a, b or c") - self.assertEqual(join_list(["a", "b", "c"], "or", ";"), "a; b or c") - self.assertEqual(join_list(["a", "b", "c", "d"], "or"), "a, b, c or d") - - self.assertEqual(join_list([1, "b", 3, "d"], "or"), "1, b, 3 or d") + unload_language(lang) + + set_default_lang('en') if __name__ == "__main__": diff --git a/test/test_format_en.py b/test/test_format_en.py new file mode 100644 index 00000000..78d87603 --- /dev/null +++ b/test/test_format_en.py @@ -0,0 +1,537 @@ +# +# Copyright 2017 Mycroft AI Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# +import json +import unittest +import datetime +import ast +import warnings +import sys +from dateutil import tz +from pathlib import Path + +# TODO either write a getter for lingua_franca.internal._SUPPORTED_LANGUAGES, +# or make it public somehow +from lingua_franca import load_language, unload_language, set_default_lang, \ + get_primary_lang_code, get_active_langs, get_supported_langs +from lingua_franca.internal import UnsupportedLanguageError +from lingua_franca.format import nice_number +from lingua_franca.format import nice_time +from lingua_franca.format import nice_date +from lingua_franca.format import nice_date_time +from lingua_franca.format import nice_year +from lingua_franca.format import nice_duration +from lingua_franca.format import pronounce_number +from lingua_franca.format import date_time_format +from lingua_franca.format import join_list +from lingua_franca.time import default_timezone, set_default_tz, now_local, \ + to_local + + + +def setUpModule(): + load_language('en') + set_default_lang('en') + + +def tearDownModule(): + unload_language('en') + + +NUMBERS_FIXTURE_EN = { + 1.435634: '1.436', + 2: '2', + 5.0: '5', + 0.027: '0.027', + 0.5: 'a half', + 1.333: '1 and a third', + 2.666: '2 and 2 thirds', + 0.25: 'a forth', + 1.25: '1 and a forth', + 0.75: '3 forths', + 1.75: '1 and 3 forths', + 3.4: '3 and 2 fifths', + 16.8333: '16 and 5 sixths', + 12.5714: '12 and 4 sevenths', + 9.625: '9 and 5 eigths', + 6.777: '6 and 7 ninths', + 3.1: '3 and a tenth', + 2.272: '2 and 3 elevenths', + 5.583: '5 and 7 twelveths', + 8.384: '8 and 5 thirteenths', + 0.071: 'a fourteenth', + 6.466: '6 and 7 fifteenths', + 8.312: '8 and 5 sixteenths', + 2.176: '2 and 3 seventeenths', + 200.722: '200 and 13 eighteenths', + 7.421: '7 and 8 nineteenths', + 0.05: 'a twentyith' +} + + +class TestNiceNumberFormat(unittest.TestCase): + + tmp_var = None + + def 
set_tmp_var(self, val): + self.tmp_var = val + + def test_convert_float_to_nice_number(self): + for number, number_str in NUMBERS_FIXTURE_EN.items(): + self.assertEqual(nice_number(number), number_str, + 'should format {} as {} and not {}'.format( + number, number_str, nice_number(number))) + + def test_specify_denominator(self): + self.assertEqual(nice_number(5.5, denominators=[1, 2, 3]), + '5 and a half', + 'should format 5.5 as 5 and a half not {}'.format( + nice_number(5.5, denominators=[1, 2, 3]))) + self.assertEqual(nice_number(2.333, denominators=[1, 2]), + '2.333', + 'should format 2.333 as 2.333 not {}'.format( + nice_number(2.333, denominators=[1, 2]))) + + def test_no_speech(self): + self.assertEqual(nice_number(6.777, speech=False), + '6 7/9', + 'should format 6.777 as 6 7/9 not {}'.format( + nice_number(6.777, speech=False))) + self.assertEqual(nice_number(6.0, speech=False), + '6', + 'should format 6.0 as 6 not {}'.format( + nice_number(6.0, speech=False))) + + +class TestPronounceNumber(unittest.TestCase): + def test_convert_int(self): + self.assertEqual(pronounce_number(0), "zero") + self.assertEqual(pronounce_number(1), "one") + self.assertEqual(pronounce_number(10), "ten") + self.assertEqual(pronounce_number(15), "fifteen") + self.assertEqual(pronounce_number(20), "twenty") + self.assertEqual(pronounce_number(27), "twenty seven") + self.assertEqual(pronounce_number(30), "thirty") + self.assertEqual(pronounce_number(33), "thirty three") + + def test_convert_negative_int(self): + self.assertEqual(pronounce_number(-1), "minus one") + self.assertEqual(pronounce_number(-10), "minus ten") + self.assertEqual(pronounce_number(-15), "minus fifteen") + self.assertEqual(pronounce_number(-20), "minus twenty") + self.assertEqual(pronounce_number(-27), "minus twenty seven") + self.assertEqual(pronounce_number(-30), "minus thirty") + self.assertEqual(pronounce_number(-33), "minus thirty three") + + def test_convert_decimals(self): + 
self.assertEqual(pronounce_number(0.05), "zero point zero five") + self.assertEqual(pronounce_number(-0.05), "minus zero point zero five") + self.assertEqual(pronounce_number(1.234), + "one point two three") + self.assertEqual(pronounce_number(21.234), + "twenty one point two three") + self.assertEqual(pronounce_number(21.234, places=1), + "twenty one point two") + self.assertEqual(pronounce_number(21.234, places=0), + "twenty one") + self.assertEqual(pronounce_number(21.234, places=3), + "twenty one point two three four") + self.assertEqual(pronounce_number(21.234, places=4), + "twenty one point two three four") + self.assertEqual(pronounce_number(21.234, places=5), + "twenty one point two three four") + self.assertEqual(pronounce_number(-1.234), + "minus one point two three") + self.assertEqual(pronounce_number(-21.234), + "minus twenty one point two three") + self.assertEqual(pronounce_number(-21.234, places=1), + "minus twenty one point two") + self.assertEqual(pronounce_number(-21.234, places=0), + "minus twenty one") + self.assertEqual(pronounce_number(-21.234, places=3), + "minus twenty one point two three four") + self.assertEqual(pronounce_number(-21.234, places=4), + "minus twenty one point two three four") + self.assertEqual(pronounce_number(-21.234, places=5), + "minus twenty one point two three four") + + def test_convert_hundreds(self): + self.assertEqual(pronounce_number(100), "one hundred") + self.assertEqual(pronounce_number(666), "six hundred and sixty six") + self.assertEqual(pronounce_number(1456), "fourteen fifty six") + self.assertEqual(pronounce_number(103254654), "one hundred and three " + "million, two hundred " + "and fifty four " + "thousand, six hundred " + "and fifty four") + self.assertEqual(pronounce_number(1512457), "one million, five hundred" + " and twelve thousand, " + "four hundred and fifty " + "seven") + self.assertEqual(pronounce_number(209996), "two hundred and nine " + "thousand, nine hundred " + "and ninety six") + + def 
test_convert_scientific_notation(self): + self.assertEqual(pronounce_number(0, scientific=True), "zero") + self.assertEqual(pronounce_number(33, scientific=True), + "three point three times ten to the power of one") + self.assertEqual(pronounce_number(299792458, scientific=True), + "two point nine nine times ten to the power of eight") + self.assertEqual(pronounce_number(299792458, places=6, + scientific=True), + "two point nine nine seven nine two five times " + "ten to the power of eight") + self.assertEqual(pronounce_number(1.672e-27, places=3, + scientific=True), + "one point six seven two times ten to the power of " + "negative twenty seven") + + def test_auto_scientific_notation(self): + self.assertEqual( + pronounce_number(1.1e-150), "one point one times ten to the " + "power of negative one hundred " + "and fifty") + # value is platform dependent so better not use in tests? + # self.assertEqual( + # pronounce_number(sys.float_info.min), "two point two two times " + # "ten to the power of " + # "negative three hundred " + # "and eight") + # self.assertEqual( + # pronounce_number(sys.float_info.max), "one point seven nine " + # "times ten to the power of" + # " three hundred and eight") + + def test_large_numbers(self): + self.assertEqual( + pronounce_number(299792458, short_scale=True), + "two hundred and ninety nine million, seven hundred " + "and ninety two thousand, four hundred and fifty eight") + self.assertEqual( + pronounce_number(299792458, short_scale=False), + "two hundred and ninety nine million, seven hundred " + "and ninety two thousand, four hundred and fifty eight") + self.assertEqual( + pronounce_number(100034000000299792458, short_scale=True), + "one hundred quintillion, thirty four quadrillion, " + "two hundred and ninety nine million, seven hundred " + "and ninety two thousand, four hundred and fifty eight") + self.assertEqual( + pronounce_number(100034000000299792458, short_scale=False), + "one hundred trillion, thirty four thousand 
billion, " + "two hundred and ninety nine million, seven hundred " + "and ninety two thousand, four hundred and fifty eight") + self.assertEqual( + pronounce_number(10000000000, short_scale=True), + "ten billion") + self.assertEqual( + pronounce_number(1000000000000, short_scale=True), + "one trillion") + # TODO maybe beautify this + self.assertEqual( + pronounce_number(1000001, short_scale=True), + "one million, one") + self.assertEqual(pronounce_number(95505896639631893), + "ninety five quadrillion, five hundred and five " + "trillion, eight hundred and ninety six billion, six " + "hundred and thirty nine million, six hundred and " + "thirty one thousand, eight hundred and ninety three") + self.assertEqual(pronounce_number(95505896639631893, + short_scale=False), + "ninety five thousand five hundred and five billion, " + "eight hundred and ninety six thousand six hundred " + "and thirty nine million, six hundred and thirty one " + "thousand, eight hundred and ninety three") + self.assertEqual(pronounce_number(10e80, places=1), + "one qesvigintillion") + # TODO floating point rounding issues might happen + self.assertEqual(pronounce_number(1.9874522571e80, places=9), + "one hundred and ninety eight quinquavigintillion, " + "seven hundred and forty five quattuorvigintillion, " + "two hundred and twenty five tresvigintillion, " + "seven hundred and nine uuovigintillion, " + "nine hundred and ninety nine unvigintillion, " + "nine hundred and eighty nine vigintillion, " + "seven hundred and thirty novendecillion, nine " + "hundred and nineteen octodecillion, nine hundred " + "and ninety nine septendecillion, nine hundred " + "and fifty five sedecillion, four hundred and " + "ninety eight quinquadecillion, two hundred and " + "fourteen quattuordecillion, eight hundred and " + "forty five tredecillion, four hundred and " + "twenty nine duodecillion, four hundred and " + "forty four undecillion, three hundred and " + "thirty six decillion, seven hundred and twenty " + 
"four nonillion, five hundred and sixty nine " + "octillion, three hundred and seventy five " + "septillion, two hundred and thirty nine sextillion," + " six hundred and seventy quintillion, five hundred " + "and seventy four quadrillion, seven hundred and " + "thirty nine trillion, seven hundred and forty " + "eight billion, four hundred and seventy million, " + "nine hundred and fifteen thousand, seventy two") + self.assertEqual(pronounce_number(1.00000000000000001e150), + "nine hundred and ninety nine millinillion, nine " + "hundred and ninety nine uncentillion, nine hundred " + "and ninety nine centillion, nine hundred and ninety" + " nine nonagintillion, nine hundred and ninety nine" + " octogintillion, nine hundred and eighty" + " septuagintillion, eight hundred and thirty five " + "sexagintillion, five hundred and ninety six " + "quinquagintillion, one hundred and seventy two" + " quadragintillion, four hundred and thirty seven" + " noventrigintillion, three hundred and seventy four" + " octotrigintillion, five hundred and ninety" + " septentrigintillion, five hundred and seventy" + " three sestrigintillion, one hundred and twenty " + "quinquatrigintillion, fourteen quattuortrigintillion" + ", thirty trestrigintillion, three hundred and " + "eighteen duotrigintillion, seven hundred and ninety" + " three untrigintillion, ninety one trigintillion," + " one hundred and sixty four novemvigintillion, eight" + " hundred and ten octovigintillion, one hundred and" + " fifty four septemvigintillion, one hundred " + "qesvigintillion, one hundred and twelve " + "quinquavigintillion, two hundred and three " + "quattuorvigintillion, six hundred and seventy " + "eight tresvigintillion, five hundred and eighty " + "two uuovigintillion, nine hundred and seventy six" + " unvigintillion, two hundred and ninety eight " + "vigintillion, two hundred and sixty eight " + "novendecillion, six hundred and sixteen " + "octodecillion, two hundred and twenty one " + "septendecillion, 
one hundred and fifty one" + " sedecillion, nine hundred and sixty two " + "quinquadecillion, seven hundred and two" + " quattuordecillion, sixty tredecillion, two hundred" + " and sixty six duodecillion, one hundred and " + "seventy six undecillion, five decillion, four " + "hundred and forty nonillion, five hundred and" + " sixty seven octillion, thirty two septillion, " + "three hundred and thirty one sextillion, " + "two hundred and eight quintillion, four hundred and " + "three quadrillion, nine hundred and forty eight " + "trillion, two hundred and thirty three billion, " + "three hundred and seventy three million, five " + "hundred and fifteen thousand, seven hundred and " + "seventy six") + + # infinity + self.assertEqual( + pronounce_number(sys.float_info.max * 2), "infinity") + self.assertEqual( + pronounce_number(float("inf")), + "infinity") + self.assertEqual( + pronounce_number(float("-inf")), + "negative infinity") + + def test_ordinals(self): + self.assertEqual(pronounce_number(1, ordinals=True), "first") + self.assertEqual(pronounce_number(10, ordinals=True), "tenth") + self.assertEqual(pronounce_number(15, ordinals=True), "fifteenth") + self.assertEqual(pronounce_number(20, ordinals=True), "twentieth") + self.assertEqual(pronounce_number(27, ordinals=True), "twenty seventh") + self.assertEqual(pronounce_number(30, ordinals=True), "thirtieth") + self.assertEqual(pronounce_number(33, ordinals=True), "thirty third") + self.assertEqual(pronounce_number(100, ordinals=True), "hundredth") + self.assertEqual(pronounce_number(1000, ordinals=True), "thousandth") + self.assertEqual(pronounce_number(10000, ordinals=True), + "ten thousandth") + self.assertEqual(pronounce_number(18691, ordinals=True), + "eighteen thousand, six hundred and ninety first") + self.assertEqual(pronounce_number(1567, ordinals=True), + "one thousand, five hundred and sixty seventh") + self.assertEqual(pronounce_number(1.672e-27, places=3, + scientific=True, ordinals=True), + "one point 
six seven two times ten to the negative " + "twenty seventh power") + self.assertEqual(pronounce_number(18e6, ordinals=True), + "eighteen millionth") + self.assertEqual(pronounce_number(18e12, ordinals=True, + short_scale=False), + "eighteen billionth") + self.assertEqual(pronounce_number(18e12, ordinals=True), + "eighteen trillionth") + self.assertEqual(pronounce_number(18e18, ordinals=True, + short_scale=False), "eighteen " + "trillionth") + + +class TestNiceDateFormat(unittest.TestCase): + + def test_convert_times(self): + dt = datetime.datetime(2017, 1, 31, + 13, 22, 3, tzinfo=default_timezone()) + + # Verify defaults haven't changed + self.assertEqual(nice_time(dt), + nice_time(dt, "en-us", True, False, False)) + + self.assertEqual(nice_time(dt), + "one twenty two") + + self.assertEqual(nice_time(dt, use_ampm=True), + "one twenty two p.m.") + self.assertEqual(nice_time(dt, speech=False), + "1:22") + self.assertEqual(nice_time(dt, speech=False, use_ampm=True), + "1:22 PM") + self.assertEqual(nice_time(dt, speech=False, use_24hour=True), + "13:22") + self.assertEqual(nice_time(dt, speech=False, use_24hour=True, + use_ampm=True), + "13:22") + self.assertEqual(nice_time(dt, use_24hour=True, use_ampm=True), + "thirteen twenty two") + self.assertEqual(nice_time(dt, use_24hour=True, use_ampm=False), + "thirteen twenty two") + + dt = datetime.datetime(2017, 1, 31, + 13, 0, 3, tzinfo=default_timezone()) + self.assertEqual(nice_time(dt), + "one o'clock") + self.assertEqual(nice_time(dt, use_ampm=True), + "one p.m.") + self.assertEqual(nice_time(dt, speech=False), + "1:00") + self.assertEqual(nice_time(dt, speech=False, use_ampm=True), + "1:00 PM") + self.assertEqual(nice_time(dt, speech=False, use_24hour=True), + "13:00") + self.assertEqual(nice_time(dt, speech=False, use_24hour=True, + use_ampm=True), + "13:00") + self.assertEqual(nice_time(dt, use_24hour=True, use_ampm=True), + "thirteen hundred") + self.assertEqual(nice_time(dt, use_24hour=True, use_ampm=False), + 
"thirteen hundred") + + dt = datetime.datetime(2017, 1, 31, + 13, 2, 3, tzinfo=default_timezone()) + self.assertEqual(nice_time(dt), + "one oh two") + self.assertEqual(nice_time(dt, use_ampm=True), + "one oh two p.m.") + self.assertEqual(nice_time(dt, speech=False), + "1:02") + self.assertEqual(nice_time(dt, speech=False, use_ampm=True), + "1:02 PM") + self.assertEqual(nice_time(dt, speech=False, use_24hour=True), + "13:02") + self.assertEqual(nice_time(dt, speech=False, use_24hour=True, + use_ampm=True), + "13:02") + self.assertEqual(nice_time(dt, use_24hour=True, use_ampm=True), + "thirteen zero two") + self.assertEqual(nice_time(dt, use_24hour=True, use_ampm=False), + "thirteen zero two") + + dt = datetime.datetime(2017, 1, 31, + 0, 2, 3, tzinfo=default_timezone()) + self.assertEqual(nice_time(dt), + "twelve oh two") + self.assertEqual(nice_time(dt, use_ampm=True), + "twelve oh two a.m.") + self.assertEqual(nice_time(dt, speech=False), + "12:02") + self.assertEqual(nice_time(dt, speech=False, use_ampm=True), + "12:02 AM") + self.assertEqual(nice_time(dt, speech=False, use_24hour=True), + "00:02") + self.assertEqual(nice_time(dt, speech=False, use_24hour=True, + use_ampm=True), + "00:02") + self.assertEqual(nice_time(dt, use_24hour=True, use_ampm=True), + "zero zero zero two") + self.assertEqual(nice_time(dt, use_24hour=True, use_ampm=False), + "zero zero zero two") + + dt = datetime.datetime(2018, 2, 8, + 1, 2, 33, tzinfo=default_timezone()) + self.assertEqual(nice_time(dt), + "one oh two") + self.assertEqual(nice_time(dt, use_ampm=True), + "one oh two a.m.") + self.assertEqual(nice_time(dt, speech=False), + "1:02") + self.assertEqual(nice_time(dt, speech=False, use_ampm=True), + "1:02 AM") + self.assertEqual(nice_time(dt, speech=False, use_24hour=True), + "01:02") + self.assertEqual(nice_time(dt, speech=False, use_24hour=True, + use_ampm=True), + "01:02") + self.assertEqual(nice_time(dt, use_24hour=True, use_ampm=True), + "zero one zero two") + 
self.assertEqual(nice_time(dt, use_24hour=True, use_ampm=False), + "zero one zero two") + + dt = datetime.datetime(2017, 1, 31, + 12, 15, 9, tzinfo=default_timezone()) + self.assertEqual(nice_time(dt), + "quarter past twelve") + self.assertEqual(nice_time(dt, use_ampm=True), + "quarter past twelve p.m.") + + dt = datetime.datetime(2017, 1, 31, + 5, 30, 00, tzinfo=default_timezone()) + self.assertEqual(nice_time(dt, use_ampm=True), + "half past five a.m.") + + dt = datetime.datetime(2017, 1, 31, + 1, 45, 00, tzinfo=default_timezone()) + self.assertEqual(nice_time(dt), + "quarter to two") + + + def test_nice_duration(self): + self.assertEqual(nice_duration(1), "one second") + self.assertEqual(nice_duration(3), "three seconds") + self.assertEqual(nice_duration(1, speech=False), "0:01") + self.assertEqual(nice_duration(61), "one minute one second") + self.assertEqual(nice_duration(61, speech=False), "1:01") + self.assertEqual(nice_duration(5000), + "one hour twenty three minutes twenty seconds") + self.assertEqual(nice_duration(5000, speech=False), "1:23:20") + self.assertEqual(nice_duration(50000), + "thirteen hours fifty three minutes twenty seconds") + self.assertEqual(nice_duration(50000, speech=False), "13:53:20") + self.assertEqual(nice_duration(500000), + "five days eighteen hours fifty three minutes twenty seconds") # nopep8 + self.assertEqual(nice_duration(500000, speech=False), "5d 18:53:20") + self.assertEqual(nice_duration(datetime.timedelta(seconds=500000), + speech=False), + "5d 18:53:20") + + def test_join(self): + self.assertEqual(join_list(None, "and"), "") + self.assertEqual(join_list([], "and"), "") + + self.assertEqual(join_list(["a"], "and"), "a") + self.assertEqual(join_list(["a", "b"], "and"), "a and b") + self.assertEqual(join_list(["a", "b"], "or"), "a or b") + + self.assertEqual(join_list(["a", "b", "c"], "and"), "a, b and c") + self.assertEqual(join_list(["a", "b", "c"], "or"), "a, b or c") + self.assertEqual(join_list(["a", "b", "c"], "or", 
";"), "a; b or c") + self.assertEqual(join_list(["a", "b", "c", "d"], "or"), "a, b, c or d") + + self.assertEqual(join_list([1, "b", 3, "d"], "or"), "1, b, 3 or d") + + +if __name__ == "__main__": + unittest.main() diff --git a/test/test_parse.py b/test/test_parse.py index a494cc2f..7d359e51 100644 --- a/test/test_parse.py +++ b/test/test_parse.py @@ -14,23 +14,17 @@ # limitations under the License. # import unittest -from datetime import datetime, timedelta -from dateutil import tz +from datetime import datetime +from dateutil import tz from lingua_franca import load_language, unload_language, set_default_lang -from lingua_franca.internal import FunctionNotLocalizedError -from lingua_franca.time import default_timezone from lingua_franca.parse import extract_datetime -from lingua_franca.parse import extract_duration -from lingua_franca.parse import extract_number, extract_numbers from lingua_franca.parse import fuzzy_match -from lingua_franca.parse import get_gender from lingua_franca.parse import match_one -from lingua_franca.parse import normalize +from lingua_franca.time import default_timezone, now_local, set_default_tz def setUpModule(): - # TODO spin off English tests load_language('en') set_default_lang('en') @@ -39,6 +33,51 @@ def tearDownModule(): unload_language('en') +class TestTimezones(unittest.TestCase): + def test_default_tz(self): + default = default_timezone() + + naive = datetime.now() + + # convert to default tz + set_default_tz("Europe/London") + dt = extract_datetime("tomorrow", anchorDate=naive)[0] + self.assertEqual(dt.tzinfo, tz.gettz("Europe/London")) + + set_default_tz("America/Chicago") + dt = extract_datetime("tomorrow", anchorDate=naive)[0] + self.assertEqual(dt.tzinfo, tz.gettz("America/Chicago")) + + set_default_tz(default) # undo changes to default tz after test + + def test_convert_to_anchorTZ(self): + default = default_timezone() + naive = datetime.now() + local = now_local() + london_time = 
datetime.now(tz=tz.gettz("Europe/London")) + us_time = datetime.now(tz=tz.gettz("America/Chicago")) + + # convert to anchor date + dt = extract_datetime("tomorrow", anchorDate=naive)[0] + self.assertEqual(dt.tzinfo, default_timezone()) + dt = extract_datetime("tomorrow", anchorDate=local)[0] + self.assertEqual(dt.tzinfo, local.tzinfo) + dt = extract_datetime("tomorrow", anchorDate=london_time)[0] + self.assertEqual(dt.tzinfo, london_time.tzinfo) + dt = extract_datetime("tomorrow", anchorDate=us_time)[0] + self.assertEqual(dt.tzinfo, us_time.tzinfo) + + # test naive == default tz + set_default_tz("America/Chicago") + dt = extract_datetime("tomorrow", anchorDate=naive)[0] + self.assertEqual(dt.tzinfo, default_timezone()) + set_default_tz("Europe/London") + dt = extract_datetime("tomorrow", anchorDate=naive)[0] + self.assertEqual(dt.tzinfo, default_timezone()) + + set_default_tz(default) # undo changes to default tz after test + + class TestFuzzyMatch(unittest.TestCase): def test_matches(self): self.assertTrue(fuzzy_match("you and me", "you and me") >= 1.0) @@ -61,1034 +100,5 @@ def test_match_one(self): self.assertEqual(match_one('enry', choices)[0], 4) -class TestNormalize(unittest.TestCase): - def test_articles(self): - self.assertEqual(normalize("this is a test", remove_articles=True), - "this is test") - self.assertEqual(normalize("this is the test", remove_articles=True), - "this is test") - self.assertEqual(normalize("and another test", remove_articles=True), - "and another test") - self.assertEqual(normalize("this is an extra test", - remove_articles=False), - "this is an extra test") - - def test_extract_number_priority(self): - # sanity check - self.assertEqual(extract_number("third", ordinals=True), 3) - self.assertEqual(extract_number("sixth", ordinals=True), 6) - - # TODO a suite of tests needs to be written depending on outcome of - # https://github.com/MycroftAI/lingua-franca/issues/152 - # the tests bellow are flagged as problematic, some of those ARE 
BROKEN - # for now this is considered undefined behaviour!!! - - # NOTE this test is returning the first number, which seems to be - # the consensus regarding correct behaviour - self.assertEqual(extract_number("Twenty two and Three Fifths", - ordinals=True), 22) - - # TODO these should return the 1st number, not the last, ordinals - # seem messed up, the rest of the codebase is returning first - # number most likely tests bellow are bugs, i repeat, tests bellow - # are testing FOR THE "WRONG" VALUE - self.assertEqual(extract_number("sixth third", ordinals=True), 3) - self.assertEqual(extract_number("third sixth", ordinals=True), 6) - - def test_extract_number_ambiguous(self): - # test explicit ordinals - self.assertEqual(extract_number("this is the 1st", - ordinals=True), 1) - self.assertEqual(extract_number("this is the 2nd", - ordinals=False), 2) - self.assertEqual(extract_number("this is the 3rd", - ordinals=None), 3) - self.assertEqual(extract_number("this is the 4th", - ordinals=None), 4) - self.assertEqual(extract_number( - "this is the 7th test", ordinals=True), 7) - self.assertEqual(extract_number( - "this is the 7th test", ordinals=False), 7) - self.assertTrue(extract_number("this is the nth test") is False) - self.assertEqual(extract_number("this is the 1st test"), 1) - self.assertEqual(extract_number("this is the 2nd test"), 2) - self.assertEqual(extract_number("this is the 3rd test"), 3) - self.assertEqual(extract_number("this is the 31st test"), 31) - self.assertEqual(extract_number("this is the 32nd test"), 32) - self.assertEqual(extract_number("this is the 33rd test"), 33) - self.assertEqual(extract_number("this is the 34th test"), 34) - - # test non ambiguous ordinals - self.assertEqual(extract_number("this is the first test", - ordinals=True), 1) - self.assertEqual(extract_number("this is the first test", - ordinals=False), False) - self.assertEqual(extract_number("this is the first test", - ordinals=None), False) - - # test ambiguous ordinal/time 
unit - self.assertEqual(extract_number("this is second test", - ordinals=True), 2) - self.assertEqual(extract_number("this is second test", - ordinals=False), False) - self.assertEqual(extract_number("remind me in a second", - ordinals=True), 2) - self.assertEqual(extract_number("remind me in a second", - ordinals=False), False) - self.assertEqual(extract_number("remind me in a second", - ordinals=None), False) - - # test ambiguous ordinal/fractional - self.assertEqual(extract_number("this is the third test", - ordinals=True), 3.0) - self.assertEqual(extract_number("this is the third test", - ordinals=False), 1.0 / 3.0) - self.assertEqual(extract_number("this is the third test", - ordinals=None), False) - - self.assertEqual(extract_number("one third of a cup", - ordinals=False), 1.0 / 3.0) - self.assertEqual(extract_number("one third of a cup", - ordinals=True), 3) - self.assertEqual(extract_number("one third of a cup", - ordinals=None), 1) - - # test plurals - # NOTE plurals are never considered ordinals, but also not - # considered explicit fractions - self.assertEqual(extract_number("2 fifths", - ordinals=True), 2) - self.assertEqual(extract_number("2 fifth", - ordinals=True), 5) - self.assertEqual(extract_number("2 fifths", - ordinals=False), 2/5) - self.assertEqual(extract_number("2 fifths", - ordinals=None), 2) - - self.assertEqual(extract_number("Twenty two and Three Fifths"), 22.6) - - # test multiple ambiguous - self.assertEqual(extract_number("sixth third", ordinals=None), False) - self.assertEqual(extract_number("thirty second", ordinals=False), 30) - self.assertEqual(extract_number("thirty second", ordinals=None), 30) - self.assertEqual(extract_number("thirty second", ordinals=True), 32) - # TODO this test is imperfect, further discussion needed - # "Sixth third" would probably refer to "the sixth instance of a third" - # I dunno what should be returned here, don't think it should be cumulative. 
- self.assertEqual(extract_number("sixth third", ordinals=False), - 1 / 6 / 3) - - # test big numbers / short vs long scale - self.assertEqual(extract_number("this is the billionth test", - ordinals=True), 1e09) - self.assertEqual(extract_number("this is the billionth test", - ordinals=None), False) - - self.assertEqual(extract_number("this is the billionth test", - ordinals=False), 1e-9) - self.assertEqual(extract_number("this is the billionth test", - ordinals=True, - short_scale=False), 1e12) - self.assertEqual(extract_number("this is the billionth test", - ordinals=None, - short_scale=False), False) - self.assertEqual(extract_number("this is the billionth test", - short_scale=False), 1e-12) - - # test the Nth one - self.assertEqual(extract_number("the fourth one", ordinals=True), 4.0) - self.assertEqual(extract_number("the thirty sixth one", - ordinals=True), 36.0) - self.assertEqual(extract_number( - "you are the second one", ordinals=False), 1) - self.assertEqual(extract_number( - "you are the second one", ordinals=True), 2) - self.assertEqual(extract_number("you are the 1st one", - ordinals=None), 1) - self.assertEqual(extract_number("you are the 2nd one", - ordinals=None), 2) - self.assertEqual(extract_number("you are the 3rd one", - ordinals=None), 3) - self.assertEqual(extract_number("you are the 8th one", - ordinals=None), 8) - - def test_extract_number(self): - - self.assertEqual(extract_number("this is 2 test"), 2) - self.assertEqual(extract_number("this is test number 4"), 4) - self.assertEqual(extract_number("three cups"), 3) - self.assertEqual(extract_number("1/3 cups"), 1.0 / 3.0) - self.assertEqual(extract_number("quarter cup"), 0.25) - self.assertEqual(extract_number("1/4 cup"), 0.25) - self.assertEqual(extract_number("one fourth cup"), 0.25) - self.assertEqual(extract_number("2/3 cups"), 2.0 / 3.0) - self.assertEqual(extract_number("3/4 cups"), 3.0 / 4.0) - self.assertEqual(extract_number("1 and 3/4 cups"), 1.75) - 
self.assertEqual(extract_number("1 cup and a half"), 1.5) - self.assertEqual(extract_number("one cup and a half"), 1.5) - self.assertEqual(extract_number("one and a half cups"), 1.5) - self.assertEqual(extract_number("one and one half cups"), 1.5) - self.assertEqual(extract_number("three quarter cups"), 3.0 / 4.0) - self.assertEqual(extract_number("three quarters cups"), 3.0 / 4.0) - self.assertEqual(extract_number("twenty two"), 22) - self.assertEqual(extract_number( - "Twenty two with a leading capital letter"), 22) - self.assertEqual(extract_number( - "twenty Two with Two capital letters"), 22) - self.assertEqual(extract_number( - "twenty Two with mixed capital letters"), 22) - self.assertEqual(extract_number("two hundred"), 200) - self.assertEqual(extract_number("nine thousand"), 9000) - self.assertEqual(extract_number("six hundred sixty six"), 666) - self.assertEqual(extract_number("two million"), 2000000) - self.assertEqual(extract_number("two million five hundred thousand " - "tons of spinning metal"), 2500000) - self.assertEqual(extract_number("six trillion"), 6000000000000.0) - self.assertEqual(extract_number("six trillion", short_scale=False), - 6e+18) - self.assertEqual(extract_number("one point five"), 1.5) - self.assertEqual(extract_number("three dot fourteen"), 3.14) - self.assertEqual(extract_number("zero point two"), 0.2) - self.assertEqual(extract_number("billions of years older"), - 1000000000.0) - self.assertEqual(extract_number("billions of years older", - short_scale=False), - 1000000000000.0) - self.assertEqual(extract_number("one hundred thousand"), 100000) - self.assertEqual(extract_number("minus 2"), -2) - self.assertEqual(extract_number("negative seventy"), -70) - self.assertEqual(extract_number("thousand million"), 1000000000) - - # Verify non-power multiples of ten no longer discard - # adjacent multipliers - self.assertEqual(extract_number("twenty thousand"), 20000) - self.assertEqual(extract_number("fifty million"), 50000000) - - # 
Verify smaller powers of ten no longer cause miscalculation of larger - # powers of ten (see MycroftAI#86) - self.assertEqual(extract_number("twenty billion three hundred million \ - nine hundred fifty thousand six hundred \ - seventy five point eight"), - 20300950675.8) - self.assertEqual(extract_number("nine hundred ninety nine million nine \ - hundred ninety nine thousand nine \ - hundred ninety nine point nine"), - 999999999.9) - - # TODO why does "trillion" result in xxxx.0? - self.assertEqual(extract_number("eight hundred trillion two hundred \ - fifty seven"), 800000000000257.0) - - # TODO handle this case - # self.assertEqual( - # extract_number("6 dot six six six"), - # 6.666) - self.assertTrue(extract_number("The tennis player is fast") is False) - self.assertTrue(extract_number("fraggle") is False) - - self.assertTrue(extract_number("fraggle zero") is not False) - self.assertEqual(extract_number("fraggle zero"), 0) - - self.assertTrue(extract_number("grobo 0") is not False) - self.assertEqual(extract_number("grobo 0"), 0) - - self.assertEqual(extract_number("a couple of beers"), 2) - self.assertEqual(extract_number("a couple hundred beers"), 200) - self.assertEqual(extract_number("a couple thousand beers"), 2000) - self.assertEqual(extract_number("totally 100%"), 100) - - def test_extract_duration_en(self): - self.assertEqual(extract_duration("10 seconds"), - (timedelta(seconds=10.0), "")) - self.assertEqual(extract_duration("5 minutes"), - (timedelta(minutes=5), "")) - self.assertEqual(extract_duration("2 hours"), - (timedelta(hours=2), "")) - self.assertEqual(extract_duration("3 days"), - (timedelta(days=3), "")) - self.assertEqual(extract_duration("25 weeks"), - (timedelta(weeks=25), "")) - self.assertEqual(extract_duration("seven hours"), - (timedelta(hours=7), "")) - self.assertEqual(extract_duration("7.5 seconds"), - (timedelta(seconds=7.5), "")) - self.assertEqual(extract_duration("eight and a half days thirty" - " nine seconds"), - 
(timedelta(days=8.5, seconds=39), "")) - self.assertEqual(extract_duration("wake me up in three weeks, four" - " hundred ninety seven days, and" - " three hundred 91.6 seconds"), - (timedelta(weeks=3, days=497, seconds=391.6), - "wake me up in , , and")) - self.assertEqual(extract_duration("10-seconds"), - (timedelta(seconds=10.0), "")) - self.assertEqual(extract_duration("5-minutes"), - (timedelta(minutes=5), "")) - - def test_extract_duration_case_en(self): - self.assertEqual(extract_duration("Set a timer for 30 minutes"), - (timedelta(minutes=30), "Set a timer for")) - self.assertEqual(extract_duration("The movie is one hour, fifty seven" - " and a half minutes long"), - (timedelta(hours=1, minutes=57.5), - "The movie is , long")) - self.assertEqual(extract_duration("Four and a Half minutes until" - " sunset"), - (timedelta(minutes=4.5), "until sunset")) - self.assertEqual(extract_duration("Nineteen minutes past THE hour"), - (timedelta(minutes=19), "past THE hour")) - - def test_extractdatetime_fractions_en(self): - def extractWithFormat(text): - date = datetime(2017, 6, 27, 13, 4, tzinfo=default_timezone()) # Tue June 27, 2017 @ 1:04pm - [extractedDate, leftover] = extract_datetime(text, date) - extractedDate = extractedDate.strftime("%Y-%m-%d %H:%M:%S") - return [extractedDate, leftover] - - def testExtract(text, expected_date, expected_leftover): - res = extractWithFormat(normalize(text)) - self.assertEqual(res[0], expected_date, "for=" + text) - self.assertEqual(res[1], expected_leftover, "for=" + text) - - testExtract("Set the ambush for half an hour", - "2017-06-27 13:34:00", "set ambush") - testExtract("remind me to call mom in half an hour", - "2017-06-27 13:34:00", "remind me to call mom") - testExtract("remind me to call mom in a half hour", - "2017-06-27 13:34:00", "remind me to call mom") - testExtract("remind me to call mom in a quarter hour", - "2017-06-27 13:19:00", "remind me to call mom") - testExtract("remind me to call mom in a quarter of an 
hour", - "2017-06-27 13:19:00", "remind me to call mom") - - def test_extractdatetime_en(self): - def extractWithFormat(text): - date = datetime(2017, 6, 27, 13, 4, tzinfo=default_timezone()) # Tue June 27, 2017 @ 1:04pm - [extractedDate, leftover] = extract_datetime(text, date) - extractedDate = extractedDate.strftime("%Y-%m-%d %H:%M:%S") - return [extractedDate, leftover] - - def testExtract(text, expected_date, expected_leftover): - res = extractWithFormat(normalize(text)) - self.assertEqual(res[0], expected_date, "for=" + text) - self.assertEqual(res[1], expected_leftover, "for=" + text) - - testExtract("now is the time", - "2017-06-27 13:04:00", "is time") - testExtract("in a second", - "2017-06-27 13:04:01", "") - testExtract("in a minute", - "2017-06-27 13:05:00", "") - testExtract("in a couple minutes", - "2017-06-27 13:06:00", "") - testExtract("in a couple of minutes", - "2017-06-27 13:06:00", "") - testExtract("in a couple hours", - "2017-06-27 15:04:00", "") - testExtract("in a couple of hours", - "2017-06-27 15:04:00", "") - testExtract("in a couple weeks", - "2017-07-11 00:00:00", "") - testExtract("in a couple of weeks", - "2017-07-11 00:00:00", "") - testExtract("in a couple months", - "2017-08-27 00:00:00", "") - testExtract("in a couple years", - "2019-06-27 00:00:00", "") - testExtract("in a couple of months", - "2017-08-27 00:00:00", "") - testExtract("in a couple of years", - "2019-06-27 00:00:00", "") - testExtract("in a decade", - "2027-06-27 00:00:00", "") - testExtract("in a couple of decades", - "2037-06-27 00:00:00", "") - testExtract("next decade", - "2027-06-27 00:00:00", "") - testExtract("in a century", - "2117-06-27 00:00:00", "") - testExtract("in a millennium", - "3017-06-27 00:00:00", "") - testExtract("in a couple decades", - "2037-06-27 00:00:00", "") - testExtract("in 5 decades", - "2067-06-27 00:00:00", "") - testExtract("in a couple centuries", - "2217-06-27 00:00:00", "") - testExtract("in a couple of centuries", - 
"2217-06-27 00:00:00", "") - testExtract("in 2 centuries", - "2217-06-27 00:00:00", "") - testExtract("in a couple millenniums", - "4017-06-27 00:00:00", "") - testExtract("in a couple of millenniums", - "4017-06-27 00:00:00", "") - testExtract("in an hour", - "2017-06-27 14:04:00", "") - testExtract("i want it within the hour", - "2017-06-27 14:04:00", "i want it") - testExtract("in 1 second", - "2017-06-27 13:04:01", "") - testExtract("in 2 seconds", - "2017-06-27 13:04:02", "") - testExtract("Set the ambush in 1 minute", - "2017-06-27 13:05:00", "set ambush") - testExtract("Set the ambush for 5 days from today", - "2017-07-02 00:00:00", "set ambush") - testExtract("day after tomorrow", - "2017-06-29 00:00:00", "") - testExtract("What is the day after tomorrow's weather?", - "2017-06-29 00:00:00", "what is weather") - testExtract("Remind me at 10:45 pm", - "2017-06-27 22:45:00", "remind me") - testExtract("what is the weather on friday morning", - "2017-06-30 08:00:00", "what is weather") - testExtract("what is tomorrow's weather", - "2017-06-28 00:00:00", "what is weather") - testExtract("what is this afternoon's weather", - "2017-06-27 15:00:00", "what is weather") - testExtract("what is this evening's weather", - "2017-06-27 19:00:00", "what is weather") - testExtract("what was this morning's weather", - "2017-06-27 08:00:00", "what was weather") - testExtract("remind me to call mom in 8 weeks and 2 days", - "2017-08-24 00:00:00", "remind me to call mom") - testExtract("remind me to call mom on august 3rd", - "2017-08-03 00:00:00", "remind me to call mom") - testExtract("remind me tomorrow to call mom at 7am", - "2017-06-28 07:00:00", "remind me to call mom") - testExtract("remind me tomorrow to call mom at 10pm", - "2017-06-28 22:00:00", "remind me to call mom") - testExtract("remind me to call mom at 7am", - "2017-06-28 07:00:00", "remind me to call mom") - testExtract("remind me to call mom in an hour", - "2017-06-27 14:04:00", "remind me to call mom") - 
testExtract("remind me to call mom at 1730", - "2017-06-27 17:30:00", "remind me to call mom") - testExtract("remind me to call mom at 0630", - "2017-06-28 06:30:00", "remind me to call mom") - testExtract("remind me to call mom at 06 30 hours", - "2017-06-28 06:30:00", "remind me to call mom") - testExtract("remind me to call mom at 06 30", - "2017-06-28 06:30:00", "remind me to call mom") - testExtract("remind me to call mom at 06 30 hours", - "2017-06-28 06:30:00", "remind me to call mom") - testExtract("remind me to call mom at 7 o'clock", - "2017-06-27 19:00:00", "remind me to call mom") - testExtract("remind me to call mom this evening at 7 o'clock", - "2017-06-27 19:00:00", "remind me to call mom") - testExtract("remind me to call mom at 7 o'clock tonight", - "2017-06-27 19:00:00", "remind me to call mom") - testExtract("remind me to call mom at 7 o'clock in the morning", - "2017-06-28 07:00:00", "remind me to call mom") - testExtract("remind me to call mom Thursday evening at 7 o'clock", - "2017-06-29 19:00:00", "remind me to call mom") - testExtract("remind me to call mom Thursday morning at 7 o'clock", - "2017-06-29 07:00:00", "remind me to call mom") - testExtract("remind me to call mom at 7 o'clock Thursday morning", - "2017-06-29 07:00:00", "remind me to call mom") - testExtract("remind me to call mom at 7:00 Thursday morning", - "2017-06-29 07:00:00", "remind me to call mom") - # TODO: This test is imperfect due to the "at 7:00" still in the - # remainder. 
But let it pass for now since time is correct - testExtract("remind me to call mom at 7:00 Thursday evening", - "2017-06-29 19:00:00", "remind me to call mom at 7:00") - testExtract("remind me to call mom at 8 Wednesday evening", - "2017-06-28 20:00:00", "remind me to call mom") - testExtract("remind me to call mom at 8 Wednesday in the evening", - "2017-06-28 20:00:00", "remind me to call mom") - testExtract("remind me to call mom Wednesday evening at 8", - "2017-06-28 20:00:00", "remind me to call mom") - testExtract("remind me to call mom in two hours", - "2017-06-27 15:04:00", "remind me to call mom") - testExtract("remind me to call mom in 2 hours", - "2017-06-27 15:04:00", "remind me to call mom") - testExtract("remind me to call mom in 15 minutes", - "2017-06-27 13:19:00", "remind me to call mom") - testExtract("remind me to call mom in fifteen minutes", - "2017-06-27 13:19:00", "remind me to call mom") - testExtract("remind me to call mom at 10am 2 days after this saturday", - "2017-07-03 10:00:00", "remind me to call mom") - testExtract("Play Rick Astley music 2 days from Friday", - "2017-07-02 00:00:00", "play rick astley music") - testExtract("Begin the invasion at 3:45 pm on Thursday", - "2017-06-29 15:45:00", "begin invasion") - testExtract("On Monday, order pie from the bakery", - "2017-07-03 00:00:00", "order pie from bakery") - testExtract("Play Happy Birthday music 5 years from today", - "2022-06-27 00:00:00", "play happy birthday music") - testExtract("Skype Mom at 12:45 pm next Thursday", - "2017-07-06 12:45:00", "skype mom") - testExtract("What's the weather next Friday?", - "2017-06-30 00:00:00", "what weather") - testExtract("What's the weather next Wednesday?", - "2017-07-05 00:00:00", "what weather") - testExtract("What's the weather next Thursday?", - "2017-07-06 00:00:00", "what weather") - testExtract("what is the weather next friday morning", - "2017-06-30 08:00:00", "what is weather") - testExtract("what is the weather next friday 
evening", - "2017-06-30 19:00:00", "what is weather") - testExtract("what is the weather next friday afternoon", - "2017-06-30 15:00:00", "what is weather") - testExtract("remind me to call mom on august 3rd", - "2017-08-03 00:00:00", "remind me to call mom") - testExtract("Buy fireworks on the 4th of July", - "2017-07-04 00:00:00", "buy fireworks") - testExtract("what is the weather 2 weeks from next friday", - "2017-07-14 00:00:00", "what is weather") - testExtract("what is the weather wednesday at 0700 hours", - "2017-06-28 07:00:00", "what is weather") - testExtract("set an alarm wednesday at 7 o'clock", - "2017-06-28 07:00:00", "set alarm") - testExtract("Set up an appointment at 12:45 pm next Thursday", - "2017-07-06 12:45:00", "set up appointment") - testExtract("What's the weather this Thursday?", - "2017-06-29 00:00:00", "what weather") - testExtract("set up the visit for 2 weeks and 6 days from Saturday", - "2017-07-21 00:00:00", "set up visit") - testExtract("Begin the invasion at 03 45 on Thursday", - "2017-06-29 03:45:00", "begin invasion") - testExtract("Begin the invasion at o 800 hours on Thursday", - "2017-06-29 08:00:00", "begin invasion") - testExtract("Begin the party at 8 o'clock in the evening on Thursday", - "2017-06-29 20:00:00", "begin party") - testExtract("Begin the invasion at 8 in the evening on Thursday", - "2017-06-29 20:00:00", "begin invasion") - testExtract("Begin the invasion on Thursday at noon", - "2017-06-29 12:00:00", "begin invasion") - testExtract("Begin the invasion on Thursday at midnight", - "2017-06-29 00:00:00", "begin invasion") - testExtract("Begin the invasion on Thursday at 0500", - "2017-06-29 05:00:00", "begin invasion") - testExtract("remind me to wake up in 4 years", - "2021-06-27 00:00:00", "remind me to wake up") - testExtract("remind me to wake up in 4 years and 4 days", - "2021-07-01 00:00:00", "remind me to wake up") - testExtract("What is the weather 3 days after tomorrow?", - "2017-07-01 00:00:00", "what 
is weather") - testExtract("december 3", - "2017-12-03 00:00:00", "") - testExtract("lets meet at 8:00 tonight", - "2017-06-27 20:00:00", "lets meet") - testExtract("lets meet at 5pm", - "2017-06-27 17:00:00", "lets meet") - testExtract("lets meet at 8 a.m.", - "2017-06-28 08:00:00", "lets meet") - testExtract("remind me to wake up at 8 a.m", - "2017-06-28 08:00:00", "remind me to wake up") - testExtract("what is the weather on tuesday", - "2017-06-27 00:00:00", "what is weather") - testExtract("what is the weather on monday", - "2017-07-03 00:00:00", "what is weather") - testExtract("what is the weather this wednesday", - "2017-06-28 00:00:00", "what is weather") - testExtract("on thursday what is the weather", - "2017-06-29 00:00:00", "what is weather") - testExtract("on this thursday what is the weather", - "2017-06-29 00:00:00", "what is weather") - testExtract("on last monday what was the weather", - "2017-06-26 00:00:00", "what was weather") - testExtract("set an alarm for wednesday evening at 8", - "2017-06-28 20:00:00", "set alarm") - testExtract("set an alarm for wednesday at 3 o'clock in the afternoon", - "2017-06-28 15:00:00", "set alarm") - testExtract("set an alarm for wednesday at 3 o'clock in the morning", - "2017-06-28 03:00:00", "set alarm") - testExtract("set an alarm for wednesday morning at 7 o'clock", - "2017-06-28 07:00:00", "set alarm") - testExtract("set an alarm for today at 7 o'clock", - "2017-06-27 19:00:00", "set alarm") - testExtract("set an alarm for this evening at 7 o'clock", - "2017-06-27 19:00:00", "set alarm") - # TODO: This test is imperfect due to the "at 7:00" still in the - # remainder. 
But let it pass for now since time is correct - testExtract("set an alarm for this evening at 7:00", - "2017-06-27 19:00:00", "set alarm at 7:00") - testExtract("on the evening of june 5th 2017 remind me to" + - " call my mother", - "2017-06-05 19:00:00", "remind me to call my mother") - # TODO: This test is imperfect due to the missing "for" in the - # remainder. But let it pass for now since time is correct - testExtract("update my calendar for a morning meeting with julius" + - " on march 4th", - "2018-03-04 08:00:00", - "update my calendar meeting with julius") - testExtract("remind me to call mom next tuesday", - "2017-07-04 00:00:00", "remind me to call mom") - testExtract("remind me to call mom in 3 weeks", - "2017-07-18 00:00:00", "remind me to call mom") - testExtract("remind me to call mom in 8 weeks", - "2017-08-22 00:00:00", "remind me to call mom") - testExtract("remind me to call mom in 8 weeks and 2 days", - "2017-08-24 00:00:00", "remind me to call mom") - testExtract("remind me to call mom in 4 days", - "2017-07-01 00:00:00", "remind me to call mom") - testExtract("remind me to call mom in 3 months", - "2017-09-27 00:00:00", "remind me to call mom") - testExtract("remind me to call mom in 2 years and 2 days", - "2019-06-29 00:00:00", "remind me to call mom") - testExtract("remind me to call mom next week", - "2017-07-04 00:00:00", "remind me to call mom") - testExtract("remind me to call mom at 10am on saturday", - "2017-07-01 10:00:00", "remind me to call mom") - testExtract("remind me to call mom at 10am this saturday", - "2017-07-01 10:00:00", "remind me to call mom") - testExtract("remind me to call mom at 10 next saturday", - "2017-07-01 10:00:00", "remind me to call mom") - testExtract("remind me to call mom at 10am next saturday", - "2017-07-01 10:00:00", "remind me to call mom") - # test yesterday - testExtract("what day was yesterday", - "2017-06-26 00:00:00", "what day was") - testExtract("what day was the day before yesterday", - 
"2017-06-25 00:00:00", "what day was") - testExtract("i had dinner yesterday at 6", - "2017-06-26 06:00:00", "i had dinner") - testExtract("i had dinner yesterday at 6 am", - "2017-06-26 06:00:00", "i had dinner") - testExtract("i had dinner yesterday at 6 pm", - "2017-06-26 18:00:00", "i had dinner") - - # Below two tests, ensure that time is picked - # even if no am/pm is specified - # in case of weekdays/tonight - testExtract("set alarm for 9 on weekdays", - "2017-06-27 21:00:00", "set alarm weekdays") - testExtract("for 8 tonight", - "2017-06-27 20:00:00", "") - testExtract("for 8:30pm tonight", - "2017-06-27 20:30:00", "") - # Tests a time with ':' & without am/pm - testExtract("set an alarm for tonight 9:30", - "2017-06-27 21:30:00", "set alarm") - testExtract("set an alarm at 9:00 for tonight", - "2017-06-27 21:00:00", "set alarm") - # Check if it picks the intent irrespective of correctness - testExtract("set an alarm at 9 o'clock for tonight", - "2017-06-27 21:00:00", "set alarm") - testExtract("remind me about the game tonight at 11:30", - "2017-06-27 23:30:00", "remind me about game") - testExtract("set alarm at 7:30 on weekdays", - "2017-06-27 19:30:00", "set alarm on weekdays") - - # "# days " - testExtract("my birthday is 2 days from today", - "2017-06-29 00:00:00", "my birthday is") - testExtract("my birthday is 2 days after today", - "2017-06-29 00:00:00", "my birthday is") - testExtract("my birthday is 2 days from tomorrow", - "2017-06-30 00:00:00", "my birthday is") - testExtract("my birthday is 2 days after tomorrow", - "2017-06-30 00:00:00", "my birthday is") - testExtract("remind me to call mom at 10am 2 days after next saturday", - "2017-07-10 10:00:00", "remind me to call mom") - testExtract("my birthday is 2 days from yesterday", - "2017-06-28 00:00:00", "my birthday is") - testExtract("my birthday is 2 days after yesterday", - "2017-06-28 00:00:00", "my birthday is") - - # "# days ago>" - testExtract("my birthday was 1 day ago", - 
"2017-06-26 00:00:00", "my birthday was") - testExtract("my birthday was 2 days ago", - "2017-06-25 00:00:00", "my birthday was") - testExtract("my birthday was 3 days ago", - "2017-06-24 00:00:00", "my birthday was") - testExtract("my birthday was 4 days ago", - "2017-06-23 00:00:00", "my birthday was") - # TODO this test is imperfect due to "tonight" in the reminder, but let is pass since the date is correct - testExtract("lets meet tonight", - "2017-06-27 22:00:00", "lets meet tonight") - # TODO this test is imperfect due to "at night" in the reminder, but let is pass since the date is correct - testExtract("lets meet later at night", - "2017-06-27 22:00:00", "lets meet later at night") - # TODO this test is imperfect due to "night" in the reminder, but let is pass since the date is correct - testExtract("what's the weather like tomorrow night", - "2017-06-28 22:00:00", "what is weather like night") - # TODO this test is imperfect due to "night" in the reminder, but let is pass since the date is correct - testExtract("what's the weather like next tuesday night", - "2017-07-04 22:00:00", "what is weather like night") - - def test_extract_ambiguous_time_en(self): - morning = datetime(2017, 6, 27, 8, 1, 2, tzinfo=default_timezone()) - evening = datetime(2017, 6, 27, 20, 1, 2, tzinfo=default_timezone()) - noonish = datetime(2017, 6, 27, 12, 1, 2, tzinfo=default_timezone()) - self.assertEqual( - extract_datetime('feed the fish'), None) - self.assertEqual( - extract_datetime('day'), None) - self.assertEqual( - extract_datetime('week'), None) - self.assertEqual( - extract_datetime('month'), None) - self.assertEqual( - extract_datetime('year'), None) - self.assertEqual( - extract_datetime(' '), None) - self.assertEqual( - extract_datetime('feed fish at 10 o\'clock', morning)[0], - datetime(2017, 6, 27, 10, 0, 0, tzinfo=default_timezone())) - self.assertEqual( - extract_datetime('feed fish at 10 o\'clock', noonish)[0], - datetime(2017, 6, 27, 22, 0, 0, 
tzinfo=default_timezone())) - self.assertEqual( - extract_datetime('feed fish at 10 o\'clock', evening)[0], - datetime(2017, 6, 27, 22, 0, 0, tzinfo=default_timezone())) - - def test_extract_date_with_may_I_en(self): - now = datetime(2019, 7, 4, 8, 1, 2, tzinfo=default_timezone()) - may_date = datetime(2019, 5, 2, 10, 11, 20, tzinfo=default_timezone()) - self.assertEqual( - extract_datetime('May I know what time it is tomorrow', now)[0], - datetime(2019, 7, 5, 0, 0, 0, tzinfo=default_timezone())) - self.assertEqual( - extract_datetime('May I when 10 o\'clock is', now)[0], - datetime(2019, 7, 4, 10, 0, 0, tzinfo=default_timezone())) - self.assertEqual( - extract_datetime('On 24th of may I want a reminder', may_date)[0], - datetime(2019, 5, 24, 0, 0, 0, tzinfo=default_timezone())) - - def test_extract_with_other_tzinfo(self): - local_tz = default_timezone() - local_dt = datetime(2019, 7, 4, 7, 1, 2, tzinfo=local_tz) - local_tz_offset = local_tz.utcoffset(local_dt) - not_local_offset = local_tz_offset + timedelta(hours=1) - not_local_tz = tz.tzoffset('TST', not_local_offset.total_seconds()) - not_local_dt = datetime(2019, 7, 4, 8, 1, 2, tzinfo=not_local_tz) - test_dt, remainder = extract_datetime("now is the time", not_local_dt) - self.assertEqual((test_dt.year, test_dt.month, test_dt.day, - test_dt.hour, test_dt.minute, test_dt.second, - test_dt.tzinfo), - (not_local_dt.year, not_local_dt.month, not_local_dt.day, - not_local_dt.hour, not_local_dt.minute, not_local_dt.second, - not_local_dt.tzinfo)) - self.assertNotEqual((test_dt.year, test_dt.month, test_dt.day, - test_dt.hour, test_dt.minute, test_dt.second, - test_dt.tzinfo), - (local_dt.year, local_dt.month, local_dt.day, - local_dt.hour, local_dt.minute, local_dt.second, - local_dt.tzinfo)) - - def test_extract_relativedatetime_en(self): - def extractWithFormat(text): - date = datetime(2017, 6, 27, 10, 1, 2, tzinfo=default_timezone()) - [extractedDate, leftover] = extract_datetime(text, date) - extractedDate = 
extractedDate.strftime("%Y-%m-%d %H:%M:%S") - return [extractedDate, leftover] - - def testExtract(text, expected_date, expected_leftover): - res = extractWithFormat(normalize(text)) - self.assertEqual(res[0], expected_date, "for=" + text) - self.assertEqual(res[1], expected_leftover, "for=" + text) - - testExtract("lets meet in 5 minutes", - "2017-06-27 10:06:02", "lets meet") - testExtract("lets meet in 5minutes", - "2017-06-27 10:06:02", "lets meet") - testExtract("lets meet in 5 seconds", - "2017-06-27 10:01:07", "lets meet") - testExtract("lets meet in 1 hour", - "2017-06-27 11:01:02", "lets meet") - testExtract("lets meet in 2 hours", - "2017-06-27 12:01:02", "lets meet") - testExtract("lets meet in 2hours", - "2017-06-27 12:01:02", "lets meet") - testExtract("lets meet in 1 minute", - "2017-06-27 10:02:02", "lets meet") - testExtract("lets meet in 1 second", - "2017-06-27 10:01:03", "lets meet") - testExtract("lets meet in 5seconds", - "2017-06-27 10:01:07", "lets meet") - - def test_normalize_numbers(self): - self.assertEqual(normalize("remind me to do something at two to two"), - "remind me to do something at 2 to 2") - self.assertEqual(normalize('what time will it be in two minutes'), - 'what time will it be in 2 minutes') - self.assertEqual(normalize('What time will it be in twenty two minutes'), - 'What time will it be in 22 minutes') - self.assertEqual(normalize("remind me to do something at twenty to two"), - "remind me to do something at 20 to 2") - - # TODO imperfect test, maybe should return 'my favorite numbers are 20 2', - # let is pass for now since this is likely a STT issue if ever - # encountered in the wild and is somewhat ambiguous, if this was - # spoken by a human the result is what we expect, if in written form - # it is ambiguous but could mean separate numbers - self.assertEqual(normalize('my favorite numbers are twenty 2'), - 'my favorite numbers are 22') - # TODO imperfect test, same as above, fixing would impact - # extract_numbers 
quite a bit and require a non trivial ammount of - # refactoring - self.assertEqual(normalize('my favorite numbers are 20 2'), - 'my favorite numbers are 22') - - # test ordinals - self.assertEqual(normalize('this is the first'), - 'this is first') - self.assertEqual(normalize('this is the first second'), - 'this is first second') - self.assertEqual(normalize('this is the first second and third'), - 'this is first second and third') - - # test fractions - self.assertEqual(normalize('whole hour'), - 'whole hour') - self.assertEqual(normalize('quarter hour'), - 'quarter hour') - self.assertEqual(normalize('halve hour'), - 'halve hour') - self.assertEqual(normalize('half hour'), - 'half hour') - - def test_extract_date_with_number_words(self): - now = datetime(2019, 7, 4, 8, 1, 2, tzinfo=default_timezone()) - self.assertEqual( - extract_datetime('What time will it be in 2 minutes', now)[0], - datetime(2019, 7, 4, 8, 3, 2, tzinfo=default_timezone())) - self.assertEqual( - extract_datetime('What time will it be in two minutes', now)[0], - datetime(2019, 7, 4, 8, 3, 2, tzinfo=default_timezone())) - self.assertEqual( - extract_datetime('What time will it be in two hundred minutes', now)[0], - datetime(2019, 7, 4, 11, 21, 2, tzinfo=default_timezone())) - - def test_spaces(self): - self.assertEqual(normalize(" this is a test"), - "this is test") - self.assertEqual(normalize(" this is a test "), - "this is test") - self.assertEqual(normalize(" this is one test"), - "this is 1 test") - - def test_numbers(self): - self.assertEqual(normalize("this is a one two three test"), - "this is 1 2 3 test") - self.assertEqual(normalize(" it's a four five six test"), - "it is 4 5 6 test") - self.assertEqual(normalize("it's a seven eight nine test"), - "it is 7 8 9 test") - self.assertEqual(normalize("it's a seven eight nine test"), - "it is 7 8 9 test") - self.assertEqual(normalize("that's a ten eleven twelve test"), - "that is 10 11 12 test") - self.assertEqual(normalize("that's a 
thirteen fourteen test"), - "that is 13 14 test") - self.assertEqual(normalize("that's fifteen sixteen seventeen"), - "that is 15 16 17") - self.assertEqual(normalize("that's eighteen nineteen twenty"), - "that is 18 19 20") - self.assertEqual(normalize("that's one nineteen twenty two"), - "that is 1 19 22") - self.assertEqual(normalize("that's one hundred"), - "that is 100") - self.assertEqual(normalize("that's one two twenty two"), - "that is 1 2 22") - self.assertEqual(normalize("that's one and a half"), - "that is 1 and half") - self.assertEqual(normalize("that's one and a half and five six"), - "that is 1 and half and 5 6") - - def test_multiple_numbers(self): - self.assertEqual(extract_numbers("this is a one two three test"), - [1.0, 2.0, 3.0]) - self.assertEqual(extract_numbers("it's a four five six test"), - [4.0, 5.0, 6.0]) - self.assertEqual(extract_numbers("this is a ten eleven twelve test"), - [10.0, 11.0, 12.0]) - self.assertEqual(extract_numbers("this is a one twenty one test"), - [1.0, 21.0]) - self.assertEqual(extract_numbers("1 dog, seven pigs, macdonald had a " - "farm, 3 times 5 macarena"), - [1, 7, 3, 5]) - self.assertEqual(extract_numbers("two beers for two bears"), - [2.0, 2.0]) - self.assertEqual(extract_numbers("twenty 20 twenty"), - [20, 20, 20]) - self.assertEqual(extract_numbers("twenty 20 22"), - [20.0, 20.0, 22.0]) - self.assertEqual(extract_numbers("twenty twenty two twenty"), - [20, 22, 20]) - self.assertEqual(extract_numbers("twenty 2"), - [22.0]) - self.assertEqual(extract_numbers("twenty 20 twenty 2"), - [20, 20, 22]) - self.assertEqual(extract_numbers("third one"), - [1 / 3, 1]) - self.assertEqual(extract_numbers("third one", ordinals=True), [3]) - self.assertEqual(extract_numbers("six trillion", short_scale=True), - [6e12]) - self.assertEqual(extract_numbers("six trillion", short_scale=False), - [6e18]) - self.assertEqual(extract_numbers("two pigs and six trillion bacteria", - short_scale=True), [2, 6e12]) - 
self.assertEqual(extract_numbers("two pigs and six trillion bacteria", - short_scale=False), [2, 6e18]) - self.assertEqual(extract_numbers("thirty second or first", - ordinals=True), [32, 1]) - self.assertEqual(extract_numbers("this is a seven eight nine and a" - " half test"), - [7.0, 8.0, 9.5]) - - def test_contractions(self): - self.assertEqual(normalize("ain't"), "is not") - self.assertEqual(normalize("aren't"), "are not") - self.assertEqual(normalize("can't"), "can not") - self.assertEqual(normalize("could've"), "could have") - self.assertEqual(normalize("couldn't"), "could not") - self.assertEqual(normalize("didn't"), "did not") - self.assertEqual(normalize("doesn't"), "does not") - self.assertEqual(normalize("don't"), "do not") - self.assertEqual(normalize("gonna"), "going to") - self.assertEqual(normalize("gotta"), "got to") - self.assertEqual(normalize("hadn't"), "had not") - self.assertEqual(normalize("hadn't have"), "had not have") - self.assertEqual(normalize("hasn't"), "has not") - self.assertEqual(normalize("haven't"), "have not") - # TODO: Ambiguous with "he had" - self.assertEqual(normalize("he'd"), "he would") - self.assertEqual(normalize("he'll"), "he will") - # TODO: Ambiguous with "he has" - self.assertEqual(normalize("he's"), "he is") - # TODO: Ambiguous with "how would" - self.assertEqual(normalize("how'd"), "how did") - self.assertEqual(normalize("how'll"), "how will") - # TODO: Ambiguous with "how has" and "how does" - self.assertEqual(normalize("how's"), "how is") - # TODO: Ambiguous with "I had" - self.assertEqual(normalize("I'd"), "I would") - self.assertEqual(normalize("I'll"), "I will") - self.assertEqual(normalize("I'm"), "I am") - self.assertEqual(normalize("I've"), "I have") - self.assertEqual(normalize("I haven't"), "I have not") - self.assertEqual(normalize("isn't"), "is not") - self.assertEqual(normalize("it'd"), "it would") - self.assertEqual(normalize("it'll"), "it will") - # TODO: Ambiguous with "it has" - 
self.assertEqual(normalize("it's"), "it is") - self.assertEqual(normalize("it isn't"), "it is not") - self.assertEqual(normalize("mightn't"), "might not") - self.assertEqual(normalize("might've"), "might have") - self.assertEqual(normalize("mustn't"), "must not") - self.assertEqual(normalize("mustn't have"), "must not have") - self.assertEqual(normalize("must've"), "must have") - self.assertEqual(normalize("needn't"), "need not") - self.assertEqual(normalize("oughtn't"), "ought not") - self.assertEqual(normalize("shan't"), "shall not") - # TODO: Ambiguous wiht "she had" - self.assertEqual(normalize("she'd"), "she would") - self.assertEqual(normalize("she hadn't"), "she had not") - self.assertEqual(normalize("she'll"), "she will") - self.assertEqual(normalize("she's"), "she is") - self.assertEqual(normalize("she isn't"), "she is not") - self.assertEqual(normalize("should've"), "should have") - self.assertEqual(normalize("shouldn't"), "should not") - self.assertEqual(normalize("shouldn't have"), "should not have") - self.assertEqual(normalize("somebody's"), "somebody is") - # TODO: Ambiguous with "someone had" - self.assertEqual(normalize("someone'd"), "someone would") - self.assertEqual(normalize("someone hadn't"), "someone had not") - self.assertEqual(normalize("someone'll"), "someone will") - # TODO: Ambiguous with "someone has" - self.assertEqual(normalize("someone's"), "someone is") - self.assertEqual(normalize("that'll"), "that will") - # TODO: Ambiguous with "that has" - self.assertEqual(normalize("that's"), "that is") - # TODO: Ambiguous with "that had" - self.assertEqual(normalize("that'd"), "that would") - # TODO: Ambiguous with "there had" - self.assertEqual(normalize("there'd"), "there would") - self.assertEqual(normalize("there're"), "there are") - # TODO: Ambiguous with "there has" - self.assertEqual(normalize("there's"), "there is") - # TODO: Ambiguous with "they had" - self.assertEqual(normalize("they'd"), "they would") - 
self.assertEqual(normalize("they'll"), "they will") - self.assertEqual(normalize("they won't have"), "they will not have") - self.assertEqual(normalize("they're"), "they are") - self.assertEqual(normalize("they've"), "they have") - self.assertEqual(normalize("they haven't"), "they have not") - self.assertEqual(normalize("wasn't"), "was not") - # TODO: Ambiguous wiht "we had" - self.assertEqual(normalize("we'd"), "we would") - self.assertEqual(normalize("we would've"), "we would have") - self.assertEqual(normalize("we wouldn't"), "we would not") - self.assertEqual(normalize("we wouldn't have"), "we would not have") - self.assertEqual(normalize("we'll"), "we will") - self.assertEqual(normalize("we won't have"), "we will not have") - self.assertEqual(normalize("we're"), "we are") - self.assertEqual(normalize("we've"), "we have") - self.assertEqual(normalize("weren't"), "were not") - self.assertEqual(normalize("what'd"), "what did") - self.assertEqual(normalize("what'll"), "what will") - self.assertEqual(normalize("what're"), "what are") - # TODO: Ambiguous with "what has" / "what does") - self.assertEqual(normalize("whats"), "what is") - self.assertEqual(normalize("what's"), "what is") - self.assertEqual(normalize("what've"), "what have") - # TODO: Ambiguous with "when has" - self.assertEqual(normalize("when's"), "when is") - self.assertEqual(normalize("where'd"), "where did") - # TODO: Ambiguous with "where has" / where does" - self.assertEqual(normalize("where's"), "where is") - self.assertEqual(normalize("where've"), "where have") - # TODO: Ambiguous with "who had" "who did") - self.assertEqual(normalize("who'd"), "who would") - self.assertEqual(normalize("who'd've"), "who would have") - self.assertEqual(normalize("who'll"), "who will") - self.assertEqual(normalize("who're"), "who are") - # TODO: Ambiguous with "who has" / "who does" - self.assertEqual(normalize("who's"), "who is") - self.assertEqual(normalize("who've"), "who have") - 
self.assertEqual(normalize("why'd"), "why did") - self.assertEqual(normalize("why're"), "why are") - # TODO: Ambiguous with "why has" / "why does" - self.assertEqual(normalize("why's"), "why is") - self.assertEqual(normalize("won't"), "will not") - self.assertEqual(normalize("won't've"), "will not have") - self.assertEqual(normalize("would've"), "would have") - self.assertEqual(normalize("wouldn't"), "would not") - self.assertEqual(normalize("wouldn't've"), "would not have") - self.assertEqual(normalize("ya'll"), "you all") - self.assertEqual(normalize("y'all"), "you all") - self.assertEqual(normalize("y'ain't"), "you are not") - # TODO: Ambiguous with "you had" - self.assertEqual(normalize("you'd"), "you would") - self.assertEqual(normalize("you'd've"), "you would have") - self.assertEqual(normalize("you'll"), "you will") - self.assertEqual(normalize("you're"), "you are") - self.assertEqual(normalize("you aren't"), "you are not") - self.assertEqual(normalize("you've"), "you have") - self.assertEqual(normalize("you haven't"), "you have not") - - def test_combinations(self): - self.assertEqual(normalize("I couldn't have guessed there'd be two"), - "I could not have guessed there would be 2") - self.assertEqual(normalize("I wouldn't have"), "I would not have") - self.assertEqual(normalize("I hadn't been there"), - "I had not been there") - self.assertEqual(normalize("I would've"), "I would have") - self.assertEqual(normalize("it hadn't"), "it had not") - self.assertEqual(normalize("it hadn't have"), "it had not have") - self.assertEqual(normalize("it would've"), "it would have") - self.assertEqual(normalize("she wouldn't have"), "she would not have") - self.assertEqual(normalize("she would've"), "she would have") - self.assertEqual(normalize("someone wouldn't have"), - "someone would not have") - self.assertEqual(normalize("someone would've"), "someone would have") - self.assertEqual(normalize("what's the weather like"), - "what is weather like") - 
self.assertEqual(normalize("that's what I told you"), - "that is what I told you") - - self.assertEqual(normalize("whats 8 + 4"), "what is 8 + 4") - - # TODO not localized; needed in english? - def test_gender(self): - self.assertRaises((AttributeError, FunctionNotLocalizedError), - get_gender, "person", None) - - if __name__ == "__main__": unittest.main() diff --git a/test/test_parse_en.py b/test/test_parse_en.py new file mode 100644 index 00000000..e61802b7 --- /dev/null +++ b/test/test_parse_en.py @@ -0,0 +1,1072 @@ +# +# Copyright 2017 Mycroft AI Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +import unittest +from datetime import datetime, timedelta +from dateutil import tz + +from lingua_franca import load_language, unload_language, set_default_lang +from lingua_franca.internal import FunctionNotLocalizedError +from lingua_franca.time import default_timezone, now_local, set_default_tz +from lingua_franca.parse import extract_datetime +from lingua_franca.parse import extract_duration +from lingua_franca.parse import extract_number, extract_numbers +from lingua_franca.parse import fuzzy_match +from lingua_franca.parse import get_gender +from lingua_franca.parse import match_one +from lingua_franca.parse import normalize + + +def setUpModule(): + # TODO spin off English tests + load_language('en') + set_default_lang('en') + + +def tearDownModule(): + unload_language('en') + + +class TestNormalize(unittest.TestCase): + def test_articles(self): + self.assertEqual(normalize("this is a test", remove_articles=True), + "this is test") + self.assertEqual(normalize("this is the test", remove_articles=True), + "this is test") + self.assertEqual(normalize("and another test", remove_articles=True), + "and another test") + self.assertEqual(normalize("this is an extra test", + remove_articles=False), + "this is an extra test") + + def test_extract_number_priority(self): + # sanity check + self.assertEqual(extract_number("third", ordinals=True), 3) + self.assertEqual(extract_number("sixth", ordinals=True), 6) + + # TODO a suite of tests needs to be written depending on outcome of + # https://github.com/MycroftAI/lingua-franca/issues/152 + # the tests below are flagged as problematic, some of those ARE BROKEN + # for now this is considered undefined behaviour!!! 
+ + # NOTE this test is returning the first number, which seems to be + # the consensus regarding correct behaviour + self.assertEqual(extract_number("Twenty two and Three Fifths", + ordinals=True), 22) + + # TODO these should return the 1st number, not the last, ordinals + # seem messed up, the rest of the codebase is returning first + # number most likely tests below are bugs, i repeat, tests below + # are testing FOR THE "WRONG" VALUE + self.assertEqual(extract_number("sixth third", ordinals=True), 3) + self.assertEqual(extract_number("third sixth", ordinals=True), 6) + + def test_extract_number_ambiguous(self): + # test explicit ordinals + self.assertEqual(extract_number("this is the 1st", + ordinals=True), 1) + self.assertEqual(extract_number("this is the 2nd", + ordinals=False), 2) + self.assertEqual(extract_number("this is the 3rd", + ordinals=None), 3) + self.assertEqual(extract_number("this is the 4th", + ordinals=None), 4) + self.assertEqual(extract_number( + "this is the 7th test", ordinals=True), 7) + self.assertEqual(extract_number( + "this is the 7th test", ordinals=False), 7) + self.assertTrue(extract_number("this is the nth test") is False) + self.assertEqual(extract_number("this is the 1st test"), 1) + self.assertEqual(extract_number("this is the 2nd test"), 2) + self.assertEqual(extract_number("this is the 3rd test"), 3) + self.assertEqual(extract_number("this is the 31st test"), 31) + self.assertEqual(extract_number("this is the 32nd test"), 32) + self.assertEqual(extract_number("this is the 33rd test"), 33) + self.assertEqual(extract_number("this is the 34th test"), 34) + + # test non ambiguous ordinals + self.assertEqual(extract_number("this is the first test", + ordinals=True), 1) + self.assertEqual(extract_number("this is the first test", + ordinals=False), False) + self.assertEqual(extract_number("this is the first test", + ordinals=None), False) + + # test ambiguous ordinal/time unit + self.assertEqual(extract_number("this is second 
test", + ordinals=True), 2) + self.assertEqual(extract_number("this is second test", + ordinals=False), False) + self.assertEqual(extract_number("remind me in a second", + ordinals=True), 2) + self.assertEqual(extract_number("remind me in a second", + ordinals=False), False) + self.assertEqual(extract_number("remind me in a second", + ordinals=None), False) + + # test ambiguous ordinal/fractional + self.assertEqual(extract_number("this is the third test", + ordinals=True), 3.0) + self.assertEqual(extract_number("this is the third test", + ordinals=False), 1.0 / 3.0) + self.assertEqual(extract_number("this is the third test", + ordinals=None), False) + + self.assertEqual(extract_number("one third of a cup", + ordinals=False), 1.0 / 3.0) + self.assertEqual(extract_number("one third of a cup", + ordinals=True), 3) + self.assertEqual(extract_number("one third of a cup", + ordinals=None), 1) + + # test plurals + # NOTE plurals are never considered ordinals, but also not + # considered explicit fractions + self.assertEqual(extract_number("2 fifths", + ordinals=True), 2) + self.assertEqual(extract_number("2 fifth", + ordinals=True), 5) + self.assertEqual(extract_number("2 fifths", + ordinals=False), 2/5) + self.assertEqual(extract_number("2 fifths", + ordinals=None), 2) + + self.assertEqual(extract_number("Twenty two and Three Fifths"), 22.6) + + # test multiple ambiguous + self.assertEqual(extract_number("sixth third", ordinals=None), False) + self.assertEqual(extract_number("thirty second", ordinals=False), 30) + self.assertEqual(extract_number("thirty second", ordinals=None), 30) + self.assertEqual(extract_number("thirty second", ordinals=True), 32) + # TODO this test is imperfect, further discussion needed + # "Sixth third" would probably refer to "the sixth instance of a third" + # I dunno what should be returned here, don't think it should be cumulative. 
+ self.assertEqual(extract_number("sixth third", ordinals=False), + 1 / 6 / 3) + + # test big numbers / short vs long scale + self.assertEqual(extract_number("this is the billionth test", + ordinals=True), 1e09) + self.assertEqual(extract_number("this is the billionth test", + ordinals=None), False) + + self.assertEqual(extract_number("this is the billionth test", + ordinals=False), 1e-9) + self.assertEqual(extract_number("this is the billionth test", + ordinals=True, + short_scale=False), 1e12) + self.assertEqual(extract_number("this is the billionth test", + ordinals=None, + short_scale=False), False) + self.assertEqual(extract_number("this is the billionth test", + short_scale=False), 1e-12) + + # test the Nth one + self.assertEqual(extract_number("the fourth one", ordinals=True), 4.0) + self.assertEqual(extract_number("the thirty sixth one", + ordinals=True), 36.0) + self.assertEqual(extract_number( + "you are the second one", ordinals=False), 1) + self.assertEqual(extract_number( + "you are the second one", ordinals=True), 2) + self.assertEqual(extract_number("you are the 1st one", + ordinals=None), 1) + self.assertEqual(extract_number("you are the 2nd one", + ordinals=None), 2) + self.assertEqual(extract_number("you are the 3rd one", + ordinals=None), 3) + self.assertEqual(extract_number("you are the 8th one", + ordinals=None), 8) + + def test_extract_number(self): + + self.assertEqual(extract_number("this is 2 test"), 2) + self.assertEqual(extract_number("this is test number 4"), 4) + self.assertEqual(extract_number("three cups"), 3) + self.assertEqual(extract_number("1/3 cups"), 1.0 / 3.0) + self.assertEqual(extract_number("quarter cup"), 0.25) + self.assertEqual(extract_number("1/4 cup"), 0.25) + self.assertEqual(extract_number("one fourth cup"), 0.25) + self.assertEqual(extract_number("2/3 cups"), 2.0 / 3.0) + self.assertEqual(extract_number("3/4 cups"), 3.0 / 4.0) + self.assertEqual(extract_number("1 and 3/4 cups"), 1.75) + 
self.assertEqual(extract_number("1 cup and a half"), 1.5) + self.assertEqual(extract_number("one cup and a half"), 1.5) + self.assertEqual(extract_number("one and a half cups"), 1.5) + self.assertEqual(extract_number("one and one half cups"), 1.5) + self.assertEqual(extract_number("three quarter cups"), 3.0 / 4.0) + self.assertEqual(extract_number("three quarters cups"), 3.0 / 4.0) + self.assertEqual(extract_number("twenty two"), 22) + self.assertEqual(extract_number( + "Twenty two with a leading capital letter"), 22) + self.assertEqual(extract_number( + "twenty Two with Two capital letters"), 22) + self.assertEqual(extract_number( + "twenty Two with mixed capital letters"), 22) + self.assertEqual(extract_number("two hundred"), 200) + self.assertEqual(extract_number("nine thousand"), 9000) + self.assertEqual(extract_number("six hundred sixty six"), 666) + self.assertEqual(extract_number("two million"), 2000000) + self.assertEqual(extract_number("two million five hundred thousand " + "tons of spinning metal"), 2500000) + self.assertEqual(extract_number("six trillion"), 6000000000000.0) + self.assertEqual(extract_number("six trillion", short_scale=False), + 6e+18) + self.assertEqual(extract_number("one point five"), 1.5) + self.assertEqual(extract_number("three dot fourteen"), 3.14) + self.assertEqual(extract_number("zero point two"), 0.2) + self.assertEqual(extract_number("billions of years older"), + 1000000000.0) + self.assertEqual(extract_number("billions of years older", + short_scale=False), + 1000000000000.0) + self.assertEqual(extract_number("one hundred thousand"), 100000) + self.assertEqual(extract_number("minus 2"), -2) + self.assertEqual(extract_number("negative seventy"), -70) + self.assertEqual(extract_number("thousand million"), 1000000000) + + # Verify non-power multiples of ten no longer discard + # adjacent multipliers + self.assertEqual(extract_number("twenty thousand"), 20000) + self.assertEqual(extract_number("fifty million"), 50000000) + + # 
Verify smaller powers of ten no longer cause miscalculation of larger + # powers of ten (see MycroftAI#86) + self.assertEqual(extract_number("twenty billion three hundred million \ + nine hundred fifty thousand six hundred \ + seventy five point eight"), + 20300950675.8) + self.assertEqual(extract_number("nine hundred ninety nine million nine \ + hundred ninety nine thousand nine \ + hundred ninety nine point nine"), + 999999999.9) + + # TODO why does "trillion" result in xxxx.0? + self.assertEqual(extract_number("eight hundred trillion two hundred \ + fifty seven"), 800000000000257.0) + + # TODO handle this case + # self.assertEqual( + # extract_number("6 dot six six six"), + # 6.666) + self.assertTrue(extract_number("The tennis player is fast") is False) + self.assertTrue(extract_number("fraggle") is False) + + self.assertTrue(extract_number("fraggle zero") is not False) + self.assertEqual(extract_number("fraggle zero"), 0) + + self.assertTrue(extract_number("grobo 0") is not False) + self.assertEqual(extract_number("grobo 0"), 0) + + self.assertEqual(extract_number("a couple of beers"), 2) + self.assertEqual(extract_number("a couple hundred beers"), 200) + self.assertEqual(extract_number("a couple thousand beers"), 2000) + self.assertEqual(extract_number("totally 100%"), 100) + + def test_extract_duration_en(self): + self.assertEqual(extract_duration("10 seconds"), + (timedelta(seconds=10.0), "")) + self.assertEqual(extract_duration("5 minutes"), + (timedelta(minutes=5), "")) + self.assertEqual(extract_duration("2 hours"), + (timedelta(hours=2), "")) + self.assertEqual(extract_duration("3 days"), + (timedelta(days=3), "")) + self.assertEqual(extract_duration("25 weeks"), + (timedelta(weeks=25), "")) + self.assertEqual(extract_duration("seven hours"), + (timedelta(hours=7), "")) + self.assertEqual(extract_duration("7.5 seconds"), + (timedelta(seconds=7.5), "")) + self.assertEqual(extract_duration("eight and a half days thirty" + " nine seconds"), + 
(timedelta(days=8.5, seconds=39), "")) + self.assertEqual(extract_duration("wake me up in three weeks, four" + " hundred ninety seven days, and" + " three hundred 91.6 seconds"), + (timedelta(weeks=3, days=497, seconds=391.6), + "wake me up in , , and")) + self.assertEqual(extract_duration("10-seconds"), + (timedelta(seconds=10.0), "")) + self.assertEqual(extract_duration("5-minutes"), + (timedelta(minutes=5), "")) + + def test_extract_duration_case_en(self): + self.assertEqual(extract_duration("Set a timer for 30 minutes"), + (timedelta(minutes=30), "Set a timer for")) + self.assertEqual(extract_duration("The movie is one hour, fifty seven" + " and a half minutes long"), + (timedelta(hours=1, minutes=57.5), + "The movie is , long")) + self.assertEqual(extract_duration("Four and a Half minutes until" + " sunset"), + (timedelta(minutes=4.5), "until sunset")) + self.assertEqual(extract_duration("Nineteen minutes past THE hour"), + (timedelta(minutes=19), "past THE hour")) + + def test_extractdatetime_fractions_en(self): + def extractWithFormat(text): + date = datetime(2017, 6, 27, 13, 4, tzinfo=default_timezone()) # Tue June 27, 2017 @ 1:04pm + [extractedDate, leftover] = extract_datetime(text, date) + extractedDate = extractedDate.strftime("%Y-%m-%d %H:%M:%S") + return [extractedDate, leftover] + + def testExtract(text, expected_date, expected_leftover): + res = extractWithFormat(normalize(text)) + self.assertEqual(res[0], expected_date, "for=" + text) + self.assertEqual(res[1], expected_leftover, "for=" + text) + + testExtract("Set the ambush for half an hour", + "2017-06-27 13:34:00", "set ambush") + testExtract("remind me to call mom in half an hour", + "2017-06-27 13:34:00", "remind me to call mom") + testExtract("remind me to call mom in a half hour", + "2017-06-27 13:34:00", "remind me to call mom") + testExtract("remind me to call mom in a quarter hour", + "2017-06-27 13:19:00", "remind me to call mom") + testExtract("remind me to call mom in a quarter of an 
hour", + "2017-06-27 13:19:00", "remind me to call mom") + + def test_extractdatetime_en(self): + def extractWithFormat(text): + date = datetime(2017, 6, 27, 13, 4, tzinfo=default_timezone()) # Tue June 27, 2017 @ 1:04pm + [extractedDate, leftover] = extract_datetime(text, date) + extractedDate = extractedDate.strftime("%Y-%m-%d %H:%M:%S") + return [extractedDate, leftover] + + def testExtract(text, expected_date, expected_leftover): + res = extractWithFormat(normalize(text)) + self.assertEqual(res[0], expected_date, "for=" + text) + self.assertEqual(res[1], expected_leftover, "for=" + text) + + testExtract("now is the time", + "2017-06-27 13:04:00", "is time") + testExtract("in a second", + "2017-06-27 13:04:01", "") + testExtract("in a minute", + "2017-06-27 13:05:00", "") + testExtract("in a couple minutes", + "2017-06-27 13:06:00", "") + testExtract("in a couple of minutes", + "2017-06-27 13:06:00", "") + testExtract("in a couple hours", + "2017-06-27 15:04:00", "") + testExtract("in a couple of hours", + "2017-06-27 15:04:00", "") + testExtract("in a couple weeks", + "2017-07-11 00:00:00", "") + testExtract("in a couple of weeks", + "2017-07-11 00:00:00", "") + testExtract("in a couple months", + "2017-08-27 00:00:00", "") + testExtract("in a couple years", + "2019-06-27 00:00:00", "") + testExtract("in a couple of months", + "2017-08-27 00:00:00", "") + testExtract("in a couple of years", + "2019-06-27 00:00:00", "") + testExtract("in a decade", + "2027-06-27 00:00:00", "") + testExtract("in a couple of decades", + "2037-06-27 00:00:00", "") + testExtract("next decade", + "2027-06-27 00:00:00", "") + testExtract("in a century", + "2117-06-27 00:00:00", "") + testExtract("in a millennium", + "3017-06-27 00:00:00", "") + testExtract("in a couple decades", + "2037-06-27 00:00:00", "") + testExtract("in 5 decades", + "2067-06-27 00:00:00", "") + testExtract("in a couple centuries", + "2217-06-27 00:00:00", "") + testExtract("in a couple of centuries", + 
"2217-06-27 00:00:00", "") + testExtract("in 2 centuries", + "2217-06-27 00:00:00", "") + testExtract("in a couple millenniums", + "4017-06-27 00:00:00", "") + testExtract("in a couple of millenniums", + "4017-06-27 00:00:00", "") + testExtract("in an hour", + "2017-06-27 14:04:00", "") + testExtract("i want it within the hour", + "2017-06-27 14:04:00", "i want it") + testExtract("in 1 second", + "2017-06-27 13:04:01", "") + testExtract("in 2 seconds", + "2017-06-27 13:04:02", "") + testExtract("Set the ambush in 1 minute", + "2017-06-27 13:05:00", "set ambush") + testExtract("Set the ambush for 5 days from today", + "2017-07-02 00:00:00", "set ambush") + testExtract("day after tomorrow", + "2017-06-29 00:00:00", "") + testExtract("What is the day after tomorrow's weather?", + "2017-06-29 00:00:00", "what is weather") + testExtract("Remind me at 10:45 pm", + "2017-06-27 22:45:00", "remind me") + testExtract("what is the weather on friday morning", + "2017-06-30 08:00:00", "what is weather") + testExtract("what is tomorrow's weather", + "2017-06-28 00:00:00", "what is weather") + testExtract("what is this afternoon's weather", + "2017-06-27 15:00:00", "what is weather") + testExtract("what is this evening's weather", + "2017-06-27 19:00:00", "what is weather") + testExtract("what was this morning's weather", + "2017-06-27 08:00:00", "what was weather") + testExtract("remind me to call mom in 8 weeks and 2 days", + "2017-08-24 00:00:00", "remind me to call mom") + testExtract("remind me to call mom on august 3rd", + "2017-08-03 00:00:00", "remind me to call mom") + testExtract("remind me tomorrow to call mom at 7am", + "2017-06-28 07:00:00", "remind me to call mom") + testExtract("remind me tomorrow to call mom at 10pm", + "2017-06-28 22:00:00", "remind me to call mom") + testExtract("remind me to call mom at 7am", + "2017-06-28 07:00:00", "remind me to call mom") + testExtract("remind me to call mom in an hour", + "2017-06-27 14:04:00", "remind me to call mom") + 
testExtract("remind me to call mom at 1730", + "2017-06-27 17:30:00", "remind me to call mom") + testExtract("remind me to call mom at 0630", + "2017-06-28 06:30:00", "remind me to call mom") + testExtract("remind me to call mom at 06 30 hours", + "2017-06-28 06:30:00", "remind me to call mom") + testExtract("remind me to call mom at 06 30", + "2017-06-28 06:30:00", "remind me to call mom") + testExtract("remind me to call mom at 06 30 hours", + "2017-06-28 06:30:00", "remind me to call mom") + testExtract("remind me to call mom at 7 o'clock", + "2017-06-27 19:00:00", "remind me to call mom") + testExtract("remind me to call mom this evening at 7 o'clock", + "2017-06-27 19:00:00", "remind me to call mom") + testExtract("remind me to call mom at 7 o'clock tonight", + "2017-06-27 19:00:00", "remind me to call mom") + testExtract("remind me to call mom at 7 o'clock in the morning", + "2017-06-28 07:00:00", "remind me to call mom") + testExtract("remind me to call mom Thursday evening at 7 o'clock", + "2017-06-29 19:00:00", "remind me to call mom") + testExtract("remind me to call mom Thursday morning at 7 o'clock", + "2017-06-29 07:00:00", "remind me to call mom") + testExtract("remind me to call mom at 7 o'clock Thursday morning", + "2017-06-29 07:00:00", "remind me to call mom") + testExtract("remind me to call mom at 7:00 Thursday morning", + "2017-06-29 07:00:00", "remind me to call mom") + # TODO: This test is imperfect due to the "at 7:00" still in the + # remainder. 
But let it pass for now since time is correct + testExtract("remind me to call mom at 7:00 Thursday evening", + "2017-06-29 19:00:00", "remind me to call mom at 7:00") + testExtract("remind me to call mom at 8 Wednesday evening", + "2017-06-28 20:00:00", "remind me to call mom") + testExtract("remind me to call mom at 8 Wednesday in the evening", + "2017-06-28 20:00:00", "remind me to call mom") + testExtract("remind me to call mom Wednesday evening at 8", + "2017-06-28 20:00:00", "remind me to call mom") + testExtract("remind me to call mom in two hours", + "2017-06-27 15:04:00", "remind me to call mom") + testExtract("remind me to call mom in 2 hours", + "2017-06-27 15:04:00", "remind me to call mom") + testExtract("remind me to call mom in 15 minutes", + "2017-06-27 13:19:00", "remind me to call mom") + testExtract("remind me to call mom in fifteen minutes", + "2017-06-27 13:19:00", "remind me to call mom") + testExtract("remind me to call mom at 10am 2 days after this saturday", + "2017-07-03 10:00:00", "remind me to call mom") + testExtract("Play Rick Astley music 2 days from Friday", + "2017-07-02 00:00:00", "play rick astley music") + testExtract("Begin the invasion at 3:45 pm on Thursday", + "2017-06-29 15:45:00", "begin invasion") + testExtract("On Monday, order pie from the bakery", + "2017-07-03 00:00:00", "order pie from bakery") + testExtract("Play Happy Birthday music 5 years from today", + "2022-06-27 00:00:00", "play happy birthday music") + testExtract("Skype Mom at 12:45 pm next Thursday", + "2017-07-06 12:45:00", "skype mom") + testExtract("What's the weather next Friday?", + "2017-06-30 00:00:00", "what weather") + testExtract("What's the weather next Wednesday?", + "2017-07-05 00:00:00", "what weather") + testExtract("What's the weather next Thursday?", + "2017-07-06 00:00:00", "what weather") + testExtract("what is the weather next friday morning", + "2017-06-30 08:00:00", "what is weather") + testExtract("what is the weather next friday 
evening", + "2017-06-30 19:00:00", "what is weather") + testExtract("what is the weather next friday afternoon", + "2017-06-30 15:00:00", "what is weather") + testExtract("remind me to call mom on august 3rd", + "2017-08-03 00:00:00", "remind me to call mom") + testExtract("Buy fireworks on the 4th of July", + "2017-07-04 00:00:00", "buy fireworks") + testExtract("what is the weather 2 weeks from next friday", + "2017-07-14 00:00:00", "what is weather") + testExtract("what is the weather wednesday at 0700 hours", + "2017-06-28 07:00:00", "what is weather") + testExtract("set an alarm wednesday at 7 o'clock", + "2017-06-28 07:00:00", "set alarm") + testExtract("Set up an appointment at 12:45 pm next Thursday", + "2017-07-06 12:45:00", "set up appointment") + testExtract("What's the weather this Thursday?", + "2017-06-29 00:00:00", "what weather") + testExtract("set up the visit for 2 weeks and 6 days from Saturday", + "2017-07-21 00:00:00", "set up visit") + testExtract("Begin the invasion at 03 45 on Thursday", + "2017-06-29 03:45:00", "begin invasion") + testExtract("Begin the invasion at o 800 hours on Thursday", + "2017-06-29 08:00:00", "begin invasion") + testExtract("Begin the party at 8 o'clock in the evening on Thursday", + "2017-06-29 20:00:00", "begin party") + testExtract("Begin the invasion at 8 in the evening on Thursday", + "2017-06-29 20:00:00", "begin invasion") + testExtract("Begin the invasion on Thursday at noon", + "2017-06-29 12:00:00", "begin invasion") + testExtract("Begin the invasion on Thursday at midnight", + "2017-06-29 00:00:00", "begin invasion") + testExtract("Begin the invasion on Thursday at 0500", + "2017-06-29 05:00:00", "begin invasion") + testExtract("remind me to wake up in 4 years", + "2021-06-27 00:00:00", "remind me to wake up") + testExtract("remind me to wake up in 4 years and 4 days", + "2021-07-01 00:00:00", "remind me to wake up") + testExtract("What is the weather 3 days after tomorrow?", + "2017-07-01 00:00:00", "what 
is weather") + testExtract("december 3", + "2017-12-03 00:00:00", "") + testExtract("lets meet at 8:00 tonight", + "2017-06-27 20:00:00", "lets meet") + testExtract("lets meet at 5pm", + "2017-06-27 17:00:00", "lets meet") + testExtract("lets meet at 8 a.m.", + "2017-06-28 08:00:00", "lets meet") + testExtract("remind me to wake up at 8 a.m", + "2017-06-28 08:00:00", "remind me to wake up") + testExtract("what is the weather on tuesday", + "2017-06-27 00:00:00", "what is weather") + testExtract("what is the weather on monday", + "2017-07-03 00:00:00", "what is weather") + testExtract("what is the weather this wednesday", + "2017-06-28 00:00:00", "what is weather") + testExtract("on thursday what is the weather", + "2017-06-29 00:00:00", "what is weather") + testExtract("on this thursday what is the weather", + "2017-06-29 00:00:00", "what is weather") + testExtract("on last monday what was the weather", + "2017-06-26 00:00:00", "what was weather") + testExtract("set an alarm for wednesday evening at 8", + "2017-06-28 20:00:00", "set alarm") + testExtract("set an alarm for wednesday at 3 o'clock in the afternoon", + "2017-06-28 15:00:00", "set alarm") + testExtract("set an alarm for wednesday at 3 o'clock in the morning", + "2017-06-28 03:00:00", "set alarm") + testExtract("set an alarm for wednesday morning at 7 o'clock", + "2017-06-28 07:00:00", "set alarm") + testExtract("set an alarm for today at 7 o'clock", + "2017-06-27 19:00:00", "set alarm") + testExtract("set an alarm for this evening at 7 o'clock", + "2017-06-27 19:00:00", "set alarm") + # TODO: This test is imperfect due to the "at 7:00" still in the + # remainder. 
But let it pass for now since time is correct + testExtract("set an alarm for this evening at 7:00", + "2017-06-27 19:00:00", "set alarm at 7:00") + testExtract("on the evening of june 5th 2017 remind me to" + + " call my mother", + "2017-06-05 19:00:00", "remind me to call my mother") + # TODO: This test is imperfect due to the missing "for" in the + # remainder. But let it pass for now since time is correct + testExtract("update my calendar for a morning meeting with julius" + + " on march 4th", + "2018-03-04 08:00:00", + "update my calendar meeting with julius") + testExtract("remind me to call mom next tuesday", + "2017-07-04 00:00:00", "remind me to call mom") + testExtract("remind me to call mom in 3 weeks", + "2017-07-18 00:00:00", "remind me to call mom") + testExtract("remind me to call mom in 8 weeks", + "2017-08-22 00:00:00", "remind me to call mom") + testExtract("remind me to call mom in 8 weeks and 2 days", + "2017-08-24 00:00:00", "remind me to call mom") + testExtract("remind me to call mom in 4 days", + "2017-07-01 00:00:00", "remind me to call mom") + testExtract("remind me to call mom in 3 months", + "2017-09-27 00:00:00", "remind me to call mom") + testExtract("remind me to call mom in 2 years and 2 days", + "2019-06-29 00:00:00", "remind me to call mom") + testExtract("remind me to call mom next week", + "2017-07-04 00:00:00", "remind me to call mom") + testExtract("remind me to call mom at 10am on saturday", + "2017-07-01 10:00:00", "remind me to call mom") + testExtract("remind me to call mom at 10am this saturday", + "2017-07-01 10:00:00", "remind me to call mom") + testExtract("remind me to call mom at 10 next saturday", + "2017-07-01 10:00:00", "remind me to call mom") + testExtract("remind me to call mom at 10am next saturday", + "2017-07-01 10:00:00", "remind me to call mom") + # test yesterday + testExtract("what day was yesterday", + "2017-06-26 00:00:00", "what day was") + testExtract("what day was the day before yesterday", + 
"2017-06-25 00:00:00", "what day was") + testExtract("i had dinner yesterday at 6", + "2017-06-26 06:00:00", "i had dinner") + testExtract("i had dinner yesterday at 6 am", + "2017-06-26 06:00:00", "i had dinner") + testExtract("i had dinner yesterday at 6 pm", + "2017-06-26 18:00:00", "i had dinner") + + # Below two tests, ensure that time is picked + # even if no am/pm is specified + # in case of weekdays/tonight + testExtract("set alarm for 9 on weekdays", + "2017-06-27 21:00:00", "set alarm weekdays") + testExtract("for 8 tonight", + "2017-06-27 20:00:00", "") + testExtract("for 8:30pm tonight", + "2017-06-27 20:30:00", "") + # Tests a time with ':' & without am/pm + testExtract("set an alarm for tonight 9:30", + "2017-06-27 21:30:00", "set alarm") + testExtract("set an alarm at 9:00 for tonight", + "2017-06-27 21:00:00", "set alarm") + # Check if it picks the intent irrespective of correctness + testExtract("set an alarm at 9 o'clock for tonight", + "2017-06-27 21:00:00", "set alarm") + testExtract("remind me about the game tonight at 11:30", + "2017-06-27 23:30:00", "remind me about game") + testExtract("set alarm at 7:30 on weekdays", + "2017-06-27 19:30:00", "set alarm on weekdays") + + # "# days " + testExtract("my birthday is 2 days from today", + "2017-06-29 00:00:00", "my birthday is") + testExtract("my birthday is 2 days after today", + "2017-06-29 00:00:00", "my birthday is") + testExtract("my birthday is 2 days from tomorrow", + "2017-06-30 00:00:00", "my birthday is") + testExtract("my birthday is 2 days after tomorrow", + "2017-06-30 00:00:00", "my birthday is") + testExtract("remind me to call mom at 10am 2 days after next saturday", + "2017-07-10 10:00:00", "remind me to call mom") + testExtract("my birthday is 2 days from yesterday", + "2017-06-28 00:00:00", "my birthday is") + testExtract("my birthday is 2 days after yesterday", + "2017-06-28 00:00:00", "my birthday is") + + # "# days ago>" + testExtract("my birthday was 1 day ago", + 
"2017-06-26 00:00:00", "my birthday was") + testExtract("my birthday was 2 days ago", + "2017-06-25 00:00:00", "my birthday was") + testExtract("my birthday was 3 days ago", + "2017-06-24 00:00:00", "my birthday was") + testExtract("my birthday was 4 days ago", + "2017-06-23 00:00:00", "my birthday was") + # TODO this test is imperfect due to "tonight" in the remainder, but let it pass since the date is correct + testExtract("lets meet tonight", + "2017-06-27 22:00:00", "lets meet tonight") + # TODO this test is imperfect due to "at night" in the remainder, but let it pass since the date is correct + testExtract("lets meet later at night", + "2017-06-27 22:00:00", "lets meet later at night") + # TODO this test is imperfect due to "night" in the remainder, but let it pass since the date is correct + testExtract("what's the weather like tomorrow night", + "2017-06-28 22:00:00", "what is weather like night") + # TODO this test is imperfect due to "night" in the remainder, but let it pass since the date is correct + testExtract("what's the weather like next tuesday night", + "2017-07-04 22:00:00", "what is weather like night") + + def test_extract_ambiguous_time_en(self): + morning = datetime(2017, 6, 27, 8, 1, 2, tzinfo=default_timezone()) + evening = datetime(2017, 6, 27, 20, 1, 2, tzinfo=default_timezone()) + noonish = datetime(2017, 6, 27, 12, 1, 2, tzinfo=default_timezone()) + self.assertEqual( + extract_datetime('feed the fish'), None) + self.assertEqual( + extract_datetime('day'), None) + self.assertEqual( + extract_datetime('week'), None) + self.assertEqual( + extract_datetime('month'), None) + self.assertEqual( + extract_datetime('year'), None) + self.assertEqual( + extract_datetime(' '), None) + self.assertEqual( + extract_datetime('feed fish at 10 o\'clock', morning)[0], + datetime(2017, 6, 27, 10, 0, 0, tzinfo=default_timezone())) + self.assertEqual( + extract_datetime('feed fish at 10 o\'clock', noonish)[0], + datetime(2017, 6, 27, 22, 0, 0, 
tzinfo=default_timezone())) + self.assertEqual( + extract_datetime('feed fish at 10 o\'clock', evening)[0], + datetime(2017, 6, 27, 22, 0, 0, tzinfo=default_timezone())) + + def test_extract_date_with_may_I_en(self): + now = datetime(2019, 7, 4, 8, 1, 2, tzinfo=default_timezone()) + may_date = datetime(2019, 5, 2, 10, 11, 20, tzinfo=default_timezone()) + self.assertEqual( + extract_datetime('May I know what time it is tomorrow', now)[0], + datetime(2019, 7, 5, 0, 0, 0, tzinfo=default_timezone())) + self.assertEqual( + extract_datetime('May I when 10 o\'clock is', now)[0], + datetime(2019, 7, 4, 10, 0, 0, tzinfo=default_timezone())) + self.assertEqual( + extract_datetime('On 24th of may I want a reminder', may_date)[0], + datetime(2019, 5, 24, 0, 0, 0, tzinfo=default_timezone())) + + def test_extract_with_other_tzinfo(self): + local_tz = default_timezone() + local_dt = datetime(2019, 7, 4, 7, 1, 2, tzinfo=local_tz) + local_tz_offset = local_tz.utcoffset(local_dt) + not_local_offset = local_tz_offset + timedelta(hours=1) + not_local_tz = tz.tzoffset('TST', not_local_offset.total_seconds()) + not_local_dt = datetime(2019, 7, 4, 8, 1, 2, tzinfo=not_local_tz) + test_dt, remainder = extract_datetime("now is the time", not_local_dt) + self.assertEqual((test_dt.year, test_dt.month, test_dt.day, + test_dt.hour, test_dt.minute, test_dt.second, + test_dt.tzinfo), + (not_local_dt.year, not_local_dt.month, not_local_dt.day, + not_local_dt.hour, not_local_dt.minute, not_local_dt.second, + not_local_dt.tzinfo)) + self.assertNotEqual((test_dt.year, test_dt.month, test_dt.day, + test_dt.hour, test_dt.minute, test_dt.second, + test_dt.tzinfo), + (local_dt.year, local_dt.month, local_dt.day, + local_dt.hour, local_dt.minute, local_dt.second, + local_dt.tzinfo)) + + def test_extract_relativedatetime_en(self): + def extractWithFormat(text): + date = datetime(2017, 6, 27, 10, 1, 2, tzinfo=default_timezone()) + [extractedDate, leftover] = extract_datetime(text, date) + extractedDate = 
extractedDate.strftime("%Y-%m-%d %H:%M:%S") + return [extractedDate, leftover] + + def testExtract(text, expected_date, expected_leftover): + res = extractWithFormat(normalize(text)) + self.assertEqual(res[0], expected_date, "for=" + text) + self.assertEqual(res[1], expected_leftover, "for=" + text) + + testExtract("lets meet in 5 minutes", + "2017-06-27 10:06:02", "lets meet") + testExtract("lets meet in 5minutes", + "2017-06-27 10:06:02", "lets meet") + testExtract("lets meet in 5 seconds", + "2017-06-27 10:01:07", "lets meet") + testExtract("lets meet in 1 hour", + "2017-06-27 11:01:02", "lets meet") + testExtract("lets meet in 2 hours", + "2017-06-27 12:01:02", "lets meet") + testExtract("lets meet in 2hours", + "2017-06-27 12:01:02", "lets meet") + testExtract("lets meet in 1 minute", + "2017-06-27 10:02:02", "lets meet") + testExtract("lets meet in 1 second", + "2017-06-27 10:01:03", "lets meet") + testExtract("lets meet in 5seconds", + "2017-06-27 10:01:07", "lets meet") + + def test_normalize_numbers(self): + self.assertEqual(normalize("remind me to do something at two to two"), + "remind me to do something at 2 to 2") + self.assertEqual(normalize('what time will it be in two minutes'), + 'what time will it be in 2 minutes') + self.assertEqual(normalize('What time will it be in twenty two minutes'), + 'What time will it be in 22 minutes') + self.assertEqual(normalize("remind me to do something at twenty to two"), + "remind me to do something at 20 to 2") + + # TODO imperfect test, maybe should return 'my favorite numbers are 20 2', + # let is pass for now since this is likely a STT issue if ever + # encountered in the wild and is somewhat ambiguous, if this was + # spoken by a human the result is what we expect, if in written form + # it is ambiguous but could mean separate numbers + self.assertEqual(normalize('my favorite numbers are twenty 2'), + 'my favorite numbers are 22') + # TODO imperfect test, same as above, fixing would impact + # extract_numbers 
quite a bit and require a non trivial ammount of + # refactoring + self.assertEqual(normalize('my favorite numbers are 20 2'), + 'my favorite numbers are 22') + + # test ordinals + self.assertEqual(normalize('this is the first'), + 'this is first') + self.assertEqual(normalize('this is the first second'), + 'this is first second') + self.assertEqual(normalize('this is the first second and third'), + 'this is first second and third') + + # test fractions + self.assertEqual(normalize('whole hour'), + 'whole hour') + self.assertEqual(normalize('quarter hour'), + 'quarter hour') + self.assertEqual(normalize('halve hour'), + 'halve hour') + self.assertEqual(normalize('half hour'), + 'half hour') + + def test_extract_date_with_number_words(self): + now = datetime(2019, 7, 4, 8, 1, 2, tzinfo=default_timezone()) + self.assertEqual( + extract_datetime('What time will it be in 2 minutes', now)[0], + datetime(2019, 7, 4, 8, 3, 2, tzinfo=default_timezone())) + self.assertEqual( + extract_datetime('What time will it be in two minutes', now)[0], + datetime(2019, 7, 4, 8, 3, 2, tzinfo=default_timezone())) + self.assertEqual( + extract_datetime('What time will it be in two hundred minutes', now)[0], + datetime(2019, 7, 4, 11, 21, 2, tzinfo=default_timezone())) + + def test_spaces(self): + self.assertEqual(normalize(" this is a test"), + "this is test") + self.assertEqual(normalize(" this is a test "), + "this is test") + self.assertEqual(normalize(" this is one test"), + "this is 1 test") + + def test_numbers(self): + self.assertEqual(normalize("this is a one two three test"), + "this is 1 2 3 test") + self.assertEqual(normalize(" it's a four five six test"), + "it is 4 5 6 test") + self.assertEqual(normalize("it's a seven eight nine test"), + "it is 7 8 9 test") + self.assertEqual(normalize("it's a seven eight nine test"), + "it is 7 8 9 test") + self.assertEqual(normalize("that's a ten eleven twelve test"), + "that is 10 11 12 test") + self.assertEqual(normalize("that's a 
thirteen fourteen test"), + "that is 13 14 test") + self.assertEqual(normalize("that's fifteen sixteen seventeen"), + "that is 15 16 17") + self.assertEqual(normalize("that's eighteen nineteen twenty"), + "that is 18 19 20") + self.assertEqual(normalize("that's one nineteen twenty two"), + "that is 1 19 22") + self.assertEqual(normalize("that's one hundred"), + "that is 100") + self.assertEqual(normalize("that's one two twenty two"), + "that is 1 2 22") + self.assertEqual(normalize("that's one and a half"), + "that is 1 and half") + self.assertEqual(normalize("that's one and a half and five six"), + "that is 1 and half and 5 6") + + def test_multiple_numbers(self): + self.assertEqual(extract_numbers("this is a one two three test"), + [1.0, 2.0, 3.0]) + self.assertEqual(extract_numbers("it's a four five six test"), + [4.0, 5.0, 6.0]) + self.assertEqual(extract_numbers("this is a ten eleven twelve test"), + [10.0, 11.0, 12.0]) + self.assertEqual(extract_numbers("this is a one twenty one test"), + [1.0, 21.0]) + self.assertEqual(extract_numbers("1 dog, seven pigs, macdonald had a " + "farm, 3 times 5 macarena"), + [1, 7, 3, 5]) + self.assertEqual(extract_numbers("two beers for two bears"), + [2.0, 2.0]) + self.assertEqual(extract_numbers("twenty 20 twenty"), + [20, 20, 20]) + self.assertEqual(extract_numbers("twenty 20 22"), + [20.0, 20.0, 22.0]) + self.assertEqual(extract_numbers("twenty twenty two twenty"), + [20, 22, 20]) + self.assertEqual(extract_numbers("twenty 2"), + [22.0]) + self.assertEqual(extract_numbers("twenty 20 twenty 2"), + [20, 20, 22]) + self.assertEqual(extract_numbers("third one"), + [1 / 3, 1]) + self.assertEqual(extract_numbers("third one", ordinals=True), [3]) + self.assertEqual(extract_numbers("six trillion", short_scale=True), + [6e12]) + self.assertEqual(extract_numbers("six trillion", short_scale=False), + [6e18]) + self.assertEqual(extract_numbers("two pigs and six trillion bacteria", + short_scale=True), [2, 6e12]) + 
self.assertEqual(extract_numbers("two pigs and six trillion bacteria", + short_scale=False), [2, 6e18]) + self.assertEqual(extract_numbers("thirty second or first", + ordinals=True), [32, 1]) + self.assertEqual(extract_numbers("this is a seven eight nine and a" + " half test"), + [7.0, 8.0, 9.5]) + + def test_contractions(self): + self.assertEqual(normalize("ain't"), "is not") + self.assertEqual(normalize("aren't"), "are not") + self.assertEqual(normalize("can't"), "can not") + self.assertEqual(normalize("could've"), "could have") + self.assertEqual(normalize("couldn't"), "could not") + self.assertEqual(normalize("didn't"), "did not") + self.assertEqual(normalize("doesn't"), "does not") + self.assertEqual(normalize("don't"), "do not") + self.assertEqual(normalize("gonna"), "going to") + self.assertEqual(normalize("gotta"), "got to") + self.assertEqual(normalize("hadn't"), "had not") + self.assertEqual(normalize("hadn't have"), "had not have") + self.assertEqual(normalize("hasn't"), "has not") + self.assertEqual(normalize("haven't"), "have not") + # TODO: Ambiguous with "he had" + self.assertEqual(normalize("he'd"), "he would") + self.assertEqual(normalize("he'll"), "he will") + # TODO: Ambiguous with "he has" + self.assertEqual(normalize("he's"), "he is") + # TODO: Ambiguous with "how would" + self.assertEqual(normalize("how'd"), "how did") + self.assertEqual(normalize("how'll"), "how will") + # TODO: Ambiguous with "how has" and "how does" + self.assertEqual(normalize("how's"), "how is") + # TODO: Ambiguous with "I had" + self.assertEqual(normalize("I'd"), "I would") + self.assertEqual(normalize("I'll"), "I will") + self.assertEqual(normalize("I'm"), "I am") + self.assertEqual(normalize("I've"), "I have") + self.assertEqual(normalize("I haven't"), "I have not") + self.assertEqual(normalize("isn't"), "is not") + self.assertEqual(normalize("it'd"), "it would") + self.assertEqual(normalize("it'll"), "it will") + # TODO: Ambiguous with "it has" + 
self.assertEqual(normalize("it's"), "it is") + self.assertEqual(normalize("it isn't"), "it is not") + self.assertEqual(normalize("mightn't"), "might not") + self.assertEqual(normalize("might've"), "might have") + self.assertEqual(normalize("mustn't"), "must not") + self.assertEqual(normalize("mustn't have"), "must not have") + self.assertEqual(normalize("must've"), "must have") + self.assertEqual(normalize("needn't"), "need not") + self.assertEqual(normalize("oughtn't"), "ought not") + self.assertEqual(normalize("shan't"), "shall not") + # TODO: Ambiguous wiht "she had" + self.assertEqual(normalize("she'd"), "she would") + self.assertEqual(normalize("she hadn't"), "she had not") + self.assertEqual(normalize("she'll"), "she will") + self.assertEqual(normalize("she's"), "she is") + self.assertEqual(normalize("she isn't"), "she is not") + self.assertEqual(normalize("should've"), "should have") + self.assertEqual(normalize("shouldn't"), "should not") + self.assertEqual(normalize("shouldn't have"), "should not have") + self.assertEqual(normalize("somebody's"), "somebody is") + # TODO: Ambiguous with "someone had" + self.assertEqual(normalize("someone'd"), "someone would") + self.assertEqual(normalize("someone hadn't"), "someone had not") + self.assertEqual(normalize("someone'll"), "someone will") + # TODO: Ambiguous with "someone has" + self.assertEqual(normalize("someone's"), "someone is") + self.assertEqual(normalize("that'll"), "that will") + # TODO: Ambiguous with "that has" + self.assertEqual(normalize("that's"), "that is") + # TODO: Ambiguous with "that had" + self.assertEqual(normalize("that'd"), "that would") + # TODO: Ambiguous with "there had" + self.assertEqual(normalize("there'd"), "there would") + self.assertEqual(normalize("there're"), "there are") + # TODO: Ambiguous with "there has" + self.assertEqual(normalize("there's"), "there is") + # TODO: Ambiguous with "they had" + self.assertEqual(normalize("they'd"), "they would") + 
self.assertEqual(normalize("they'll"), "they will") + self.assertEqual(normalize("they won't have"), "they will not have") + self.assertEqual(normalize("they're"), "they are") + self.assertEqual(normalize("they've"), "they have") + self.assertEqual(normalize("they haven't"), "they have not") + self.assertEqual(normalize("wasn't"), "was not") + # TODO: Ambiguous wiht "we had" + self.assertEqual(normalize("we'd"), "we would") + self.assertEqual(normalize("we would've"), "we would have") + self.assertEqual(normalize("we wouldn't"), "we would not") + self.assertEqual(normalize("we wouldn't have"), "we would not have") + self.assertEqual(normalize("we'll"), "we will") + self.assertEqual(normalize("we won't have"), "we will not have") + self.assertEqual(normalize("we're"), "we are") + self.assertEqual(normalize("we've"), "we have") + self.assertEqual(normalize("weren't"), "were not") + self.assertEqual(normalize("what'd"), "what did") + self.assertEqual(normalize("what'll"), "what will") + self.assertEqual(normalize("what're"), "what are") + # TODO: Ambiguous with "what has" / "what does") + self.assertEqual(normalize("whats"), "what is") + self.assertEqual(normalize("what's"), "what is") + self.assertEqual(normalize("what've"), "what have") + # TODO: Ambiguous with "when has" + self.assertEqual(normalize("when's"), "when is") + self.assertEqual(normalize("where'd"), "where did") + # TODO: Ambiguous with "where has" / where does" + self.assertEqual(normalize("where's"), "where is") + self.assertEqual(normalize("where've"), "where have") + # TODO: Ambiguous with "who had" "who did") + self.assertEqual(normalize("who'd"), "who would") + self.assertEqual(normalize("who'd've"), "who would have") + self.assertEqual(normalize("who'll"), "who will") + self.assertEqual(normalize("who're"), "who are") + # TODO: Ambiguous with "who has" / "who does" + self.assertEqual(normalize("who's"), "who is") + self.assertEqual(normalize("who've"), "who have") + 
self.assertEqual(normalize("why'd"), "why did") + self.assertEqual(normalize("why're"), "why are") + # TODO: Ambiguous with "why has" / "why does" + self.assertEqual(normalize("why's"), "why is") + self.assertEqual(normalize("won't"), "will not") + self.assertEqual(normalize("won't've"), "will not have") + self.assertEqual(normalize("would've"), "would have") + self.assertEqual(normalize("wouldn't"), "would not") + self.assertEqual(normalize("wouldn't've"), "would not have") + self.assertEqual(normalize("ya'll"), "you all") + self.assertEqual(normalize("y'all"), "you all") + self.assertEqual(normalize("y'ain't"), "you are not") + # TODO: Ambiguous with "you had" + self.assertEqual(normalize("you'd"), "you would") + self.assertEqual(normalize("you'd've"), "you would have") + self.assertEqual(normalize("you'll"), "you will") + self.assertEqual(normalize("you're"), "you are") + self.assertEqual(normalize("you aren't"), "you are not") + self.assertEqual(normalize("you've"), "you have") + self.assertEqual(normalize("you haven't"), "you have not") + + def test_combinations(self): + self.assertEqual(normalize("I couldn't have guessed there'd be two"), + "I could not have guessed there would be 2") + self.assertEqual(normalize("I wouldn't have"), "I would not have") + self.assertEqual(normalize("I hadn't been there"), + "I had not been there") + self.assertEqual(normalize("I would've"), "I would have") + self.assertEqual(normalize("it hadn't"), "it had not") + self.assertEqual(normalize("it hadn't have"), "it had not have") + self.assertEqual(normalize("it would've"), "it would have") + self.assertEqual(normalize("she wouldn't have"), "she would not have") + self.assertEqual(normalize("she would've"), "she would have") + self.assertEqual(normalize("someone wouldn't have"), + "someone would not have") + self.assertEqual(normalize("someone would've"), "someone would have") + self.assertEqual(normalize("what's the weather like"), + "what is weather like") + 
self.assertEqual(normalize("that's what I told you"), + "that is what I told you") + + self.assertEqual(normalize("whats 8 + 4"), "what is 8 + 4") + + # TODO not localized; needed in english? + def test_gender(self): + self.assertRaises((AttributeError, FunctionNotLocalizedError), + get_gender, "person", None) + + +if __name__ == "__main__": + unittest.main() diff --git a/test/test_parse_fa.py b/test/test_parse_fa.py index 8df33b45..93703184 100644 --- a/test/test_parse_fa.py +++ b/test/test_parse_fa.py @@ -17,18 +17,13 @@ from datetime import datetime, timedelta from lingua_franca import load_language, unload_language, set_default_lang -from lingua_franca.internal import FunctionNotLocalizedError from lingua_franca.parse import extract_datetime from lingua_franca.parse import extract_duration from lingua_franca.parse import extract_number, extract_numbers -from lingua_franca.parse import fuzzy_match -from lingua_franca.parse import get_gender -from lingua_franca.parse import match_one -from lingua_franca.parse import normalize +from lingua_franca.time import set_default_tz, default_timezone def setUpModule(): - # TODO spin off English tests load_language('fa') set_default_lang('fa') @@ -36,32 +31,33 @@ def setUpModule(): def tearDownModule(): unload_language('fa') + class TestNormalize(unittest.TestCase): def test_extract_number(self): - #self.assertEqual(extract_number("این تست اول است", + # self.assertEqual(extract_number("این تست اول است", # ordinals=True), 1) self.assertEqual(extract_number("این تست دو است"), 2) - #self.assertEqual(extract_number("این تست دوم است", + # self.assertEqual(extract_number("این تست دوم است", # ordinals=True), 2) - #self.assertEqual(extract_number("این تست سوم است", + # self.assertEqual(extract_number("این تست سوم است", # ordinals=True), 3.0) - #self.assertEqual(extract_number("چهارمی", ordinals=True), 4.0) - #self.assertEqual(extract_number("سی و ششمی", ordinals=True), 36.0) + # self.assertEqual(extract_number("چهارمی", 
ordinals=True), 4.0) + # self.assertEqual(extract_number("سی و ششمی", ordinals=True), 36.0) self.assertEqual(extract_number("این تست شماره چهار است"), 4) - #self.assertEqual(extract_number("یک سوم فنجان"), 1.0 / 3.0) + # self.assertEqual(extract_number("یک سوم فنجان"), 1.0 / 3.0) self.assertEqual(extract_number("سه فنجان"), 3) - #self.assertEqual(extract_number("۱/۳ فنجان"), 1.0 / 3.0) - #self.assertEqual(extract_number("یک چهارم فنجان"), 0.25) - #self.assertEqual(extract_number("۱/۴ فنجان"), 0.25) - #self.assertEqual(extract_number("دو سوم فنجان"), 2.0 / 3.0) - #self.assertEqual(extract_number("سه چهارم فنجان"), 3.0 / 4.0) - #self.assertEqual(extract_number("یک و سه چهارم فنجان"), 1.75) - #self.assertEqual(extract_number("۱ فنجان و نیم"), 1.5) - #self.assertEqual(extract_number("یک فنجان و نیم"), 1.5) + # self.assertEqual(extract_number("۱/۳ فنجان"), 1.0 / 3.0) + # self.assertEqual(extract_number("یک چهارم فنجان"), 0.25) + # self.assertEqual(extract_number("۱/۴ فنجان"), 0.25) + # self.assertEqual(extract_number("دو سوم فنجان"), 2.0 / 3.0) + # self.assertEqual(extract_number("سه چهارم فنجان"), 3.0 / 4.0) + # self.assertEqual(extract_number("یک و سه چهارم فنجان"), 1.75) + # self.assertEqual(extract_number("۱ فنجان و نیم"), 1.5) + # self.assertEqual(extract_number("یک فنجان و نیم"), 1.5) self.assertEqual(extract_number("یک و نیم فنجان"), 1.5) self.assertEqual(extract_number("بیست و دو"), 22) - #self.assertEqual(extract_number("بیست و دو و سه پنجم"), 22.6) + # self.assertEqual(extract_number("بیست و دو و سه پنجم"), 22.6) self.assertEqual(extract_number("دویست"), 200) self.assertEqual(extract_number("نه هزار"), 9000) self.assertEqual(extract_number("هزار و پانصد"), 1500) @@ -73,7 +69,7 @@ def test_extract_number(self): self.assertEqual(extract_number("دو میلیون و پانصد هزار " "تن گوشت یخ زده"), 2500000) - def test_extract_duration_en(self): + def test_extract_duration_fa(self): self.assertEqual(extract_duration("10 ثانیه"), (timedelta(seconds=10.0), "")) 
self.assertEqual(extract_duration("5 دقیقه"), @@ -99,10 +95,11 @@ def test_extract_duration_en(self): self.assertEqual(extract_duration("این فیلم یک ساعت و پنجاه و هفت و نیم دقیقه " "طول می کشد"), (timedelta(hours=1, minutes=57.5), - "این فیلم طول می کشد")) - def test_extractdatetime_en(self): + "این فیلم طول می کشد")) + + def test_extractdatetime_fa(self): def extractWithFormat(text): - date = datetime(2017, 6, 27, 13, 4) # Tue June 27, 2017 @ 1:04pm + date = datetime(2017, 6, 27, 13, 4, tzinfo=default_timezone()) # Tue June 27, 2017 @ 1:04pm [extractedDate, leftover] = extract_datetime(text, date) extractedDate = extractedDate.strftime("%Y-%m-%d %H:%M:%S") return [extractedDate, leftover] @@ -138,7 +135,7 @@ def testExtract(text, expected_date, expected_leftover): "2017-06-29 00:00:00", "") testExtract("آب و هوا پس فردا چطوره؟", "2017-06-29 00:00:00", "آب و هوا چطوره؟") - #testExtract("ساعت بیست و دو و چهل و پنج دقیقه بهم یادآوری کن", + # testExtract("ساعت بیست و دو و چهل و پنج دقیقه بهم یادآوری کن", # "2017-06-27 22:45:00", "بهم یادآوری کن") testExtract("هوای جمعه صبح چطوره؟", "2017-06-30 08:00:00", "هوای چطوره؟") @@ -148,11 +145,11 @@ def testExtract(text, expected_date, expected_leftover): "2017-06-27 15:00:00", "هوای چطوره؟") testExtract("یادم بنداز که هشت هفته و دو روز دیگه به مادرم زنگ بزنم", "2017-08-24 00:00:00", "یادم بنداز که به مادرم زنگ بزنم") - #testExtract("یادم بنداز که دوازده مرداد به مادرم زنگ بزنم", + # testExtract("یادم بنداز که دوازده مرداد به مادرم زنگ بزنم", # "2017-08-03 00:00:00", "یادم بنداز که به مادرم زنگ بزنم") - #testExtract("یادم بنداز که ساعت هفت به مادرم زنگ بزنم", + # testExtract("یادم بنداز که ساعت هفت به مادرم زنگ بزنم", # "2017-06-28 07:00:00", "یادم بنداز که به مادرم زنگ بزنم") - #testExtract("یادم بنداز که فردا ساعت بیست و دو به مادرم زنگ بزنم", + # testExtract("یادم بنداز که فردا ساعت بیست و دو به مادرم زنگ بزنم", # "2017-06-28 22:00:00", "یادم بنداز که به مادرم زنگ بزنم") # TODO: This test is imperfect due to the "at 7:00" 
still in the # remainder. But let it pass for now since time is correct @@ -162,8 +159,6 @@ def test_multiple_numbers(self): [1.0, 2.0, 3.0]) self.assertEqual(extract_numbers("ده بیست سه پونزده هزار و شصت و شونزده"), [10, 20, 3, 15060, 16]) - - if __name__ == "__main__":