From bef6859082069c127e7b6fdc84fe528f3e2087b3 Mon Sep 17 00:00:00 2001 From: neverRare Date: Thu, 11 Jan 2024 13:08:08 +0800 Subject: [PATCH 001/271] setup code rewrite --- CHANGELOG.md | 7 ++ src/parser.js | 204 ++++++++++++++++++++++++++++++++++++++++++++++ src/vocabulary.js | 145 ++++++++++++++++++++++++++++++++ 3 files changed, 356 insertions(+) create mode 100644 src/parser.js create mode 100644 src/vocabulary.js diff --git a/CHANGELOG.md b/CHANGELOG.md index 8778093..7104ba7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,12 @@ # Changelog +## 0.2.0 (On development) + +For this version. The whole code has been rewritten to be easier to modify. + +- Rewrite parser to be more declarative. +- Rewrite whole code to be modular. + ## 0.1.1 - Update copyright notice diff --git a/src/parser.js b/src/parser.js new file mode 100644 index 0000000..ab28220 --- /dev/null +++ b/src/parser.js @@ -0,0 +1,204 @@ +class ParseError extends Error {} +class UnrecognizedError extends ParseError {} + +function nothing() { + return function (src) { + return { + output: [{ value: null, rest: src }], + error: null, + }; + }; +} +function eol() { + return function (src) { + if (src === "") { + return { output: [{ value: null, rest: "" }], error: null }; + } else { + return { + output: [], + error: new ParseError( + `Expected end of phrase/sentence, found "${src}"` + ), + }; + } + }; +} +function map(parser, mapper) { + return function (src) { + const result = parser(src); + if (result.error) { + return result; + } + const wholeOutput = []; + let wholeError = null; + for (const { value, rest } in result.output) { + try { + wholeOutput.push({ value: mapper(value), rest }); + } catch (error) { + if (!wholeError) { + wholeError = error; + } + } + } + if (wholeOutput.length === 0) { + return { + output: [], + error: wholeError ?? new ParseError("No error provided"), + }; + } else { + return { + output: wholeOutput, + error: null, + }; + } + }; +} +function choice(choices) { + return function (src) { + let wholeOutput = []; + let wholeError = null; + for (const parser of choices) { + const { output, error } = parser(src); + if (error) { + if (!wholeError) { + wholeError = error; + } + } else { + wholeOutput = wholeOutput.concat(output); + } + } + if (wholeOutput.length === 0) { + return { + output: [], + error: wholeError ?? new ParseError("No error provided"), + }; + } else { + return { + output: wholeOutput, + error: null, + }; + } + }; +} +function optional(parser) { + return choice([nothing(), parser]); +} +function sequence(sequence) { + if (sequence.length === 0) { + throw new Error("sequences can't be empty"); + } + return function (src) { + let wholeOutput = [{ value: [], rest: src }]; + let wholeError = null; + for (const parser of sequence) { + let newOutput = []; + for (const { value, rest } of wholeOutput) { + const { output, error } = parser(rest); + if (error) { + if (!wholeError) { + wholeError = error; + } + } else { + for (const { value: newValue, rest } of output) { + newOutput.push({ + value: value.concat([newValue]), + rest, + }); + } + } + } + wholeOutput = newOutput; + } + if (wholeOutput.length === 0) { + return { + output: [], + error: wholeError ?? new ParseError("No error provided"), + }; + } else { + return { + output: wholeOutput, + error: null, + }; + } + }; +} +function allSpace() { + return function (src) { + const position = src.search(/\S/); + if (position === -1) { + return { + output: [ + { + value: "", + rest: src, + }, + ], + error: null, + }; + } else { + return { + output: [ + { + value: src.slice(0, position), + rest: src.slice(position), + }, + ], + error: null, + }; + } + }; +} +function wordOnly() { + return function (src) { + const position = src.search(/\W/); + if (position === -1) { + if (src === "") { + return { + output: [], + error: new ParseError("Expected word, found end of phrase/sentence"), + }; + } else { + return { + output: [[{ value: src, rest: "" }]], + error: null, + }; + } + } else if (position === 0) { + return { + output: [], + error: new ParseError(`Expected word, found space`), + }; + } else { + return { + output: [ + { + value: src.slice(0, position), + rest: src.slice(position), + }, + ], + error: null, + }; + } + }; +} +function word() { + return map(sequence([wordOnly(), allSpace()]), ([word, _]) => word); +} +function wordFrom(set) { + return map(word(), (word) => { + if (set.has(word)) { + return word; + } else { + throw new UnrecognizedError(`"${word}"`); + } + }); +} +function specificWord(word) { + return map(word(), (thisWord) => { + if (word === thisWord) { + return thisWord; + } else { + throw new UnrecognizedError(`"${thisWord}"`); + } + }); +} diff --git a/src/vocabulary.js b/src/vocabulary.js new file mode 100644 index 0000000..65144db --- /dev/null +++ b/src/vocabulary.js @@ -0,0 +1,145 @@ +export const PARTICLES = new Set([ + "a", + "ala", + "anu", + "e", + "en", + "la", + "li", + "nanpa", + "o", + "pi", + "taso", +]); +export const HEADWORD = new Set([ + "akesi", + "ala", + "alasa", + "ale", + "ali", + "anpa", + "ante", + "awen", + "esun", + "ijo", + "ike", + "ilo", + "insa", + "jaki", + "jan", + "jelo", + "jo", + "kala", + "kalama", + "kama", + "kasi", + "ken", + "kili", + "kiwen", + "ko", + "kon", + "kule", + "kulupu", + "kute", + "lape", + "laso", + "lawa", + "len", + "lete", + "lili", + "linja", + "lipu", + "loje", + "lon", + "luka", + "lukin", + "lupa", + "ma", + "mama", + "mani", + "meli", + "mi", + "mije", + "moku", + "moli", + "monsi", + "mu", + "mun", + "musi", + "mute", + "nanpa", + "nasa", + "nasin", + "nena", + "ni", + "nimi", + "noka", + "olin", + "ona", + "open", + "pakala", + "pali", + "palisa", + "pan", + "pana", + "pilin", + "pimeja", + "pini", + "pipi", + "poka", + "poki", + "pona", + "sama", + "seli", + "selo", + "seme", + "sewi", + "sijelo", + "sike", + "sin", + "sina", + "sinpin", + "sitelen", + "sona", + "soweli", + "suli", + "suno", + "supa", + "suwi", + "tan", + "tawa", + "telo", + "tenpo", + "toki", + "tomo", + "tonsi", + "tu", + "unpa", + "uta", + "utala", + "walo", + "wan", + "waso", + "wawa", + "weka", + "wile", +]); +export const MODIFIER = new Set([...HEADWORD, "taso"]); +const PREVERB = new Set([ + "alasa", + "awen", + "kama", + "ken", + "lukin", + "open", + "pini", + "sona", + "wile", +]); +export const PREPOSITION = new Set(["kepeken", "lon", "sama", "tan", "tawa"]); +export const VOCABULARY = new Set([ + ...PARTICLES, + ...HEADWORD, + ...PREVERB, + ...PREPOSITION, +]); From 1c312b6df1df4059265e43aec73eea09e92d26a2 Mon Sep 17 00:00:00 2001 From: neverRare Date: Thu, 11 Jan 2024 13:10:46 +0800 Subject: [PATCH 002/271] ordering of choices matters --- src/parser.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.js b/src/parser.js index ab28220..f7b97e7 100644 --- a/src/parser.js +++ b/src/parser.js @@ -81,7 +81,7 @@ function choice(choices) { }; } function optional(parser) { - return choice([nothing(), parser]); + return choice([parser, nothing()]); } function sequence(sequence) { if (sequence.length === 0) { From 38becada6d3c8939ad5b27c6ab983c85885ae6d4 Mon Sep 17 00:00:00 2001 From: neverRare Date: Thu, 11 Jan 2024 13:36:45 +0800 Subject: [PATCH 003/271] update description of limitation --- README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 62754b8..7d8bd07 100644 --- a/README.md +++ b/README.md @@ -34,9 +34,9 @@ These are the terminology used in [limitations] [limitations]: #limitations -The following are currently unrecognized (non-definitive but pedantic). +The following are currently unrecognized (non-definitive but pedantic). ✏️ means it is a limitation due to being work in progress and it will be lifted soon. Other limitation may also be lifted. -- Full sentences: It can only translate phrases for now. The following limitations pretends the translator can translate full sentences, this is because these are planned limitations. +- ✏️ Full sentences: It can only translate phrases for now. - Non-pu vocabulary with exception to "pu" ("tonsi" is included in the vocabulary) - Multiple sentences - Comma as sentence separator (commas are treated as decoration and ignored) @@ -64,5 +64,3 @@ The following are currently unrecognized (non-definitive but pedantic). - "o a" - "e a" - "pi a" - -Some of these may be lifted in the future. From a3b4f8514b17a32184de9b077d0154809ffa9f64 Mon Sep 17 00:00:00 2001 From: neverRare Date: Thu, 11 Jan 2024 15:10:03 +0800 Subject: [PATCH 004/271] update readme --- README.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7d8bd07..145bdc3 100644 --- a/README.md +++ b/README.md @@ -22,12 +22,14 @@ Some of these may be lifted in the future. ## Terminology -These are the terminology used in [limitations] +These are the terminology used in [limitations]. **These are not official grammatical terms**. - Headword – A single part of speech that in English, can be a noun, a verb, or an adjective; what the phrase starts with. - Modifier – A part of speech that modifies headword or another modifier. - Phrase – Headword and its modifiers. -- Clause – A part of sentence without "la" particle, "taso" particle in the beginning, "a" particles in the beginning and the end; found before and after "la", or the sentence itself without particles around it if it doesn't have "la". +- Preclause – "taso" or "a" particle before clauses. +- Postclause – "a" particle after clauses. +- Clause – Phrase or sentence found before and after "la". - Proper Word – Proper name; Capitalized in Toki Pona. ## Limitations @@ -64,3 +66,10 @@ The following are currently unrecognized (non-definitive but pedantic). ✏️ m - "o a" - "e a" - "pi a" + +## New Limitations + +The whole code is being rewritten and there will be new different limitations. + +- ✏️ a particle +- nanpa particle not followed by number words (wan, tu, luka, mute, ale/ali) or pini From 8b0439df35ca7e0a17a1923b7de39e45e5e47346 Mon Sep 17 00:00:00 2001 From: neverRare Date: Thu, 11 Jan 2024 15:22:10 +0800 Subject: [PATCH 005/271] add number vocabulary --- src/vocabulary.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/vocabulary.js b/src/vocabulary.js index 65144db..2412205 100644 --- a/src/vocabulary.js +++ b/src/vocabulary.js @@ -125,7 +125,8 @@ export const HEADWORD = new Set([ "wile", ]); export const MODIFIER = new Set([...HEADWORD, "taso"]); -const PREVERB = new Set([ +export const NUMBER = new Set([wan, tu, luka, mute, ale, ali]); +export const PREVERB = new Set([ "alasa", "awen", "kama", From bb788f82ebafcca2c612037a1aad3e9c77bbfc83 Mon Sep 17 00:00:00 2001 From: neverRare Date: Thu, 11 Jan 2024 15:22:41 +0800 Subject: [PATCH 006/271] implement all combinator and nanpa parser --- src/parser.js | 60 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/src/parser.js b/src/parser.js index f7b97e7..31139a1 100644 --- a/src/parser.js +++ b/src/parser.js @@ -1,3 +1,5 @@ +import { HEADWORD, NUMBER } from "./vocabulary"; + class ParseError extends Error {} class UnrecognizedError extends ParseError {} @@ -122,6 +124,46 @@ function sequence(sequence) { } }; } +function all(parser) { + return function (src) { + let wholeOutput = [{ value: [], rest: src }]; + let wholeError = null; + while (true) { + let newOutput = []; + for (const { value, rest } of wholeOutput) { + const { output, error } = parser(rest); + if (error) { + if (!wholeError) { + wholeError = error; + } + } else { + for (const { value: newValue, rest } of output) { + newOutput.push({ + value: value.concat([newValue]), + rest, + }); + } + } + } + if (newOutput.length === 0) { + break; + } else { + wholeOutput = newOutput; + } + } + if (wholeOutput.length === 0) { + return { + output: [], + error: wholeError ?? new ParseError("No error provided"), + }; + } else { + return { + output: wholeOutput, + error: null, + }; + } + }; +} function allSpace() { return function (src) { const position = src.search(/\S/); @@ -202,3 +244,21 @@ function specificWord(word) { } }); } +function headWord() { + return wordFrom(HEADWORD); +} +function nanpa() { + return map( + sequence([ + specificWord("nanpa"), + choice([ + map(specificWord("pini"), (_) => ["pini"]), + map( + sequence([wordFrom(NUMBER), all(wordFrom(NUMBER))]), + ([first, rest]) => [first, ...rest] + ), + ]), + ]), + ([_, number]) => number + ); +} From a032b026d2134f08ac4fd83b150c1aeec22cc9cc Mon Sep 17 00:00:00 2001 From: neverRare Date: Thu, 11 Jan 2024 15:34:24 +0800 Subject: [PATCH 007/271] use spread instead of concat --- src/parser.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parser.js b/src/parser.js index 31139a1..901a32f 100644 --- a/src/parser.js +++ b/src/parser.js @@ -66,7 +66,7 @@ function choice(choices) { wholeError = error; } } else { - wholeOutput = wholeOutput.concat(output); + wholeOutput = [...wholeOutput, ...output]; } } if (wholeOutput.length === 0) { @@ -103,7 +103,7 @@ function sequence(sequence) { } else { for (const { value: newValue, rest } of output) { newOutput.push({ - value: value.concat([newValue]), + value: [...value, ...newValue], rest, }); } @@ -139,7 +139,7 @@ function all(parser) { } else { for (const { value: newValue, rest } of output) { newOutput.push({ - value: value.concat([newValue]), + value: [...value, ...newValue], rest, }); } From e2b2f17a5522a25a6f79add04af68ce69a161321 Mon Sep 17 00:00:00 2001 From: neverRare Date: Thu, 11 Jan 2024 16:26:57 +0800 Subject: [PATCH 008/271] huge reformat --- src/parser.js | 316 ++++++++++++++++++++++---------------------------- 1 file changed, 140 insertions(+), 176 deletions(-) diff --git a/src/parser.js b/src/parser.js index 901a32f..77204fd 100644 --- a/src/parser.js +++ b/src/parser.js @@ -1,105 +1,120 @@ -import { HEADWORD, NUMBER } from "./vocabulary"; +import { HEADWORD } from "./vocabulary"; class ParseError extends Error {} class UnrecognizedError extends ParseError {} -function nothing() { - return function (src) { - return { - output: [{ value: null, rest: src }], - error: null, - }; - }; -} -function eol() { - return function (src) { - if (src === "") { - return { output: [{ value: null, rest: "" }], error: null }; +class Output { + constructor(output) { + if (Array.isArray(output)) { + this.output = output; + this.error = null; + } else if (output instanceof Error) { + this.output = []; + this.error = output; } else { - return { - output: [], - error: new ParseError( - `Expected end of phrase/sentence, found "${src}"` - ), - }; + throw new Error("passed not array nor error"); } - }; -} -function map(parser, mapper) { - return function (src) { - const result = parser(src); - if (result.error) { - return result; + } + push(output) { + this.output.push(output); + this.error = null; + } + append({ output, error }) { + this.output = [...this.output, ...output]; + if (this.output.length > 0) { + this.error = null; + } else { + this.error = error; } - const wholeOutput = []; - let wholeError = null; - for (const { value, rest } in result.output) { - try { - wholeOutput.push({ value: mapper(value), rest }); - } catch (error) { - if (!wholeError) { - wholeError = error; + } + setError(error) { + if (!this.error && this.output.length > 0) { + this.error = error; + } + } + isError() { + return this.output.length === 0; + } +} +class Parser { + constructor(parser) { + this.parser = parser; + } + map(mapper) { + return new Parser((src) => { + const result = this.parser(src); + if (result.error) { + return result; + } + const output = new Output([]); + for (const { value, rest } in result.output) { + try { + output.push({ value: mapper(value), rest }); + } catch (error) { + output.setError(error); } } + return output; + }); + } +} +function char() { + return new Parser((src) => { + if (src.length === 0) { + return new Output( + new ParseError("Expected character, found end of phrase/sentence") + ); + } else { + let [first] = src; + return new Output([ + { + value: first, + rest: src.slice(first.length), + }, + ]); } - if (wholeOutput.length === 0) { - return { - output: [], - error: wholeError ?? new ParseError("No error provided"), - }; + }); +} +function nothing() { + return new Parser((src) => { + return new Output([{ value: null, rest: src }]); + }); +} +function eol() { + return new Parser((src) => { + if (src === "") { + return new Output([{ value: null, rest: "" }]); } else { - return { - output: wholeOutput, - error: null, - }; + return new Output( + new ParseError(`Expected end of phrase/sentence, found "${src}"`) + ); } - }; + }); } -function choice(choices) { - return function (src) { - let wholeOutput = []; - let wholeError = null; +function choice(...choices) { + return new Parser((src) => { + let output = new Output([]); for (const parser of choices) { - const { output, error } = parser(src); - if (error) { - if (!wholeError) { - wholeError = error; - } - } else { - wholeOutput = [...wholeOutput, ...output]; - } - } - if (wholeOutput.length === 0) { - return { - output: [], - error: wholeError ?? new ParseError("No error provided"), - }; - } else { - return { - output: wholeOutput, - error: null, - }; + output.append(parser.parser(src)); } - }; + return output; + }); } function optional(parser) { - return choice([parser, nothing()]); + return choice(parser, nothing()); } -function sequence(sequence) { +function sequence(...sequence) { if (sequence.length === 0) { throw new Error("sequences can't be empty"); } - return function (src) { - let wholeOutput = [{ value: [], rest: src }]; - let wholeError = null; + return new Parser((src) => { + let wholeOutput = new Output([{ value: [], rest: src }]); for (const parser of sequence) { - let newOutput = []; - for (const { value, rest } of wholeOutput) { - const { output, error } = parser(rest); - if (error) { - if (!wholeError) { - wholeError = error; - } + let newOutput = new Output([]); + for (const { value, rest } of wholeOutput.output) { + const { output, error } = parser.parser(rest); + if (output.length === 0) { + newOutput.setError(error); } else { for (const { value: newValue, rest } of output) { newOutput.push({ @@ -111,31 +126,18 @@ function sequence(sequence) { } wholeOutput = newOutput; } - if (wholeOutput.length === 0) { - return { - output: [], - error: wholeError ?? new ParseError("No error provided"), - }; - } else { - return { - output: wholeOutput, - error: null, - }; - } - }; + return wholeOutput; + }); } function all(parser) { - return function (src) { - let wholeOutput = [{ value: [], rest: src }]; - let wholeError = null; + return new Parser((src) => { + let wholeOutput = new Output([{ value: [], rest: src }]); while (true) { - let newOutput = []; - for (const { value, rest } of wholeOutput) { - const { output, error } = parser(rest); - if (error) { - if (!wholeError) { - wholeError = error; - } + let newOutput = new Output([]); + for (const { value, rest } of wholeOutput.output) { + const { output, error } = parser.parser(rest); + if (output.length === 0) { + newOutput.setError(error); } else { for (const { value: newValue, rest } of output) { newOutput.push({ @@ -145,89 +147,66 @@ function all(parser) { } } } - if (newOutput.length === 0) { + if (newOutput.isError()) { break; } else { wholeOutput = newOutput; } } - if (wholeOutput.length === 0) { - return { - output: [], - error: wholeError ?? new ParseError("No error provided"), - }; - } else { - return { - output: wholeOutput, - error: null, - }; - } - }; + return wholeOutput; + }); +} +function allAtLeastOnce(parser) { + return sequence(parser, all(parser)).map(([first, rest]) => [first, ...rest]); } function allSpace() { - return function (src) { + return new Parser((src) => { const position = src.search(/\S/); if (position === -1) { - return { - output: [ - { - value: "", - rest: src, - }, - ], - error: null, - }; + return new Output([ + { + value: "", + rest: src, + }, + ]); } else { - return { - output: [ - { - value: src.slice(0, position), - rest: src.slice(position), - }, - ], - error: null, - }; + return new Output([ + { + value: src.slice(0, position), + rest: src.slice(position), + }, + ]); } - }; + }); } function wordOnly() { - return function (src) { + return new ParseError((src) => { const position = src.search(/\W/); if (position === -1) { if (src === "") { - return { - output: [], - error: new ParseError("Expected word, found end of phrase/sentence"), - }; + return new Output( + new ParseError("Expected word, found end of phrase/sentence") + ); } else { - return { - output: [[{ value: src, rest: "" }]], - error: null, - }; + return new Output([{ value: src, rest: "" }]); } } else if (position === 0) { - return { - output: [], - error: new ParseError(`Expected word, found space`), - }; + return new Output(new ParseError(`Expected word, found space`)); } else { - return { - output: [ - { - value: src.slice(0, position), - rest: src.slice(position), - }, - ], - error: null, - }; + return new Output([ + { + value: src.slice(0, position), + rest: src.slice(position), + }, + ]); } - }; + }); } function word() { - return map(sequence([wordOnly(), allSpace()]), ([word, _]) => word); + return sequence(wordOnly(), allSpace()).map(([word, _]) => word); } function wordFrom(set) { - return map(word(), (word) => { + return word().map((word) => { if (set.has(word)) { return word; } else { @@ -236,7 +215,7 @@ function wordFrom(set) { }); } function specificWord(word) { - return map(word(), (thisWord) => { + return word().map((thisWord) => { if (word === thisWord) { return thisWord; } else { @@ -247,18 +226,3 @@ function specificWord(word) { function headWord() { return wordFrom(HEADWORD); } -function nanpa() { - return map( - sequence([ - specificWord("nanpa"), - choice([ - map(specificWord("pini"), (_) => ["pini"]), - map( - sequence([wordFrom(NUMBER), all(wordFrom(NUMBER))]), - ([first, rest]) => [first, ...rest] - ), - ]), - ]), - ([_, number]) => number - ); -} From 0d5625911a31287effbc8f31a920ea2b1b9a56a0 Mon Sep 17 00:00:00 2001 From: neverRare Date: Thu, 11 Jan 2024 16:52:57 +0800 Subject: [PATCH 009/271] change error to unrecognized error --- src/parser.js | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/parser.js b/src/parser.js index 77204fd..cd4d3d0 100644 --- a/src/parser.js +++ b/src/parser.js @@ -85,9 +85,7 @@ function eol() { if (src === "") { return new Output([{ value: null, rest: "" }]); } else { - return new Output( - new ParseError(`Expected end of phrase/sentence, found "${src}"`) - ); + return new Output(new UnrecognizedError(`"${src}"`)); } }); } From c76581858f54e4b378563ac4d66f36dd75ec9885 Mon Sep 17 00:00:00 2001 From: neverRare Date: Thu, 11 Jan 2024 18:20:28 +0800 Subject: [PATCH 010/271] implement regex matching --- src/parser.js | 53 +++++++++++++++++---------------------------------- 1 file changed, 17 insertions(+), 36 deletions(-) diff --git a/src/parser.js b/src/parser.js index cd4d3d0..805892f 100644 --- a/src/parser.js +++ b/src/parser.js @@ -1,6 +1,7 @@ import { HEADWORD } from "./vocabulary"; class ParseError extends Error {} +class UnreachableError extends ParseError {} class UnrecognizedError extends ParseError {} class Output { @@ -58,20 +59,23 @@ class Parser { }); } } -function char() { +function match(regex) { + const newRegex = new RegExp("^" + regex.source, regex.flags); return new Parser((src) => { - if (src.length === 0) { - return new Output( - new ParseError("Expected character, found end of phrase/sentence") - ); + const match = src.match(newRegex); + if (match) { + return new Output([{ value: match, rest: src.slice(match[0].length) }]); } else { - let [first] = src; - return new Output([ - { - value: first, - rest: src.slice(first.length), - }, - ]); + if (src === "") { + return new UnreachableError(); + } else { + const token = src.match(/(.*)(?:\s|$)/)[1]; + if (token === "") { + return new UnreachableError(); + } else { + return new Output(new UnrecognizedError(`"${token}"`)); + } + } } }); } @@ -177,31 +181,8 @@ function allSpace() { } }); } -function wordOnly() { - return new ParseError((src) => { - const position = src.search(/\W/); - if (position === -1) { - if (src === "") { - return new Output( - new ParseError("Expected word, found end of phrase/sentence") - ); - } else { - return new Output([{ value: src, rest: "" }]); - } - } else if (position === 0) { - return new Output(new ParseError(`Expected word, found space`)); - } else { - return new Output([ - { - value: src.slice(0, position), - rest: src.slice(position), - }, - ]); - } - }); -} function word() { - return sequence(wordOnly(), allSpace()).map(([word, _]) => word); + return match(/([a-z])\s*/).map(([_, word]) => word); } function wordFrom(set) { return word().map((word) => { From d8f245cd2be36a5891fb5c33a2f0dd55c063a48f Mon Sep 17 00:00:00 2001 From: neverRare Date: Thu, 11 Jan 2024 18:21:26 +0800 Subject: [PATCH 011/271] change simplify allSpace() --- src/parser.js | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) diff --git a/src/parser.js b/src/parser.js index 805892f..62122dd 100644 --- a/src/parser.js +++ b/src/parser.js @@ -162,24 +162,7 @@ function allAtLeastOnce(parser) { return sequence(parser, all(parser)).map(([first, rest]) => [first, ...rest]); } function allSpace() { - return new Parser((src) => { - const position = src.search(/\S/); - if (position === -1) { - return new Output([ - { - value: "", - rest: src, - }, - ]); - } else { - return new Output([ - { - value: src.slice(0, position), - rest: src.slice(position), - }, - ]); - } - }); + return new match(/\s*/); } function word() { return match(/([a-z])\s*/).map(([_, word]) => word); From d4b6a54624618b67be53953ee666931dc6c88073 Mon Sep 17 00:00:00 2001 From: neverRare Date: Thu, 11 Jan 2024 18:28:52 +0800 Subject: [PATCH 012/271] fix word parser and implement proper word parser --- src/parser.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/parser.js b/src/parser.js index 62122dd..c98c54d 100644 --- a/src/parser.js +++ b/src/parser.js @@ -165,7 +165,12 @@ function allSpace() { return new match(/\s*/); } function word() { - return match(/([a-z])\s*/).map(([_, word]) => word); + return match(/([a-z]+)\s*/).map(([_, word]) => word); +} +function properWord() { + return all(match(/([A-Z][a-z]*)\s*/).map(([_, word]) => word)).map((array) => + array.join(" ") + ); } function wordFrom(set) { return word().map((word) => { From 3cf2bc12ab2c8d24d3d3218985503a6adc9ee057 Mon Sep 17 00:00:00 2001 From: neverRare Date: Thu, 11 Jan 2024 18:30:05 +0800 Subject: [PATCH 013/271] format --- src/parser.js | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/parser.js b/src/parser.js index c98c54d..3382f1c 100644 --- a/src/parser.js +++ b/src/parser.js @@ -65,16 +65,14 @@ function match(regex) { const match = src.match(newRegex); if (match) { return new Output([{ value: match, rest: src.slice(match[0].length) }]); + } else if (src === "") { + return new UnreachableError(); } else { - if (src === "") { + const token = src.match(/(.*)(?:\s|$)/)[1]; + if (token === "") { return new UnreachableError(); } else { - const token = src.match(/(.*)(?:\s|$)/)[1]; - if (token === "") { - return new UnreachableError(); - } else { - return new Output(new UnrecognizedError(`"${token}"`)); - } + return new Output(new UnrecognizedError(`"${token}"`)); } } }); From 4f8b9ec31211b52aef1da131c8ad78b899cdd684 Mon Sep 17 00:00:00 2001 From: neverRare Date: Thu, 11 Jan 2024 18:31:27 +0800 Subject: [PATCH 014/271] add description for error handling --- src/parser.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parser.js b/src/parser.js index 3382f1c..f5eec98 100644 --- a/src/parser.js +++ b/src/parser.js @@ -170,12 +170,12 @@ function properWord() { array.join(" ") ); } -function wordFrom(set) { +function wordFrom(set, description) { return word().map((word) => { if (set.has(word)) { return word; } else { - throw new UnrecognizedError(`"${word}"`); + throw new UnrecognizedError(`"${word}" as ${description}`); } }); } @@ -189,5 +189,5 @@ function specificWord(word) { }); } function headWord() { - return wordFrom(HEADWORD); + return wordFrom(HEADWORD, "headword"); } From 1d700442ed16d683ae66f51a10150ac1a6689060 Mon Sep 17 00:00:00 2001 From: neverRare Date: Thu, 11 Jan 2024 18:35:47 +0800 Subject: [PATCH 015/271] reword readme --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 145bdc3..344dda3 100644 --- a/README.md +++ b/README.md @@ -9,14 +9,14 @@ An imperfect Toki Pona to English translator that translates into multiple sente The goals for this projects are: - Provide translation that covers most of semantics and meaning of a Toki Pona sentence, but it doesn't have to be complete. This gives translations for users to scan into to give them a feel of how broad a Toki Pona sentence can mean. -- As much as possible, provide translations that are grammatically sound: not just correct but also feels right. For example, "red one thing" sounds off than "one red thing". Due to the difference of English and Toki Pona and nuances of English, the translator may fall severely short for this goal, but we can try! +- As much as possible, provide translations that are grammatically sound: not just correct but also feels right. For example, "one red thing" sounds better than "red one thing". Due to the difference of English and Toki Pona and nuances of English, the translator may fall severely short for this goal, but we can try! ## Non-goals - Provide every possible translations. - Handle every edge cases of Toki Pona grammar. Some edge cases are listed in [limitations] along with others. - Handle compounds such as translating "tomo tawa" into "vehicle" -- Translate Tokiponized proper word into what it was before such as translating "Manka" into "Minecraft" +- Translate Tokiponized proper word into Untokiponized word such as translating "Manka" into "Minecraft" Some of these may be lifted in the future. From 81d9b77b81362c95b6bbebae54d66e12332ad6dd Mon Sep 17 00:00:00 2001 From: neverRare Date: Thu, 11 Jan 2024 18:41:05 +0800 Subject: [PATCH 016/271] improve specificWord parser --- src/parser.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.js b/src/parser.js index f5eec98..49bc497 100644 --- a/src/parser.js +++ b/src/parser.js @@ -184,7 +184,7 @@ function specificWord(word) { if (word === thisWord) { return thisWord; } else { - throw new UnrecognizedError(`"${thisWord}"`); + throw new UnrecognizedError(`"${thisWord}" instead of "${word}"`); } }); } From 01f391ba9a242ed3cbc85f2bad76cabbdc9bca44 Mon Sep 17 00:00:00 2001 From: neverRare Date: Thu, 11 Jan 2024 18:42:07 +0800 Subject: [PATCH 017/271] small change --- src/parser.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.js b/src/parser.js index 49bc497..7cdaaf3 100644 --- a/src/parser.js +++ b/src/parser.js @@ -165,7 +165,7 @@ function allSpace() { function word() { return match(/([a-z]+)\s*/).map(([_, word]) => word); } -function properWord() { +function properWords() { return all(match(/([A-Z][a-z]*)\s*/).map(([_, word]) => word)).map((array) => array.join(" ") ); From 6edd156eb23c1689026803e86f7192fc2e20fca2 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 07:59:21 +0800 Subject: [PATCH 018/271] migrate to typescript --- src/{parser.js => parser.ts} | 101 +++++++++++++++------------ src/{vocabulary.js => vocabulary.ts} | 2 +- tsconfig.json | 13 ++++ 3 files changed, 70 insertions(+), 46 deletions(-) rename src/{parser.js => parser.ts} (60%) rename src/{vocabulary.js => vocabulary.ts} (95%) create mode 100644 tsconfig.json diff --git a/src/parser.js b/src/parser.ts similarity index 60% rename from src/parser.js rename to src/parser.ts index 7cdaaf3..1bcfbdb 100644 --- a/src/parser.js +++ b/src/parser.ts @@ -1,11 +1,13 @@ -import { HEADWORD } from "./vocabulary"; +import { HEADWORD } from "./vocabulary.ts"; class ParseError extends Error {} class UnreachableError extends ParseError {} class UnrecognizedError extends ParseError {} -class Output { - constructor(output) { +class Output { + output: Array<{ value: T; rest: string }>; + error: null | Error; + constructor(output: Array<{ value: T; rest: string }> | Error) { if (Array.isArray(output)) { this.output = output; this.error = null; @@ -16,11 +18,11 @@ class Output { throw new Error("passed not array nor error"); } } - push(output) { + push(output: { value: T; rest: string }): void { this.output.push(output); this.error = null; } - append({ output, error }) { + append({ output, error }: Output): void { this.output = [...this.output, ...output]; if (this.output.length > 0) { this.error = null; @@ -28,61 +30,67 @@ class Output { this.error = error; } } - setError(error) { + setError(error: null | Error): void { if (!this.error && this.output.length > 0) { this.error = error; } } - isError() { + isError(): boolean { return this.output.length === 0; } } -class Parser { - constructor(parser) { - this.parser = parser; - } - map(mapper) { +class Parser { + constructor(public readonly parser: (src: string) => Output) {} + map(mapper: (x: T) => U): Parser { return new Parser((src) => { const result = this.parser(src); - if (result.error) { - return result; + if (result.isError()) { + if (result.error) { + return new Output(result.error); + } else { + return new Output([]); + } } - const output = new Output([]); - for (const { value, rest } in result.output) { + const output = new Output([]); + for (const { value, rest } of result.output) { try { output.push({ value: mapper(value), rest }); } catch (error) { - output.setError(error); + if (error instanceof Error) { + output.setError(error); + } else { + throw error; + } } } return output; }); } } -function match(regex) { +function match(regex: RegExp): Parser { const newRegex = new RegExp("^" + regex.source, regex.flags); return new Parser((src) => { const match = src.match(newRegex); if (match) { return new Output([{ value: match, rest: src.slice(match[0].length) }]); } else if (src === "") { - return new UnreachableError(); + return new Output(new UnreachableError()); } else { - const token = src.match(/(.*)(?:\s|$)/)[1]; - if (token === "") { - return new UnreachableError(); - } else { + const token = src.match(/(.*)(?:\s|$)/)?.[1]; + if (token) { return new Output(new UnrecognizedError(`"${token}"`)); + } else { + return new Output(new UnreachableError()); } } }); } -function nothing() { +function nothing(): Parser { return new Parser((src) => { return new Output([{ value: null, rest: src }]); }); } -function eol() { +function eol(): Parser { return new Parser((src) => { if (src === "") { return new Output([{ value: null, rest: "" }]); @@ -91,26 +99,29 @@ function eol() { } }); } -function choice(...choices) { +function choice(...choices: Array>): Parser { return new Parser((src) => { - let output = new Output([]); + let output = new Output([]); for (const parser of choices) { output.append(parser.parser(src)); } return output; }); } -function optional(parser) { +function optional(parser: Parser): Parser { return choice(parser, nothing()); } -function sequence(...sequence) { +function sequence>( + ...sequence: { [I in keyof T]: Parser } & { length: T["length"] } +): Parser { if (sequence.length === 0) { throw new Error("sequences can't be empty"); } + // We resorted to using `any` types here, make sure it works properly return new Parser((src) => { - let wholeOutput = new Output([{ value: [], rest: src }]); + let wholeOutput = new Output([{ value: [], rest: src }]); for (const parser of sequence) { - let newOutput = new Output([]); + let newOutput = new Output([]); for (const { value, rest } of wholeOutput.output) { const { output, error } = parser.parser(rest); if (output.length === 0) { @@ -118,7 +129,7 @@ function sequence(...sequence) { } else { for (const { value: newValue, rest } of output) { newOutput.push({ - value: [...value, ...newValue], + value: [...value, newValue], rest, }); } @@ -129,11 +140,11 @@ function sequence(...sequence) { return wholeOutput; }); } -function all(parser) { +function all(parser: Parser): Parser> { return new Parser((src) => { - let wholeOutput = new Output([{ value: [], rest: src }]); + let wholeOutput = new Output>([{ value: [], rest: src }]); while (true) { - let newOutput = new Output([]); + let newOutput = new Output>([]); for (const { value, rest } of wholeOutput.output) { const { output, error } = parser.parser(rest); if (output.length === 0) { @@ -141,7 +152,7 @@ function all(parser) { } else { for (const { value: newValue, rest } of output) { newOutput.push({ - value: [...value, ...newValue], + value: [...value, newValue], rest, }); } @@ -156,21 +167,21 @@ function all(parser) { return wholeOutput; }); } -function allAtLeastOnce(parser) { +function allAtLeastOnce(parser: Parser): Parser> { return sequence(parser, all(parser)).map(([first, rest]) => [first, ...rest]); } -function allSpace() { - return new match(/\s*/); +function allSpace(): Parser { + return match(/\s*/).map(([space]) => space); } -function word() { +function word(): Parser { return match(/([a-z]+)\s*/).map(([_, word]) => word); } -function properWords() { +function properWords(): Parser { return all(match(/([A-Z][a-z]*)\s*/).map(([_, word]) => word)).map((array) => array.join(" ") ); } -function wordFrom(set, description) { +function wordFrom(set: Set, description: string): Parser { return word().map((word) => { if (set.has(word)) { return word; @@ -179,15 +190,15 @@ function wordFrom(set, description) { } }); } -function specificWord(word) { +function specificWord(thatWord: string): Parser { return word().map((thisWord) => { - if (word === thisWord) { + if (thatWord === thisWord) { return thisWord; } else { throw new UnrecognizedError(`"${thisWord}" instead of "${word}"`); } }); } -function headWord() { +function headWord(): Parser { return wordFrom(HEADWORD, "headword"); } diff --git a/src/vocabulary.js b/src/vocabulary.ts similarity index 95% rename from src/vocabulary.js rename to src/vocabulary.ts index 2412205..8107b9a 100644 --- a/src/vocabulary.js +++ b/src/vocabulary.ts @@ -125,7 +125,7 @@ export const HEADWORD = new Set([ "wile", ]); export const MODIFIER = new Set([...HEADWORD, "taso"]); -export const NUMBER = new Set([wan, tu, luka, mute, ale, ali]); +export const NUMBER = new Set(["wan", "tu", "luka", "mute", "ale", "ali"]); export const PREVERB = new Set([ "alasa", "awen", diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..b1e183b --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,13 @@ +{ + "compilerOptions": { + "strict": true, + "strictBindCallApply": true, + "strictFunctionTypes": true, + "strictNullChecks": true, + "strictPropertyInitialization": true, + "lib": ["ES2022"], + "downlevelIteration": true, + "allowImportingTsExtensions": true, + "noEmit": true, + } +} From 74086745ddfe2666a0be6979fa9f76b2e5c21979 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 08:46:15 +0800 Subject: [PATCH 019/271] implement ast --- src/ast.ts | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 src/ast.ts diff --git a/src/ast.ts b/src/ast.ts new file mode 100644 index 0000000..7e487e6 --- /dev/null +++ b/src/ast.ts @@ -0,0 +1,33 @@ +export type Modifier = + | { type: "word"; word: string } + | { type: "proper words"; words: string } + | { type: "pi"; phrase: Phrase } + | { type: "nanpa ordinal"; phrase: Phrase } + | { type: "cardinal"; number: Array }; + +export type Phrase = { head: string; modifiers: Array }; + +export type Preposition = { preposition: string; phrase: Phrase }; + +export type Clause = + | { type: "en phrase"; phrases: Array } + | { type: "o vocative"; phrases: Array } + | { + type: "li clause"; + subjects: Array; + predicates: Array; + prepositions: Array; + } + | { + type: "o clause"; + subjects: Array; + predicates: Array; + prepositions: Array; + } + | { type: "preposition"; prepositions: Array }; + +export type FullClause = { taso: boolean; clause: Clause }; + +export type Sentence = + | { type: "single clause"; clause: FullClause } + | { type: "la"; left: Clause; right: Sentence }; From 54cfe059240a15ad6f2351e2cea9e60be5f9a534 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 08:50:30 +0800 Subject: [PATCH 020/271] merge headword and modifier vocabulary as content word --- src/vocabulary.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vocabulary.ts b/src/vocabulary.ts index 8107b9a..5598bee 100644 --- a/src/vocabulary.ts +++ b/src/vocabulary.ts @@ -11,7 +11,7 @@ export const PARTICLES = new Set([ "pi", "taso", ]); -export const HEADWORD = new Set([ +export const CONTENTWORD = new Set([ "akesi", "ala", "alasa", @@ -107,6 +107,7 @@ export const HEADWORD = new Set([ "supa", "suwi", "tan", + "taso", "tawa", "telo", "tenpo", @@ -124,7 +125,6 @@ export const HEADWORD = new Set([ "weka", "wile", ]); -export const MODIFIER = new Set([...HEADWORD, "taso"]); export const NUMBER = new Set(["wan", "tu", "luka", "mute", "ale", "ali"]); export const PREVERB = new Set([ "alasa", From 088732114056f5fda43217dd8b1f2186a212a639 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 08:50:40 +0800 Subject: [PATCH 021/271] update limitations --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 344dda3..83f73e7 100644 --- a/README.md +++ b/README.md @@ -71,5 +71,5 @@ The following are currently unrecognized (non-definitive but pedantic). ✏️ m The whole code is being rewritten and there will be new different limitations. -- ✏️ a particle -- nanpa particle not followed by number words (wan, tu, luka, mute, ale/ali) or pini +- ✏️ "a" particle +- ✏️ "anu" particle From b8d7ae2cd32c087caf7364d40708aeebd7438e76 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 08:50:58 +0800 Subject: [PATCH 022/271] fix error --- src/parser.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 1bcfbdb..e64b7da 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1,4 +1,4 @@ -import { HEADWORD } from "./vocabulary.ts"; +import { CONTENTWORD } from "./vocabulary.ts"; class ParseError extends Error {} class UnreachableError extends ParseError {} @@ -200,5 +200,5 @@ function specificWord(thatWord: string): Parser { }); } function headWord(): Parser { - return wordFrom(HEADWORD, "headword"); + return wordFrom(CONTENTWORD, "headword"); } From c0fc58b28ff243d028cb8fa2caae133c7c25c7f1 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 08:51:34 +0800 Subject: [PATCH 023/271] I should check errors before committing --- src/vocabulary.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/vocabulary.ts b/src/vocabulary.ts index 5598bee..52da32b 100644 --- a/src/vocabulary.ts +++ b/src/vocabulary.ts @@ -140,7 +140,6 @@ export const PREVERB = new Set([ export const PREPOSITION = new Set(["kepeken", "lon", "sama", "tan", "tawa"]); export const VOCABULARY = new Set([ ...PARTICLES, - ...HEADWORD, - ...PREVERB, + ...CONTENTWORD, ...PREPOSITION, ]); From e2c071b6382afb663195b7e504801bdc735ddcfc Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 08:54:00 +0800 Subject: [PATCH 024/271] implement preverb in ast --- src/ast.ts | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 7e487e6..f4b7676 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -7,21 +7,25 @@ export type Modifier = export type Phrase = { head: string; modifiers: Array }; -export type Preposition = { preposition: string; phrase: Phrase }; +export type FullPhrase = + | { type: "default"; phrase: Phrase } + | { type: "preverb"; preverb: string; phrase: Phrase }; + +export type Preposition = { preposition: string; phrase: FullPhrase }; export type Clause = - | { type: "en phrase"; phrases: Array } - | { type: "o vocative"; phrases: Array } + | { type: "en phrase"; phrases: Array } + | { type: "o vocative"; phrases: Array } | { type: "li clause"; - subjects: Array; - predicates: Array; + subjects: Array; + predicates: Array; prepositions: Array; } | { type: "o clause"; - subjects: Array; - predicates: Array; + subjects: Array; + predicates: Array; prepositions: Array; } | { type: "preposition"; prepositions: Array }; From 5c313cd766741aafff35e6d39ef0ad8bdca70aa6 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 08:56:37 +0800 Subject: [PATCH 025/271] further implement preverbs --- src/ast.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index f4b7676..06e252e 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -1,8 +1,8 @@ export type Modifier = | { type: "word"; word: string } | { type: "proper words"; words: string } - | { type: "pi"; phrase: Phrase } - | { type: "nanpa ordinal"; phrase: Phrase } + | { type: "pi"; phrase: FullPhrase } + | { type: "nanpa ordinal"; phrase: FullPhrase } | { type: "cardinal"; number: Array }; export type Phrase = { head: string; modifiers: Array }; From 333f51055cdfeb669422a81cdea05dc52e45a44f Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 09:08:34 +0800 Subject: [PATCH 026/271] implement modifier parser and more helper functions --- src/parser.ts | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/src/parser.ts b/src/parser.ts index e64b7da..507e7b8 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1,3 +1,4 @@ +import { FullPhrase, Modifier, Phrase } from "./ast.ts"; import { CONTENTWORD } from "./vocabulary.ts"; class ParseError extends Error {} @@ -66,6 +67,12 @@ class Parser { return output; }); } + with(parser: Parser): Parser { + return sequence(this, parser).map(([_, output]) => output); + } + skip(parser: Parser): Parser { + return sequence(this, parser).map(([output, _]) => output); + } } function match(regex: RegExp): Parser { const newRegex = new RegExp("^" + regex.source, regex.flags); @@ -202,3 +209,37 @@ function specificWord(thatWord: string): Parser { function headWord(): Parser { return wordFrom(CONTENTWORD, "headword"); } +function modifier(): Parser { + return choice( + wordFrom(CONTENTWORD, "modifier").map( + (word) => + ({ + type: "word", + word: word, + } as Modifier) + ), + properWords().map((words) => ({ + type: "proper words", + words, + })), + specificWord("pi") + .with(fullPhrase()) + .map((phrase) => ({ + type: "pi", + phrase, + })), + specificWord("nanpa") + .with(fullPhrase()) + .map((phrase) => ({ + type: "nanpa ordinal", + phrase, + })) + // TODO: cardinal modifier + ); +} +function phrase(): Parser { + throw new Error("TODO"); +} +function fullPhrase(): Parser { + throw new Error("TODO"); +} From 0d058c035d174bcdcf933800722a0a7c55208801 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 09:11:27 +0800 Subject: [PATCH 027/271] add many combinator --- src/parser.ts | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/parser.ts b/src/parser.ts index 507e7b8..824f16d 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -147,6 +147,35 @@ function sequence>( return wholeOutput; }); } +function many(parser: Parser): Parser> { + return new Parser((src) => { + let wholeOutput = new Output>([{ value: [], rest: src }]); + let currentOutput = new Output>([{ value: [], rest: src }]); + while (true) { + let newOutput = new Output>([]); + for (const { value, rest } of currentOutput.output) { + const { output, error } = parser.parser(rest); + if (output.length === 0) { + newOutput.setError(error); + } else { + for (const { value: newValue, rest } of output) { + newOutput.push({ + value: [...value, newValue], + rest, + }); + } + } + } + if (newOutput.isError()) { + break; + } else { + wholeOutput.append(newOutput); + currentOutput = newOutput; + } + } + return wholeOutput; + }); +} function all(parser: Parser): Parser> { return new Parser((src) => { let wholeOutput = new Output>([{ value: [], rest: src }]); From 674ac7283852287eaebea1eba295e0197dd12a81 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 09:12:20 +0800 Subject: [PATCH 028/271] small change --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index 824f16d..ff33d5a 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -244,7 +244,7 @@ function modifier(): Parser { (word) => ({ type: "word", - word: word, + word, } as Modifier) ), properWords().map((words) => ({ From 8fc5fa21e3b414673597e7ff5fefb2880d33e02f Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 09:26:24 +0800 Subject: [PATCH 029/271] implement phrase parser and small changes --- src/ast.ts | 2 +- src/parser.ts | 26 +++++++++++++++++++++++--- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 06e252e..14fa03b 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -5,7 +5,7 @@ export type Modifier = | { type: "nanpa ordinal"; phrase: FullPhrase } | { type: "cardinal"; number: Array }; -export type Phrase = { head: string; modifiers: Array }; +export type Phrase = { headWord: string; modifiers: Array }; export type FullPhrase = | { type: "default"; phrase: Phrase } diff --git a/src/parser.ts b/src/parser.ts index ff33d5a..f5e33f1 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1,5 +1,5 @@ import { FullPhrase, Modifier, Phrase } from "./ast.ts"; -import { CONTENTWORD } from "./vocabulary.ts"; +import { CONTENTWORD, PREVERB } from "./vocabulary.ts"; class ParseError extends Error {} class UnreachableError extends ParseError {} @@ -267,8 +267,28 @@ function modifier(): Parser { ); } function phrase(): Parser { - throw new Error("TODO"); + return sequence(headWord(), many(modifier())).map( + ([headWord, modifiers]) => ({ + headWord, + modifiers, + }) + ); } function fullPhrase(): Parser { - throw new Error("TODO"); + return sequence(optional(wordFrom(PREVERB, "preverb")), phrase()).map( + ([preverb, phrase]) => { + if (preverb) { + return { + type: "preverb", + preverb, + phrase, + }; + } else { + return { + type: "default", + phrase, + }; + } + } + ); } From d811ae8f637542ae962ef380cc0959c6c6013ab7 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 09:36:32 +0800 Subject: [PATCH 030/271] implement recursive combinator to avoid infinite loop --- src/parser.ts | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index f5e33f1..dfd25a6 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -106,6 +106,9 @@ function eol(): Parser { } }); } +function recursive(parser: () => Parser): Parser { + return new Parser((src) => parser().parser(src)); +} function choice(...choices: Array>): Parser { return new Parser((src) => { let output = new Output([]); @@ -275,20 +278,21 @@ function phrase(): Parser { ); } function fullPhrase(): Parser { - return sequence(optional(wordFrom(PREVERB, "preverb")), phrase()).map( - ([preverb, phrase]) => { - if (preverb) { - return { - type: "preverb", - preverb, - phrase, - }; - } else { - return { - type: "default", - phrase, - }; - } + return sequence( + optional(wordFrom(PREVERB, "preverb")), + recursive(phrase) + ).map(([preverb, phrase]) => { + if (preverb) { + return { + type: "preverb", + preverb, + phrase, + }; + } else { + return { + type: "default", + phrase, + }; } - ); + }); } From b8c72715f6710ff01bee443afbb56b64ace67817 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 10:13:05 +0800 Subject: [PATCH 031/271] fix infinite loop --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index dfd25a6..54520d4 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -216,7 +216,7 @@ function word(): Parser { return match(/([a-z]+)\s*/).map(([_, word]) => word); } function properWords(): Parser { - return all(match(/([A-Z][a-z]*)\s*/).map(([_, word]) => word)).map((array) => + return allAtLeastOnce(match(/([A-Z][a-z]*)\s*/).map(([_, word]) => word)).map((array) => array.join(" ") ); } From e8bae378a9c89c97d2ad604dbd44bcf2d9295352 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 10:22:59 +0800 Subject: [PATCH 032/271] format --- src/parser.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 54520d4..0477f9c 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -216,8 +216,8 @@ function word(): Parser { return match(/([a-z]+)\s*/).map(([_, word]) => word); } function properWords(): Parser { - return allAtLeastOnce(match(/([A-Z][a-z]*)\s*/).map(([_, word]) => word)).map((array) => - array.join(" ") + return allAtLeastOnce(match(/([A-Z][a-z]*)\s*/).map(([_, word]) => word)).map( + (array) => array.join(" ") ); } function wordFrom(set: Set, description: string): Parser { From 5c34770ff2b35b5fc582f829af26cc1662eb9b02 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 10:50:21 +0800 Subject: [PATCH 033/271] implement multiple parsers --- src/ast.ts | 2 +- src/parser.ts | 50 +++++++++++++++++++++++++++++++++++++++++++---- src/vocabulary.ts | 5 +++-- 3 files changed, 50 insertions(+), 7 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 14fa03b..3cc916b 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -14,7 +14,7 @@ export type FullPhrase = export type Preposition = { preposition: string; phrase: FullPhrase }; export type Clause = - | { type: "en phrase"; phrases: Array } + | { type: "en phrases"; phrases: Array } | { type: "o vocative"; phrases: Array } | { type: "li clause"; diff --git a/src/parser.ts b/src/parser.ts index 0477f9c..9a567ce 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1,5 +1,10 @@ -import { FullPhrase, Modifier, Phrase } from "./ast.ts"; -import { CONTENTWORD, PREVERB } from "./vocabulary.ts"; +import { Clause, FullPhrase, Modifier, Phrase, Preposition } from "./ast.ts"; +import { + CONTENT_WORD, + PREPOSITION, + PREVERB, + SPECIAL_SUBJECT, +} from "./vocabulary.ts"; class ParseError extends Error {} class UnreachableError extends ParseError {} @@ -206,6 +211,12 @@ function all(parser: Parser): Parser> { return wholeOutput; }); } +function manyAtLeastOnce(parser: Parser): Parser> { + return sequence(parser, many(parser)).map(([first, rest]) => [ + first, + ...rest, + ]); +} function allAtLeastOnce(parser: Parser): Parser> { return sequence(parser, all(parser)).map(([first, rest]) => [first, ...rest]); } @@ -239,11 +250,11 @@ function specificWord(thatWord: string): Parser { }); } function headWord(): Parser { - return wordFrom(CONTENTWORD, "headword"); + return wordFrom(CONTENT_WORD, "headword"); } function modifier(): Parser { return choice( - wordFrom(CONTENTWORD, "modifier").map( + wordFrom(CONTENT_WORD, "modifier").map( (word) => ({ type: "word", @@ -296,3 +307,34 @@ function fullPhrase(): Parser { } }); } +function preposition(): Parser { + return sequence(wordFrom(PREPOSITION, "preposition"), fullPhrase()).map( + ([preposition, phrase]) => ({ + preposition, + phrase, + }) + ); +} +function enPhrases(): Parser> { + return sequence( + fullPhrase(), + many(specificWord("en").with(fullPhrase())) + ).map(([first, rest]) => [first, ...rest]); +} +function clause(): Parser { + return choice( + enPhrases().map( + (phrases) => + ({ + type: "en phrases", + phrases, + } as Clause) + ), + enPhrases() + .skip(specificWord("o")) + .map((phrases) => ({ + type: "o vocative", + phrases, + })) + ); +} diff --git a/src/vocabulary.ts b/src/vocabulary.ts index 52da32b..e5e683a 100644 --- a/src/vocabulary.ts +++ b/src/vocabulary.ts @@ -11,7 +11,7 @@ export const PARTICLES = new Set([ "pi", "taso", ]); -export const CONTENTWORD = new Set([ +export const CONTENT_WORD = new Set([ "akesi", "ala", "alasa", @@ -125,6 +125,7 @@ export const CONTENTWORD = new Set([ "weka", "wile", ]); +export const SPECIAL_SUBJECT = new Set(["mi", "sina"]); export const NUMBER = new Set(["wan", "tu", "luka", "mute", "ale", "ali"]); export const PREVERB = new Set([ "alasa", @@ -140,6 +141,6 @@ export const PREVERB = new Set([ export const PREPOSITION = new Set(["kepeken", "lon", "sama", "tan", "tawa"]); export const VOCABULARY = new Set([ ...PARTICLES, - ...CONTENTWORD, + ...CONTENT_WORD, ...PREPOSITION, ]); From 2ab32467f8baab8f1025e9457b6007a3da1c88f3 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 11:01:29 +0800 Subject: [PATCH 034/271] add prepositional predicate --- src/ast.ts | 8 ++++++-- src/parser.ts | 17 ++++++++++++++++- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 3cc916b..5d82ba4 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -13,19 +13,23 @@ export type FullPhrase = export type Preposition = { preposition: string; phrase: FullPhrase }; +export type Predicate = + | { type: "default"; predicate: FullPhrase } + | { type: "preposition"; preposition: Preposition }; + export type Clause = | { type: "en phrases"; phrases: Array } | { type: "o vocative"; phrases: Array } | { type: "li clause"; subjects: Array; - predicates: Array; + predicates: Array; prepositions: Array; } | { type: "o clause"; subjects: Array; - predicates: Array; + predicates: Array; prepositions: Array; } | { type: "preposition"; prepositions: Array }; diff --git a/src/parser.ts b/src/parser.ts index 9a567ce..0817ef4 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1,4 +1,11 @@ -import { Clause, FullPhrase, Modifier, Phrase, Preposition } from "./ast.ts"; +import { + Clause, + FullPhrase, + Modifier, + Phrase, + Predicate, + Preposition, +} from "./ast.ts"; import { CONTENT_WORD, PREPOSITION, @@ -321,6 +328,14 @@ function enPhrases(): Parser> { many(specificWord("en").with(fullPhrase())) ).map(([first, rest]) => [first, ...rest]); } +function predicate(): Parser { + return choice( + fullPhrase().map( + (predicate) => ({ type: "default", predicate } as Predicate) + ), + preposition().map((preposition) => ({ type: "preposition", preposition })) + ); +} function clause(): Parser { return choice( enPhrases().map( From 62bb58662df8fac52f69c6a2aeb2a085659177cf Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 11:10:46 +0800 Subject: [PATCH 035/271] implement full clause --- src/ast.ts | 2 +- src/parser.ts | 39 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 39 insertions(+), 2 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 5d82ba4..4c9054b 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -32,7 +32,7 @@ export type Clause = predicates: Array; prepositions: Array; } - | { type: "preposition"; prepositions: Array }; + | { type: "prepositions"; prepositions: Array }; export type FullClause = { taso: boolean; clause: Clause }; diff --git a/src/parser.ts b/src/parser.ts index 0817ef4..efe0d8b 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -350,6 +350,43 @@ function clause(): Parser { .map((phrases) => ({ type: "o vocative", phrases, - })) + })), + sequence( + wordFrom(SPECIAL_SUBJECT, "mi/sina subject"), + predicate(), + many(specificWord("li").with(predicate())), + many(preposition()) + ).map(([subject, predicate, morePredicates, prepositions]) => ({ + type: "li clause", + subjects: [ + { type: "default", phrase: { headWord: subject, modifiers: [] } }, + ], + predicates: [predicate, ...morePredicates], + prepositions, + })), + sequence( + enPhrases(), + manyAtLeastOnce(specificWord("li").with(predicate())), + many(preposition()) + ).map(([subjects, predicates, prepositions]) => ({ + type: "li clause", + subjects, + predicates, + prepositions, + })), + sequence( + enPhrases(), + manyAtLeastOnce(specificWord("o").with(predicate())), + many(preposition()) + ).map(([subjects, predicates, prepositions]) => ({ + type: "o clause", + subjects, + predicates, + prepositions, + })), + manyAtLeastOnce(preposition()).map((prepositions) => ({ + type: "prepositions", + prepositions, + })) ); } From 53c41459a9b10d1b62d0ca9de9d45f20b7ac187d Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 11:18:45 +0800 Subject: [PATCH 036/271] implement full clause and sentence parser --- src/ast.ts | 2 +- src/parser.ts | 26 ++++++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/src/ast.ts b/src/ast.ts index 4c9054b..0586d28 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -38,4 +38,4 @@ export type FullClause = { taso: boolean; clause: Clause }; export type Sentence = | { type: "single clause"; clause: FullClause } - | { type: "la"; left: Clause; right: Sentence }; + | { type: "la"; left: FullClause; right: Sentence }; diff --git a/src/parser.ts b/src/parser.ts index efe0d8b..25d7c4c 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1,10 +1,12 @@ import { Clause, + FullClause, FullPhrase, Modifier, Phrase, Predicate, Preposition, + Sentence, } from "./ast.ts"; import { CONTENT_WORD, @@ -390,3 +392,27 @@ function clause(): Parser { })) ); } +function fullClause(): Parser { + return sequence(optional(specificWord("taso")), clause()).map( + ([taso, clause]) => ({ + taso: !!taso, + clause, + }) + ); +} +function sentence(): Parser { + return choice( + fullClause().map( + (clause) => ({ type: "single clause", clause } as Sentence) + ), + sequence(fullClause().skip(specificWord("la")), recursive(sentence)).map( + ([left, right]) => ({ type: "la", left, right }) + ) + ); +} +function fullSentence(): Parser { + return allSpace() + .with(sentence()) + .skip(optional(match(/\./))) + .skip(allSpace()); +} From fa402afe01948dc8a6f924eef294d112de06a699 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 11:22:53 +0800 Subject: [PATCH 037/271] order matters --- src/parser.ts | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 25d7c4c..ae041c1 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -340,19 +340,6 @@ function predicate(): Parser { } function clause(): Parser { return choice( - enPhrases().map( - (phrases) => - ({ - type: "en phrases", - phrases, - } as Clause) - ), - enPhrases() - .skip(specificWord("o")) - .map((phrases) => ({ - type: "o vocative", - phrases, - })), sequence( wordFrom(SPECIAL_SUBJECT, "mi/sina subject"), predicate(), @@ -366,6 +353,19 @@ function clause(): Parser { predicates: [predicate, ...morePredicates], prepositions, })), + enPhrases().map( + (phrases) => + ({ + type: "en phrases", + phrases, + } as Clause) + ), + enPhrases() + .skip(specificWord("o")) + .map((phrases) => ({ + type: "o vocative", + phrases, + })), sequence( enPhrases(), manyAtLeastOnce(specificWord("li").with(predicate())), @@ -414,5 +414,6 @@ function fullSentence(): Parser { return allSpace() .with(sentence()) .skip(optional(match(/\./))) - .skip(allSpace()); + .skip(allSpace()) + .skip(eol()); } From ed964bf25a3ddb44afca2812c4fad76124a1e8b3 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 11:27:38 +0800 Subject: [PATCH 038/271] order matters --- src/parser.ts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index ae041c1..ca08db4 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -263,6 +263,12 @@ function headWord(): Parser { } function modifier(): Parser { return choice( + specificWord("nanpa") + .with(fullPhrase()) + .map((phrase) => ({ + type: "nanpa ordinal", + phrase, + })), wordFrom(CONTENT_WORD, "modifier").map( (word) => ({ @@ -279,12 +285,6 @@ function modifier(): Parser { .map((phrase) => ({ type: "pi", phrase, - })), - specificWord("nanpa") - .with(fullPhrase()) - .map((phrase) => ({ - type: "nanpa ordinal", - phrase, })) // TODO: cardinal modifier ); @@ -332,10 +332,10 @@ function enPhrases(): Parser> { } function predicate(): Parser { return choice( + preposition().map((preposition) => ({ type: "preposition", preposition })), fullPhrase().map( (predicate) => ({ type: "default", predicate } as Predicate) - ), - preposition().map((preposition) => ({ type: "preposition", preposition })) + ) ); } function clause(): Parser { From c7959f09de8d5f9f2022a1942cd7a8959621a946 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 11:28:50 +0800 Subject: [PATCH 039/271] small change --- src/ast.ts | 2 +- src/parser.ts | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 0586d28..f173612 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -38,4 +38,4 @@ export type FullClause = { taso: boolean; clause: Clause }; export type Sentence = | { type: "single clause"; clause: FullClause } - | { type: "la"; left: FullClause; right: Sentence }; + | { type: "la clauses"; left: FullClause; right: Sentence }; diff --git a/src/parser.ts b/src/parser.ts index ca08db4..17935e9 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -406,7 +406,7 @@ function sentence(): Parser { (clause) => ({ type: "single clause", clause } as Sentence) ), sequence(fullClause().skip(specificWord("la")), recursive(sentence)).map( - ([left, right]) => ({ type: "la", left, right }) + ([left, right]) => ({ type: "la clauses", left, right }) ) ); } From 9a5a08f48124b71e1ea4b883a8bb1e61bdd50b5d Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 11:33:36 +0800 Subject: [PATCH 040/271] separate error --- src/error.ts | 2 ++ src/parser.ts | 5 +---- 2 files changed, 3 insertions(+), 4 deletions(-) create mode 100644 src/error.ts diff --git a/src/error.ts b/src/error.ts new file mode 100644 index 0000000..fffbc7a --- /dev/null +++ b/src/error.ts @@ -0,0 +1,2 @@ +export class UnreachableError extends Error {} +export class UnrecognizedError extends Error {} diff --git a/src/parser.ts b/src/parser.ts index 17935e9..808caae 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -8,6 +8,7 @@ import { Preposition, Sentence, } from "./ast.ts"; +import { UnreachableError, UnrecognizedError } from "./error.ts"; import { CONTENT_WORD, PREPOSITION, @@ -15,10 +16,6 @@ import { SPECIAL_SUBJECT, } from "./vocabulary.ts"; -class ParseError extends Error {} -class UnreachableError extends ParseError {} -class UnrecognizedError extends ParseError {} - class Output { output: Array<{ value: T; rest: string }>; error: null | Error; From 4d523773b37a5702252ed4906b5007307348445b Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 11:37:00 +0800 Subject: [PATCH 041/271] change error handling --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index 808caae..98614b7 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -43,7 +43,7 @@ class Output { } } setError(error: null | Error): void { - if (!this.error && this.output.length > 0) { + if (!this.error && this.output.length === 0) { this.error = error; } } From bb6cd501df2bcab9a16414dca84ebc91def6419b Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 11:50:52 +0800 Subject: [PATCH 042/271] separate output --- src/output.ts | 35 ++++++++++++++++++++++++++++ src/parser.ts | 64 ++++++++++++++------------------------------------- 2 files changed, 52 insertions(+), 47 deletions(-) create mode 100644 src/output.ts diff --git a/src/output.ts b/src/output.ts new file mode 100644 index 0000000..6c65042 --- /dev/null +++ b/src/output.ts @@ -0,0 +1,35 @@ +export class Output { + output: Array; + error: null | Error; + constructor(output: Array | Error) { + if (Array.isArray(output)) { + this.output = output; + this.error = null; + } else if (output instanceof Error) { + this.output = []; + this.error = output; + } else { + throw new Error("passed not array nor error"); + } + } + push(output: T): void { + this.output.push(output); + this.error = null; + } + append({ output, error }: Output): void { + this.output = [...this.output, ...output]; + if (this.output.length > 0) { + this.error = null; + } else { + this.error = error; + } + } + setError(error: null | Error): void { + if (!this.error && this.output.length === 0) { + this.error = error; + } + } + isError(): boolean { + return this.output.length === 0; + } +} diff --git a/src/parser.ts b/src/parser.ts index 98614b7..269fd27 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -9,6 +9,7 @@ import { Sentence, } from "./ast.ts"; import { UnreachableError, UnrecognizedError } from "./error.ts"; +import { Output } from "./output.ts"; import { CONTENT_WORD, PREPOSITION, @@ -16,54 +17,21 @@ import { SPECIAL_SUBJECT, } from "./vocabulary.ts"; -class Output { - output: Array<{ value: T; rest: string }>; - error: null | Error; - constructor(output: Array<{ value: T; rest: string }> | Error) { - if (Array.isArray(output)) { - this.output = output; - this.error = null; - } else if (output instanceof Error) { - this.output = []; - this.error = output; - } else { - throw new Error("passed not array nor error"); - } - } - push(output: { value: T; rest: string }): void { - this.output.push(output); - this.error = null; - } - append({ output, error }: Output): void { - this.output = [...this.output, ...output]; - if (this.output.length > 0) { - this.error = null; - } else { - this.error = error; - } - } - setError(error: null | Error): void { - if (!this.error && this.output.length === 0) { - this.error = error; - } - } - isError(): boolean { - return this.output.length === 0; - } -} +type ValueRest = {value: T; rest: string}; +type ParserOutput = Output>; class Parser { - constructor(public readonly parser: (src: string) => Output) {} + constructor(public readonly parser: (src: string) => ParserOutput) {} map(mapper: (x: T) => U): Parser { return new Parser((src) => { const result = this.parser(src); if (result.isError()) { if (result.error) { - return new Output(result.error); + return new Output>(result.error); } else { return new Output([]); } } - const output = new Output([]); + const output = new Output>([]); for (const { value, rest } of result.output) { try { output.push({ value: mapper(value), rest }); @@ -90,7 +58,9 @@ function match(regex: RegExp): Parser { return new Parser((src) => { const match = src.match(newRegex); if (match) { - return new Output([{ value: match, rest: src.slice(match[0].length) }]); + return new Output([ + { value: match, rest: src.slice(match[0].length) }, + ]); } else if (src === "") { return new Output(new UnreachableError()); } else { @@ -122,7 +92,7 @@ function recursive(parser: () => Parser): Parser { } function choice(...choices: Array>): Parser { return new Parser((src) => { - let output = new Output([]); + let output = new Output>([]); for (const parser of choices) { output.append(parser.parser(src)); } @@ -140,9 +110,9 @@ function sequence>( } // We resorted to using `any` types here, make sure it works properly return new Parser((src) => { - let wholeOutput = new Output([{ value: [], rest: src }]); + let wholeOutput = new Output>([{ value: [], rest: src }]); for (const parser of sequence) { - let newOutput = new Output([]); + let newOutput = new Output>([]); for (const { value, rest } of wholeOutput.output) { const { output, error } = parser.parser(rest); if (output.length === 0) { @@ -163,10 +133,10 @@ function sequence>( } function many(parser: Parser): Parser> { return new Parser((src) => { - let wholeOutput = new Output>([{ value: [], rest: src }]); - let currentOutput = new Output>([{ value: [], rest: src }]); + let wholeOutput = new Output>>([{ value: [], rest: src }]); + let currentOutput = new Output>>([{ value: [], rest: src }]); while (true) { - let newOutput = new Output>([]); + let newOutput = new Output>>([]); for (const { value, rest } of currentOutput.output) { const { output, error } = parser.parser(rest); if (output.length === 0) { @@ -192,9 +162,9 @@ function many(parser: Parser): Parser> { } function all(parser: Parser): Parser> { return new Parser((src) => { - let wholeOutput = new Output>([{ value: [], rest: src }]); + let wholeOutput = new Output>>([{ value: [], rest: src }]); while (true) { - let newOutput = new Output>([]); + let newOutput = new Output>>([]); for (const { value, rest } of wholeOutput.output) { const { output, error } = parser.parser(rest); if (output.length === 0) { From f46cc5c180d042437cf2b2ef76fbe1c684d158c8 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 11:51:26 +0800 Subject: [PATCH 043/271] add translation module --- src/translation.ts | 340 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 340 insertions(+) create mode 100644 src/translation.ts diff --git a/src/translation.ts b/src/translation.ts new file mode 100644 index 0000000..81f6d7a --- /dev/null +++ b/src/translation.ts @@ -0,0 +1,340 @@ +export const NOUN = { + akesi: ["reptile", "reptiles", "amphibian", "amphibians"], + ala: ["nothing", "no"], + alasa: ["searching"], + ale: ["everything"], + ali: ["everything"], + anpa: ["bottom", "bottoms", "under"], + ante: ["changing"], + awen: ["staying"], + esun: ["shop", "shops"], + ijo: ["thing", "things"], + ike: ["badness"], + ilo: ["tool", "tools"], + insa: ["inside", "insides"], + jaki: ["obscenity", "obscenities"], + jan: ["person", "people", "human", "humans", "humanity"], + jelo: ["yellowness"], + jo: ["possession", "possessions"], + kala: ["fish", "fishes"], + kalama: ["sound", "sounds"], + kama: ["arriving"], + kasi: ["plant", "plants"], + ken: ["ability", "abilities", "possibility", "possibilities"], + kili: ["fruit", "fruits", "vegetable", "vegetables"], + kiwen: ["hard thing", "hard things"], + ko: ["soft thing", "soft things", "powder"], + kon: ["air", "essence"], + kule: ["color", "colors"], + kulupu: ["group", "groups"], + kute: ["ear", "ears", "listening"], + lape: ["sleep", "rest"], + laso: ["blueness", "greenness"], + lawa: ["head", "heads", "control", "controls"], + len: ["cloth", "clothes", "hiding"], + lete: ["coldness"], + lili: ["smallness"], + linja: ["long flexible thing", "long flexible things"], + lipu: ["book", "books", "paper", "paper-like thing", "paper-like things"], + loje: ["redness"], + lon: ["truth", "true"], + luka: ["hand", "hands", "arm", "arms"], + lukin: ["eye", "eyes", "sight"], + lupa: ["hole", "holes"], + ma: ["place", "places", "earth"], + mama: ["parent", "parents", "creator", "creators"], + mani: ["money", "large domestic animal", "large domestic animals"], + meli: ["woman", "women", "feminity"], + mi: ["I", "me", "we", "us"], + mije: ["man", "men", "masculinity"], + moku: ["food", "foods", "drink", "drinks"], + moli: ["death"], + monsi: ["back"], + mu: ["moo"], + mun: ["celestial object", "celestial objects", "glowing thing"], + musi: ["entertainment", "entertainments"], + mute: ["many"], + nanpa: ["number", "numbers"], + nasa: ["silliness", "strangeness"], + nasin: ["way"], + nena: ["bump"], + ni: ["this", "that"], + nimi: ["name", "names", "word", "words"], + noka: ["foot", "feet", "leg", "legs"], + olin: ["love"], + ona: ["they", "them", "it"], + open: ["beginning", "beginnings"], + pakala: ["mistake", "mistakes"], + pan: ["grain", "grains"], + pana: ["giving"], + pali: ["work"], + palisa: ["long hard thing", "long hard things"], + pilin: ["emotion", "emotions"], + pimeja: ["blackness", "brownness", "grayness"], + pini: ["end", "ends"], + pipi: ["insect", "insects", "bug", "bugs"], + poka: ["side", "sides", "hips"], + poki: ["container"], + pona: ["goodness", "simplicity"], + sama: ["similarity"], + seli: ["fire", "heat", "chemical reaction", "chemical reactions"], + selo: ["outer form", "skin", "boundary", "boundaries"], + seme: ["what", "which"], + sewi: ["above", "divinity"], + sijelo: ["body", "bodies"], + sike: ["round thing", "round things", "cycle"], + sin: ["new thing", "new things"], + sina: ["you", "you all"], + sinpin: ["face", "faces", "wall", "walls"], + sitelen: ["writing", "writings", "image", "images"], + sona: ["knowledge"], + soweli: ["animal", "animals"], + suli: ["hugeness", "importance"], + suno: ["light source", "light sources", "sun"], + supa: ["horizontal surface", "horizontal surfaces"], + suwi: ["sweetness", "cuteness", "innocence"], + tan: ["reason", "origin"], + taso: [], + tawa: ["movement"], + telo: ["liquid"], + tenpo: ["time"], + toki: ["communication", "communications", "language", "languages", "hello"], + tomo: ["house", "houses"], + tonsi: ["transgender", "transgenders", "non-binary", "non-binaries"], + tu: ["pair"], + unpa: ["sex"], + uta: ["mouth"], + utala: ["conflict", "difficulty"], + walo: ["whiteness", "paleness"], + wan: ["one"], + waso: ["bird", "birds"], + wawa: ["power", "powers"], + weka: ["leaving"], + wile: ["want", "wants", "need", "needs"], + }; + export const ADJECTIVE = { + akesi: ["reptilian", "amphibian"], + ala: ["not", "no"], + alasa: [], + ale: ["all"], + ali: ["all"], + anpa: ["bottom"], + ante: ["different", "other"], + awen: ["staying"], + esun: [], + ijo: [], + ike: ["bad"], + ilo: [], + insa: [], + jaki: ["gross"], + jan: ["person-like"], + jelo: ["yellow"], + jo: [], + kala: ["fish-like"], + kalama: ["sounding"], + kama: ["arriving"], + kasi: ["plant-like"], + ken: [], + kili: [], + kiwen: ["hard"], + ko: ["soft"], + kon: [], + kule: ["colorful"], + kulupu: [], + kute: [], + lape: ["sleeping"], + laso: ["blue", "green"], + lawa: ["controlling"], + len: ["hidden"], + lete: ["cold", "uncooked"], + lili: ["small"], + linja: ["long flexible"], + lipu: ["paper-like"], + loje: ["red"], + lon: ["truthful"], + luka: [], + lukin: [], + lupa: [], + ma: ["earthy"], + mama: [], + mani: [], + meli: ["woman", "feminine"], + mi: ["my", "our"], + mije: ["man", "masculine"], + moku: [], + moli: ["dead", "deadly"], + monsi: [], + mu: ["mooing"], + mun: ["glowing"], + musi: ["entertaining"], + mute: ["many"], + nanpa: ["numeric"], + nasa: ["silly", "strange"], + nasin: [], + nena: [], + ni: ["this", "that"], + nimi: [], + noka: [], + olin: [], + ona: ["their", "its"], + open: [], + pakala: ["broken"], + pan: [], + pana: [], + pali: ["working"], + palisa: ["long hard"], + pilin: [], + pimeja: ["black", "brown", "gray"], + pini: ["ended"], + pipi: ["bug-like", "insect-like"], + poka: [], + poki: [], + pona: ["good", "simple"], + sama: [], + seli: ["hot"], + selo: [], + seme: ["what", "which"], + sewi: ["divine"], + sijelo: [], + sike: ["round"], + sin: ["new"], + sina: ["your"], + sinpin: [], + sitelen: [], + sona: ["knowledgeable"], + soweli: ["animal-like"], + suli: ["huge", "important"], + suno: ["shining"], + supa: [], + suwi: ["sweet", "cute", "innocent"], + tan: [], + tawa: ["moving"], + telo: ["liquid"], + tenpo: [], + toki: ["communicating"], + tomo: [], + tonsi: ["transgender", "non-binary"], + tu: ["two"], + unpa: ["sexual"], + uta: [], + utala: ["conflicting", "difficult"], + walo: ["white", "pale"], + wan: ["one"], + waso: ["bird-like"], + wawa: ["powerful"], + weka: ["leaving"], + wile: [], + }; + export const ADVERB = { + akesi: [], + ala: ["not"], + alasa: [], + ale: ["completely"], + ali: ["completely"], + anpa: [], + ante: ["differently"], + awen: [], + esun: [], + ijo: [], + ike: ["badly"], + ilo: [], + insa: [], + jaki: ["disgustingly"], + jan: [], + jelo: [], + jo: [], + kala: [], + kalama: [], + kama: [], + kasi: [], + ken: [], + kili: [], + kiwen: [], + ko: [], + kon: [], + kule: ["colorfully"], + kulupu: [], + kute: [], + lape: [], + laso: [], + lawa: [], + len: [], + lete: [], + lili: ["slightly"], + linja: [], + lipu: [], + loje: [], + lon: ["truthfully"], + luka: [], + lukin: [], + lupa: [], + ma: [], + mama: [], + mani: [], + meli: [], + mi: [], + mije: [], + moku: [], + moli: [], + monsi: [], + mu: [], + mun: [], + musi: ["entertainingly"], + mute: ["very"], + nanpa: ["numerically"], + nasa: ["strangely"], + nasin: [], + nena: [], + ni: [], + nimi: [], + noka: [], + olin: [], + ona: [], + open: [], + pakala: [], + pan: [], + pana: [], + pali: [], + palisa: [], + pilin: [], + pimeja: [], + pini: [], + pipi: [], + poka: [], + poki: [], + pona: ["nicely"], + sama: ["equally"], + seli: [], + selo: [], + seme: [], + sewi: ["divinely"], + sijelo: [], + sike: ["repeatedly"], + sin: ["newly"], + sina: [], + sinpin: [], + sitelen: [], + sona: [], + soweli: [], + suli: ["hugely", "importantly"], + suno: [], + supa: [], + suwi: ["sweetly"], + tan: [], + tawa: [], + telo: [], + tenpo: [], + toki: [], + tomo: [], + tonsi: [], + tu: [], + unpa: ["sexually"], + uta: [], + utala: ["conflictingly", "difficultly"], + walo: [], + wan: [], + waso: [], + wawa: ["powerfully"], + weka: [], + wile: [], + }; \ No newline at end of file From 2ca830cc953c1f4dd1e0ffe6d498d598a9072f31 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 11:55:07 +0800 Subject: [PATCH 044/271] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7104ba7..9dd86b8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ For this version. The whole code has been rewritten to be easier to modify. - Rewrite parser to be more declarative. - Rewrite whole code to be modular. +- Dropped support for "a" particle. ## 0.1.1 From 8fde620675bda44725f7339b411750f516793493 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 11:59:11 +0800 Subject: [PATCH 045/271] update changelog --- CHANGELOG.md | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9dd86b8..43f6072 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,12 +2,16 @@ ## 0.2.0 (On development) -For this version. The whole code has been rewritten to be easier to modify. +For this version. The whole code has been rewritten. This makes the code a lot easier to modify. Due to this, there are inevitable changes to the translator. -- Rewrite parser to be more declarative. -- Rewrite whole code to be modular. +- New limitation list. - Dropped support for "a" particle. +Inside update (intended for developers): + +- Rewritten whole code to use module and typescript. +- Rewritten parser to use parser combinator. + ## 0.1.1 - Update copyright notice From 3903f56b5dd7693af0a7e60ef55a3e0cb17601e5 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 11:59:33 +0800 Subject: [PATCH 046/271] formatting fix --- CHANGELOG.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 43f6072..525bf15 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,9 +14,9 @@ Inside update (intended for developers): ## 0.1.1 -- Update copyright notice -- Update version number on the page -- Update contacts to discord (from `neverRare#1517` to `never_rare`) +- Update copyright notice. +- Update version number on the page. +- Update contacts to Discord. (from `neverRare#1517` to `never_rare`) ## 0.1.0 From 1abc00a0caca54bb20e3c2f1c14ebefe93bbda4b Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 12:19:18 +0800 Subject: [PATCH 047/271] update errors --- src/error.ts | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/error.ts b/src/error.ts index fffbc7a..a8409fe 100644 --- a/src/error.ts +++ b/src/error.ts @@ -1,2 +1,10 @@ -export class UnreachableError extends Error {} -export class UnrecognizedError extends Error {} +export class UnreachableError extends Error { + constructor() { + super("This is an error you shouldn't see... Please report this error."); + } +} +export class UnrecognizedError extends Error { + constructor(token) { + super(`${token} is unrecognized`); + } +} From 716ddce62e65d89af8ff39530596b25ba9da7e53 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 12:19:41 +0800 Subject: [PATCH 048/271] whoops --- src/error.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/error.ts b/src/error.ts index a8409fe..901f319 100644 --- a/src/error.ts +++ b/src/error.ts @@ -4,7 +4,7 @@ export class UnreachableError extends Error { } } export class UnrecognizedError extends Error { - constructor(token) { + constructor(token: string) { super(`${token} is unrecognized`); } } From 63064667576a48e655ca4fcdec61ecaf31e4f036 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 12:19:55 +0800 Subject: [PATCH 049/271] period --- src/error.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/error.ts b/src/error.ts index 901f319..2c9a819 100644 --- a/src/error.ts +++ b/src/error.ts @@ -5,6 +5,6 @@ export class UnreachableError extends Error { } export class UnrecognizedError extends Error { constructor(token: string) { - super(`${token} is unrecognized`); + super(`${token} is unrecognized.`); } } From a5623d90ff18076a3e00c83fd18f001566469cda Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 12:32:37 +0800 Subject: [PATCH 050/271] add final parser function --- src/output.ts | 3 +++ src/parser.ts | 23 ++++++++++++++++------- 2 files changed, 19 insertions(+), 7 deletions(-) diff --git a/src/output.ts b/src/output.ts index 6c65042..896d5c1 100644 --- a/src/output.ts +++ b/src/output.ts @@ -32,4 +32,7 @@ export class Output { isError(): boolean { return this.output.length === 0; } + map(mapper: (x: T) => U): Output { + return new Output(this.output.map(mapper)); + } } diff --git a/src/parser.ts b/src/parser.ts index 269fd27..f08d44c 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -17,7 +17,7 @@ import { SPECIAL_SUBJECT, } from "./vocabulary.ts"; -type ValueRest = {value: T; rest: string}; +type ValueRest = { value: T; rest: string }; type ParserOutput = Output>; class Parser { constructor(public readonly parser: (src: string) => ParserOutput) {} @@ -58,9 +58,7 @@ function match(regex: RegExp): Parser { return new Parser((src) => { const match = src.match(newRegex); if (match) { - return new Output([ - { value: match, rest: src.slice(match[0].length) }, - ]); + return new Output([{ value: match, rest: src.slice(match[0].length) }]); } else if (src === "") { return new Output(new UnreachableError()); } else { @@ -133,8 +131,12 @@ function sequence>( } function many(parser: Parser): Parser> { return new Parser((src) => { - let wholeOutput = new Output>>([{ value: [], rest: src }]); - let currentOutput = new Output>>([{ value: [], rest: src }]); + let wholeOutput = new Output>>([ + { value: [], rest: src }, + ]); + let currentOutput = new Output>>([ + { value: [], rest: src }, + ]); while (true) { let newOutput = new Output>>([]); for (const { value, rest } of currentOutput.output) { @@ -162,7 +164,9 @@ function many(parser: Parser): Parser> { } function all(parser: Parser): Parser> { return new Parser((src) => { - let wholeOutput = new Output>>([{ value: [], rest: src }]); + let wholeOutput = new Output>>([ + { value: [], rest: src }, + ]); while (true) { let newOutput = new Output>>([]); for (const { value, rest } of wholeOutput.output) { @@ -384,3 +388,8 @@ function fullSentence(): Parser { .skip(allSpace()) .skip(eol()); } +export function parser(src: string): Output { + return fullSentence() + .parser(src) + .map(({ value }) => value); +} From 3a74c24967f3b94b0765d5687e7e1d5bafc57504 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 12:34:27 +0800 Subject: [PATCH 051/271] format --- src/translation.ts | 678 ++++++++++++++++++++++----------------------- 1 file changed, 339 insertions(+), 339 deletions(-) diff --git a/src/translation.ts b/src/translation.ts index 81f6d7a..6231d0d 100644 --- a/src/translation.ts +++ b/src/translation.ts @@ -1,340 +1,340 @@ export const NOUN = { - akesi: ["reptile", "reptiles", "amphibian", "amphibians"], - ala: ["nothing", "no"], - alasa: ["searching"], - ale: ["everything"], - ali: ["everything"], - anpa: ["bottom", "bottoms", "under"], - ante: ["changing"], - awen: ["staying"], - esun: ["shop", "shops"], - ijo: ["thing", "things"], - ike: ["badness"], - ilo: ["tool", "tools"], - insa: ["inside", "insides"], - jaki: ["obscenity", "obscenities"], - jan: ["person", "people", "human", "humans", "humanity"], - jelo: ["yellowness"], - jo: ["possession", "possessions"], - kala: ["fish", "fishes"], - kalama: ["sound", "sounds"], - kama: ["arriving"], - kasi: ["plant", "plants"], - ken: ["ability", "abilities", "possibility", "possibilities"], - kili: ["fruit", "fruits", "vegetable", "vegetables"], - kiwen: ["hard thing", "hard things"], - ko: ["soft thing", "soft things", "powder"], - kon: ["air", "essence"], - kule: ["color", "colors"], - kulupu: ["group", "groups"], - kute: ["ear", "ears", "listening"], - lape: ["sleep", "rest"], - laso: ["blueness", "greenness"], - lawa: ["head", "heads", "control", "controls"], - len: ["cloth", "clothes", "hiding"], - lete: ["coldness"], - lili: ["smallness"], - linja: ["long flexible thing", "long flexible things"], - lipu: ["book", "books", "paper", "paper-like thing", "paper-like things"], - loje: ["redness"], - lon: ["truth", "true"], - luka: ["hand", "hands", "arm", "arms"], - lukin: ["eye", "eyes", "sight"], - lupa: ["hole", "holes"], - ma: ["place", "places", "earth"], - mama: ["parent", "parents", "creator", "creators"], - mani: ["money", "large domestic animal", "large domestic animals"], - meli: ["woman", "women", "feminity"], - mi: ["I", "me", "we", "us"], - mije: ["man", "men", "masculinity"], - moku: ["food", "foods", "drink", "drinks"], - moli: ["death"], - monsi: ["back"], - mu: ["moo"], - mun: ["celestial object", "celestial objects", "glowing thing"], - musi: ["entertainment", "entertainments"], - mute: ["many"], - nanpa: ["number", "numbers"], - nasa: ["silliness", "strangeness"], - nasin: ["way"], - nena: ["bump"], - ni: ["this", "that"], - nimi: ["name", "names", "word", "words"], - noka: ["foot", "feet", "leg", "legs"], - olin: ["love"], - ona: ["they", "them", "it"], - open: ["beginning", "beginnings"], - pakala: ["mistake", "mistakes"], - pan: ["grain", "grains"], - pana: ["giving"], - pali: ["work"], - palisa: ["long hard thing", "long hard things"], - pilin: ["emotion", "emotions"], - pimeja: ["blackness", "brownness", "grayness"], - pini: ["end", "ends"], - pipi: ["insect", "insects", "bug", "bugs"], - poka: ["side", "sides", "hips"], - poki: ["container"], - pona: ["goodness", "simplicity"], - sama: ["similarity"], - seli: ["fire", "heat", "chemical reaction", "chemical reactions"], - selo: ["outer form", "skin", "boundary", "boundaries"], - seme: ["what", "which"], - sewi: ["above", "divinity"], - sijelo: ["body", "bodies"], - sike: ["round thing", "round things", "cycle"], - sin: ["new thing", "new things"], - sina: ["you", "you all"], - sinpin: ["face", "faces", "wall", "walls"], - sitelen: ["writing", "writings", "image", "images"], - sona: ["knowledge"], - soweli: ["animal", "animals"], - suli: ["hugeness", "importance"], - suno: ["light source", "light sources", "sun"], - supa: ["horizontal surface", "horizontal surfaces"], - suwi: ["sweetness", "cuteness", "innocence"], - tan: ["reason", "origin"], - taso: [], - tawa: ["movement"], - telo: ["liquid"], - tenpo: ["time"], - toki: ["communication", "communications", "language", "languages", "hello"], - tomo: ["house", "houses"], - tonsi: ["transgender", "transgenders", "non-binary", "non-binaries"], - tu: ["pair"], - unpa: ["sex"], - uta: ["mouth"], - utala: ["conflict", "difficulty"], - walo: ["whiteness", "paleness"], - wan: ["one"], - waso: ["bird", "birds"], - wawa: ["power", "powers"], - weka: ["leaving"], - wile: ["want", "wants", "need", "needs"], - }; - export const ADJECTIVE = { - akesi: ["reptilian", "amphibian"], - ala: ["not", "no"], - alasa: [], - ale: ["all"], - ali: ["all"], - anpa: ["bottom"], - ante: ["different", "other"], - awen: ["staying"], - esun: [], - ijo: [], - ike: ["bad"], - ilo: [], - insa: [], - jaki: ["gross"], - jan: ["person-like"], - jelo: ["yellow"], - jo: [], - kala: ["fish-like"], - kalama: ["sounding"], - kama: ["arriving"], - kasi: ["plant-like"], - ken: [], - kili: [], - kiwen: ["hard"], - ko: ["soft"], - kon: [], - kule: ["colorful"], - kulupu: [], - kute: [], - lape: ["sleeping"], - laso: ["blue", "green"], - lawa: ["controlling"], - len: ["hidden"], - lete: ["cold", "uncooked"], - lili: ["small"], - linja: ["long flexible"], - lipu: ["paper-like"], - loje: ["red"], - lon: ["truthful"], - luka: [], - lukin: [], - lupa: [], - ma: ["earthy"], - mama: [], - mani: [], - meli: ["woman", "feminine"], - mi: ["my", "our"], - mije: ["man", "masculine"], - moku: [], - moli: ["dead", "deadly"], - monsi: [], - mu: ["mooing"], - mun: ["glowing"], - musi: ["entertaining"], - mute: ["many"], - nanpa: ["numeric"], - nasa: ["silly", "strange"], - nasin: [], - nena: [], - ni: ["this", "that"], - nimi: [], - noka: [], - olin: [], - ona: ["their", "its"], - open: [], - pakala: ["broken"], - pan: [], - pana: [], - pali: ["working"], - palisa: ["long hard"], - pilin: [], - pimeja: ["black", "brown", "gray"], - pini: ["ended"], - pipi: ["bug-like", "insect-like"], - poka: [], - poki: [], - pona: ["good", "simple"], - sama: [], - seli: ["hot"], - selo: [], - seme: ["what", "which"], - sewi: ["divine"], - sijelo: [], - sike: ["round"], - sin: ["new"], - sina: ["your"], - sinpin: [], - sitelen: [], - sona: ["knowledgeable"], - soweli: ["animal-like"], - suli: ["huge", "important"], - suno: ["shining"], - supa: [], - suwi: ["sweet", "cute", "innocent"], - tan: [], - tawa: ["moving"], - telo: ["liquid"], - tenpo: [], - toki: ["communicating"], - tomo: [], - tonsi: ["transgender", "non-binary"], - tu: ["two"], - unpa: ["sexual"], - uta: [], - utala: ["conflicting", "difficult"], - walo: ["white", "pale"], - wan: ["one"], - waso: ["bird-like"], - wawa: ["powerful"], - weka: ["leaving"], - wile: [], - }; - export const ADVERB = { - akesi: [], - ala: ["not"], - alasa: [], - ale: ["completely"], - ali: ["completely"], - anpa: [], - ante: ["differently"], - awen: [], - esun: [], - ijo: [], - ike: ["badly"], - ilo: [], - insa: [], - jaki: ["disgustingly"], - jan: [], - jelo: [], - jo: [], - kala: [], - kalama: [], - kama: [], - kasi: [], - ken: [], - kili: [], - kiwen: [], - ko: [], - kon: [], - kule: ["colorfully"], - kulupu: [], - kute: [], - lape: [], - laso: [], - lawa: [], - len: [], - lete: [], - lili: ["slightly"], - linja: [], - lipu: [], - loje: [], - lon: ["truthfully"], - luka: [], - lukin: [], - lupa: [], - ma: [], - mama: [], - mani: [], - meli: [], - mi: [], - mije: [], - moku: [], - moli: [], - monsi: [], - mu: [], - mun: [], - musi: ["entertainingly"], - mute: ["very"], - nanpa: ["numerically"], - nasa: ["strangely"], - nasin: [], - nena: [], - ni: [], - nimi: [], - noka: [], - olin: [], - ona: [], - open: [], - pakala: [], - pan: [], - pana: [], - pali: [], - palisa: [], - pilin: [], - pimeja: [], - pini: [], - pipi: [], - poka: [], - poki: [], - pona: ["nicely"], - sama: ["equally"], - seli: [], - selo: [], - seme: [], - sewi: ["divinely"], - sijelo: [], - sike: ["repeatedly"], - sin: ["newly"], - sina: [], - sinpin: [], - sitelen: [], - sona: [], - soweli: [], - suli: ["hugely", "importantly"], - suno: [], - supa: [], - suwi: ["sweetly"], - tan: [], - tawa: [], - telo: [], - tenpo: [], - toki: [], - tomo: [], - tonsi: [], - tu: [], - unpa: ["sexually"], - uta: [], - utala: ["conflictingly", "difficultly"], - walo: [], - wan: [], - waso: [], - wawa: ["powerfully"], - weka: [], - wile: [], - }; \ No newline at end of file + akesi: ["reptile", "reptiles", "amphibian", "amphibians"], + ala: ["nothing", "no"], + alasa: ["searching"], + ale: ["everything"], + ali: ["everything"], + anpa: ["bottom", "bottoms", "under"], + ante: ["changing"], + awen: ["staying"], + esun: ["shop", "shops"], + ijo: ["thing", "things"], + ike: ["badness"], + ilo: ["tool", "tools"], + insa: ["inside", "insides"], + jaki: ["obscenity", "obscenities"], + jan: ["person", "people", "human", "humans", "humanity"], + jelo: ["yellowness"], + jo: ["possession", "possessions"], + kala: ["fish", "fishes"], + kalama: ["sound", "sounds"], + kama: ["arriving"], + kasi: ["plant", "plants"], + ken: ["ability", "abilities", "possibility", "possibilities"], + kili: ["fruit", "fruits", "vegetable", "vegetables"], + kiwen: ["hard thing", "hard things"], + ko: ["soft thing", "soft things", "powder"], + kon: ["air", "essence"], + kule: ["color", "colors"], + kulupu: ["group", "groups"], + kute: ["ear", "ears", "listening"], + lape: ["sleep", "rest"], + laso: ["blueness", "greenness"], + lawa: ["head", "heads", "control", "controls"], + len: ["cloth", "clothes", "hiding"], + lete: ["coldness"], + lili: ["smallness"], + linja: ["long flexible thing", "long flexible things"], + lipu: ["book", "books", "paper", "paper-like thing", "paper-like things"], + loje: ["redness"], + lon: ["truth", "true"], + luka: ["hand", "hands", "arm", "arms"], + lukin: ["eye", "eyes", "sight"], + lupa: ["hole", "holes"], + ma: ["place", "places", "earth"], + mama: ["parent", "parents", "creator", "creators"], + mani: ["money", "large domestic animal", "large domestic animals"], + meli: ["woman", "women", "feminity"], + mi: ["I", "me", "we", "us"], + mije: ["man", "men", "masculinity"], + moku: ["food", "foods", "drink", "drinks"], + moli: ["death"], + monsi: ["back"], + mu: ["moo"], + mun: ["celestial object", "celestial objects", "glowing thing"], + musi: ["entertainment", "entertainments"], + mute: ["many"], + nanpa: ["number", "numbers"], + nasa: ["silliness", "strangeness"], + nasin: ["way"], + nena: ["bump"], + ni: ["this", "that"], + nimi: ["name", "names", "word", "words"], + noka: ["foot", "feet", "leg", "legs"], + olin: ["love"], + ona: ["they", "them", "it"], + open: ["beginning", "beginnings"], + pakala: ["mistake", "mistakes"], + pan: ["grain", "grains"], + pana: ["giving"], + pali: ["work"], + palisa: ["long hard thing", "long hard things"], + pilin: ["emotion", "emotions"], + pimeja: ["blackness", "brownness", "grayness"], + pini: ["end", "ends"], + pipi: ["insect", "insects", "bug", "bugs"], + poka: ["side", "sides", "hips"], + poki: ["container"], + pona: ["goodness", "simplicity"], + sama: ["similarity"], + seli: ["fire", "heat", "chemical reaction", "chemical reactions"], + selo: ["outer form", "skin", "boundary", "boundaries"], + seme: ["what", "which"], + sewi: ["above", "divinity"], + sijelo: ["body", "bodies"], + sike: ["round thing", "round things", "cycle"], + sin: ["new thing", "new things"], + sina: ["you", "you all"], + sinpin: ["face", "faces", "wall", "walls"], + sitelen: ["writing", "writings", "image", "images"], + sona: ["knowledge"], + soweli: ["animal", "animals"], + suli: ["hugeness", "importance"], + suno: ["light source", "light sources", "sun"], + supa: ["horizontal surface", "horizontal surfaces"], + suwi: ["sweetness", "cuteness", "innocence"], + tan: ["reason", "origin"], + taso: [], + tawa: ["movement"], + telo: ["liquid"], + tenpo: ["time"], + toki: ["communication", "communications", "language", "languages", "hello"], + tomo: ["house", "houses"], + tonsi: ["transgender", "transgenders", "non-binary", "non-binaries"], + tu: ["pair"], + unpa: ["sex"], + uta: ["mouth"], + utala: ["conflict", "difficulty"], + walo: ["whiteness", "paleness"], + wan: ["one"], + waso: ["bird", "birds"], + wawa: ["power", "powers"], + weka: ["leaving"], + wile: ["want", "wants", "need", "needs"], +}; +export const ADJECTIVE = { + akesi: ["reptilian", "amphibian"], + ala: ["not", "no"], + alasa: [], + ale: ["all"], + ali: ["all"], + anpa: ["bottom"], + ante: ["different", "other"], + awen: ["staying"], + esun: [], + ijo: [], + ike: ["bad"], + ilo: [], + insa: [], + jaki: ["gross"], + jan: ["person-like"], + jelo: ["yellow"], + jo: [], + kala: ["fish-like"], + kalama: ["sounding"], + kama: ["arriving"], + kasi: ["plant-like"], + ken: [], + kili: [], + kiwen: ["hard"], + ko: ["soft"], + kon: [], + kule: ["colorful"], + kulupu: [], + kute: [], + lape: ["sleeping"], + laso: ["blue", "green"], + lawa: ["controlling"], + len: ["hidden"], + lete: ["cold", "uncooked"], + lili: ["small"], + linja: ["long flexible"], + lipu: ["paper-like"], + loje: ["red"], + lon: ["truthful"], + luka: [], + lukin: [], + lupa: [], + ma: ["earthy"], + mama: [], + mani: [], + meli: ["woman", "feminine"], + mi: ["my", "our"], + mije: ["man", "masculine"], + moku: [], + moli: ["dead", "deadly"], + monsi: [], + mu: ["mooing"], + mun: ["glowing"], + musi: ["entertaining"], + mute: ["many"], + nanpa: ["numeric"], + nasa: ["silly", "strange"], + nasin: [], + nena: [], + ni: ["this", "that"], + nimi: [], + noka: [], + olin: [], + ona: ["their", "its"], + open: [], + pakala: ["broken"], + pan: [], + pana: [], + pali: ["working"], + palisa: ["long hard"], + pilin: [], + pimeja: ["black", "brown", "gray"], + pini: ["ended"], + pipi: ["bug-like", "insect-like"], + poka: [], + poki: [], + pona: ["good", "simple"], + sama: [], + seli: ["hot"], + selo: [], + seme: ["what", "which"], + sewi: ["divine"], + sijelo: [], + sike: ["round"], + sin: ["new"], + sina: ["your"], + sinpin: [], + sitelen: [], + sona: ["knowledgeable"], + soweli: ["animal-like"], + suli: ["huge", "important"], + suno: ["shining"], + supa: [], + suwi: ["sweet", "cute", "innocent"], + tan: [], + tawa: ["moving"], + telo: ["liquid"], + tenpo: [], + toki: ["communicating"], + tomo: [], + tonsi: ["transgender", "non-binary"], + tu: ["two"], + unpa: ["sexual"], + uta: [], + utala: ["conflicting", "difficult"], + walo: ["white", "pale"], + wan: ["one"], + waso: ["bird-like"], + wawa: ["powerful"], + weka: ["leaving"], + wile: [], +}; +export const ADVERB = { + akesi: [], + ala: ["not"], + alasa: [], + ale: ["completely"], + ali: ["completely"], + anpa: [], + ante: ["differently"], + awen: [], + esun: [], + ijo: [], + ike: ["badly"], + ilo: [], + insa: [], + jaki: ["disgustingly"], + jan: [], + jelo: [], + jo: [], + kala: [], + kalama: [], + kama: [], + kasi: [], + ken: [], + kili: [], + kiwen: [], + ko: [], + kon: [], + kule: ["colorfully"], + kulupu: [], + kute: [], + lape: [], + laso: [], + lawa: [], + len: [], + lete: [], + lili: ["slightly"], + linja: [], + lipu: [], + loje: [], + lon: ["truthfully"], + luka: [], + lukin: [], + lupa: [], + ma: [], + mama: [], + mani: [], + meli: [], + mi: [], + mije: [], + moku: [], + moli: [], + monsi: [], + mu: [], + mun: [], + musi: ["entertainingly"], + mute: ["very"], + nanpa: ["numerically"], + nasa: ["strangely"], + nasin: [], + nena: [], + ni: [], + nimi: [], + noka: [], + olin: [], + ona: [], + open: [], + pakala: [], + pan: [], + pana: [], + pali: [], + palisa: [], + pilin: [], + pimeja: [], + pini: [], + pipi: [], + poka: [], + poki: [], + pona: ["nicely"], + sama: ["equally"], + seli: [], + selo: [], + seme: [], + sewi: ["divinely"], + sijelo: [], + sike: ["repeatedly"], + sin: ["newly"], + sina: [], + sinpin: [], + sitelen: [], + sona: [], + soweli: [], + suli: ["hugely", "importantly"], + suno: [], + supa: [], + suwi: ["sweetly"], + tan: [], + tawa: [], + telo: [], + tenpo: [], + toki: [], + tomo: [], + tonsi: [], + tu: [], + unpa: ["sexually"], + uta: [], + utala: ["conflictingly", "difficultly"], + walo: [], + wan: [], + waso: [], + wawa: ["powerfully"], + weka: [], + wile: [], +}; From 89b4076bbe763151361818f7e8340349b3f12c56 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 12:51:50 +0800 Subject: [PATCH 052/271] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 525bf15..0b0c996 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ For this version. The whole code has been rewritten. This makes the code a lot e - New limitation list. - Dropped support for "a" particle. +- Error messages are now very inaccurate. Inside update (intended for developers): From a22546cf1228fcad21885fa6921f08bf7d29de46 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 13:13:23 +0800 Subject: [PATCH 053/271] prepare translator --- src/output.ts | 16 +++++++++++++++- src/translator.ts | 12 ++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 src/translator.ts diff --git a/src/output.ts b/src/output.ts index 896d5c1..e8ab72d 100644 --- a/src/output.ts +++ b/src/output.ts @@ -32,7 +32,21 @@ export class Output { isError(): boolean { return this.output.length === 0; } - map(mapper: (x: T) => U): Output { + map(mapper: (value: T) => U): Output { return new Output(this.output.map(mapper)); } + flatMap(mapper: (value: T) => Output): Output { + if (this.isError()) { + if (this.error) { + return new Output(this.error); + } else { + return new Output([]); + } + } + const wholeOutput = new Output([]); + for (const value of this.output) { + wholeOutput.append(mapper(value)); + } + return wholeOutput; + } } diff --git a/src/translator.ts b/src/translator.ts new file mode 100644 index 0000000..272a8ad --- /dev/null +++ b/src/translator.ts @@ -0,0 +1,12 @@ +import { Sentence } from "./ast"; +import { Output } from "./output"; +import { parser } from "./parser"; + +type TranslationOutput = Output; + +function translateSentence(output: Sentence): TranslationOutput { + throw new Error("todo"); +} +function translate(src: string): TranslationOutput { + return parser(src).flatMap(translateSentence); +} From a9857f58ff7d071d1716791196c8d6bd9df44763 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 13:20:54 +0800 Subject: [PATCH 054/271] add test-parser.ts and include .ts on every path --- src/translator.ts | 6 +++--- test-parser.ts | 4 ++++ 2 files changed, 7 insertions(+), 3 deletions(-) create mode 100644 test-parser.ts diff --git a/src/translator.ts b/src/translator.ts index 272a8ad..9c4b57a 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -1,6 +1,6 @@ -import { Sentence } from "./ast"; -import { Output } from "./output"; -import { parser } from "./parser"; +import { Sentence } from "./ast.ts"; +import { Output } from "./output.ts"; +import { parser } from "./parser.ts"; type TranslationOutput = Output; diff --git a/test-parser.ts b/test-parser.ts new file mode 100644 index 0000000..6b7c415 --- /dev/null +++ b/test-parser.ts @@ -0,0 +1,4 @@ +import { parser } from "./src/parser.ts"; + +const input = await Deno.readTextFile("./test.txt"); +console.log(JSON.stringify(parser(input), null, 2)); From 07ec6001b7dadafd4d8261abcb3cdc8b1ce80f57 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 13:43:13 +0800 Subject: [PATCH 055/271] use deno --- .vscode/settings.json | 3 ++- tsconfig.json | 13 ------------- 2 files changed, 2 insertions(+), 14 deletions(-) delete mode 100644 tsconfig.json diff --git a/.vscode/settings.json b/.vscode/settings.json index 4533aed..4b4e19e 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -6,5 +6,6 @@ "description": "Ku Suli words of Toki Pona", "addWords": true } - } + }, + "deno.enable": true } \ No newline at end of file diff --git a/tsconfig.json b/tsconfig.json deleted file mode 100644 index b1e183b..0000000 --- a/tsconfig.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "compilerOptions": { - "strict": true, - "strictBindCallApply": true, - "strictFunctionTypes": true, - "strictNullChecks": true, - "strictPropertyInitialization": true, - "lib": ["ES2022"], - "downlevelIteration": true, - "allowImportingTsExtensions": true, - "noEmit": true, - } -} From bfa1261f305fd4b97a6b9adc4b54466261e4adec Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 13:45:40 +0800 Subject: [PATCH 056/271] use deno formatter --- .vscode/settings.json | 20 ++++++++--------- src/ast.ts | 20 ++++++++--------- src/parser.ts | 50 +++++++++++++++++++++---------------------- 3 files changed, 44 insertions(+), 46 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 4b4e19e..e7c316b 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,11 +1,11 @@ { - "cSpell.customDictionaries": { - "ku-suli": { - "name": "ku-suli", - "path": "${workspaceRoot}/nimi-ku-suli.txt", - "description": "Ku Suli words of Toki Pona", - "addWords": true - } - }, - "deno.enable": true -} \ No newline at end of file + "cSpell.customDictionaries": { + "ku-suli": { + "name": "ku-suli", + "path": "${workspaceRoot}/nimi-ku-suli.txt", + "description": "Ku Suli words of Toki Pona", + "addWords": true + } + }, + "deno.enable": true +} diff --git a/src/ast.ts b/src/ast.ts index f173612..674a65b 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -21,17 +21,17 @@ export type Clause = | { type: "en phrases"; phrases: Array } | { type: "o vocative"; phrases: Array } | { - type: "li clause"; - subjects: Array; - predicates: Array; - prepositions: Array; - } + type: "li clause"; + subjects: Array; + predicates: Array; + prepositions: Array; + } | { - type: "o clause"; - subjects: Array; - predicates: Array; - prepositions: Array; - } + type: "o clause"; + subjects: Array; + predicates: Array; + prepositions: Array; + } | { type: "prepositions"; prepositions: Array }; export type FullClause = { taso: boolean; clause: Clause }; diff --git a/src/parser.ts b/src/parser.ts index f08d44c..1dc7f68 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -208,7 +208,7 @@ function word(): Parser { } function properWords(): Parser { return allAtLeastOnce(match(/([A-Z][a-z]*)\s*/).map(([_, word]) => word)).map( - (array) => array.join(" ") + (array) => array.join(" "), ); } function wordFrom(set: Set, description: string): Parser { @@ -241,11 +241,10 @@ function modifier(): Parser { phrase, })), wordFrom(CONTENT_WORD, "modifier").map( - (word) => - ({ - type: "word", - word, - } as Modifier) + (word) => ({ + type: "word", + word, + } as Modifier), ), properWords().map((words) => ({ type: "proper words", @@ -256,7 +255,7 @@ function modifier(): Parser { .map((phrase) => ({ type: "pi", phrase, - })) + })), // TODO: cardinal modifier ); } @@ -265,13 +264,13 @@ function phrase(): Parser { ([headWord, modifiers]) => ({ headWord, modifiers, - }) + }), ); } function fullPhrase(): Parser { return sequence( optional(wordFrom(PREVERB, "preverb")), - recursive(phrase) + recursive(phrase), ).map(([preverb, phrase]) => { if (preverb) { return { @@ -292,21 +291,21 @@ function preposition(): Parser { ([preposition, phrase]) => ({ preposition, phrase, - }) + }), ); } function enPhrases(): Parser> { return sequence( fullPhrase(), - many(specificWord("en").with(fullPhrase())) + many(specificWord("en").with(fullPhrase())), ).map(([first, rest]) => [first, ...rest]); } function predicate(): Parser { return choice( preposition().map((preposition) => ({ type: "preposition", preposition })), fullPhrase().map( - (predicate) => ({ type: "default", predicate } as Predicate) - ) + (predicate) => ({ type: "default", predicate } as Predicate), + ), ); } function clause(): Parser { @@ -315,7 +314,7 @@ function clause(): Parser { wordFrom(SPECIAL_SUBJECT, "mi/sina subject"), predicate(), many(specificWord("li").with(predicate())), - many(preposition()) + many(preposition()), ).map(([subject, predicate, morePredicates, prepositions]) => ({ type: "li clause", subjects: [ @@ -325,11 +324,10 @@ function clause(): Parser { prepositions, })), enPhrases().map( - (phrases) => - ({ - type: "en phrases", - phrases, - } as Clause) + (phrases) => ({ + type: "en phrases", + phrases, + } as Clause), ), enPhrases() .skip(specificWord("o")) @@ -340,7 +338,7 @@ function clause(): Parser { sequence( enPhrases(), manyAtLeastOnce(specificWord("li").with(predicate())), - many(preposition()) + many(preposition()), ).map(([subjects, predicates, prepositions]) => ({ type: "li clause", subjects, @@ -350,7 +348,7 @@ function clause(): Parser { sequence( enPhrases(), manyAtLeastOnce(specificWord("o").with(predicate())), - many(preposition()) + many(preposition()), ).map(([subjects, predicates, prepositions]) => ({ type: "o clause", subjects, @@ -360,7 +358,7 @@ function clause(): Parser { manyAtLeastOnce(preposition()).map((prepositions) => ({ type: "prepositions", prepositions, - })) + })), ); } function fullClause(): Parser { @@ -368,17 +366,17 @@ function fullClause(): Parser { ([taso, clause]) => ({ taso: !!taso, clause, - }) + }), ); } function sentence(): Parser { return choice( fullClause().map( - (clause) => ({ type: "single clause", clause } as Sentence) + (clause) => ({ type: "single clause", clause } as Sentence), ), sequence(fullClause().skip(specificWord("la")), recursive(sentence)).map( - ([left, right]) => ({ type: "la clauses", left, right }) - ) + ([left, right]) => ({ type: "la clauses", left, right }), + ), ); } function fullSentence(): Parser { From 5df0395488def797ec7a4a057bde993a66196b70 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 14:26:00 +0800 Subject: [PATCH 057/271] add language code to html --- index.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/index.html b/index.html index e71bb0a..8e5e01a 100644 --- a/index.html +++ b/index.html @@ -1,5 +1,5 @@ - + @@ -28,7 +28,7 @@

Toki Pona Translator

>Limitations.

- +

    From 0e678589dc72f98f8b20812c9d9b8a428bb5723e Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 14:26:44 +0800 Subject: [PATCH 058/271] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b0c996..4f31187 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ Inside update (intended for developers): - Rewritten whole code to use module and typescript. - Rewritten parser to use parser combinator. +- Add language codes to html. ## 0.1.1 From 19a96ccb3d6b228dea4114826219c9c021b40c9c Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 14:57:10 +0800 Subject: [PATCH 059/271] order matters --- src/parser.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 1dc7f68..501ef45 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -323,6 +323,10 @@ function clause(): Parser { predicates: [predicate, ...morePredicates], prepositions, })), + manyAtLeastOnce(preposition()).map((prepositions) => ({ + type: "prepositions", + prepositions, + })), enPhrases().map( (phrases) => ({ type: "en phrases", @@ -355,10 +359,6 @@ function clause(): Parser { predicates, prepositions, })), - manyAtLeastOnce(preposition()).map((prepositions) => ({ - type: "prepositions", - prepositions, - })), ); } function fullClause(): Parser { From dfc6131c54d8933550b727fa5a6d8471f021f10d Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 15:29:35 +0800 Subject: [PATCH 060/271] add todo for x ala x constructions --- src/ast.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/ast.ts b/src/ast.ts index 674a65b..4b187a3 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -1,3 +1,5 @@ +// TODO: X ala X constructions + export type Modifier = | { type: "word"; word: string } | { type: "proper words"; words: string } From 79913ff7e3c49ecf3f73654f62c6cf4d09081bf5 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 15:30:41 +0800 Subject: [PATCH 061/271] add X ala X constructions to limitations --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 83f73e7..67fc20a 100644 --- a/README.md +++ b/README.md @@ -73,3 +73,4 @@ The whole code is being rewritten and there will be new different limitations. - ✏️ "a" particle - ✏️ "anu" particle +- ✏️ "X ala X" constructions From 9763dccf6375818e31381d838114ac624ab4f122 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 16:26:54 +0800 Subject: [PATCH 062/271] o predicates no longer requires subject --- src/parser.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 501ef45..bd23e32 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -350,12 +350,12 @@ function clause(): Parser { prepositions, })), sequence( - enPhrases(), + optional(enPhrases()), manyAtLeastOnce(specificWord("o").with(predicate())), many(preposition()), ).map(([subjects, predicates, prepositions]) => ({ type: "o clause", - subjects, + subjects: subjects ?? [], predicates, prepositions, })), From 716c73acb244a2e225535297feb725b19fd320a6 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 16:40:43 +0800 Subject: [PATCH 063/271] update limitations --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 67fc20a..981bb70 100644 --- a/README.md +++ b/README.md @@ -71,6 +71,12 @@ The following are currently unrecognized (non-definitive but pedantic). ✏️ m The whole code is being rewritten and there will be new different limitations. + + - ✏️ "a" particle - ✏️ "anu" particle - ✏️ "X ala X" constructions +- ✏️ Extended numbering system +- Multiple sentences +- Clause with both "li" and "o" +- "kepeken" as headword or modifier From f80832395d3b6dd4a011693b891e754da38c4903 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 16:42:42 +0800 Subject: [PATCH 064/271] update notes for new limitation --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 981bb70..88d8f9a 100644 --- a/README.md +++ b/README.md @@ -71,7 +71,7 @@ The following are currently unrecognized (non-definitive but pedantic). ✏️ m The whole code is being rewritten and there will be new different limitations. - + - ✏️ "a" particle - ✏️ "anu" particle From 8d7f79b106c6f61fc6486eca4776e46a62da05f6 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 16:57:22 +0800 Subject: [PATCH 065/271] update limitations --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 88d8f9a..e2812e5 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,7 @@ The whole code is being rewritten and there will be new different limitations. - ✏️ "anu" particle - ✏️ "X ala X" constructions - ✏️ Extended numbering system +- ✏️ Commas - Multiple sentences - Clause with both "li" and "o" - "kepeken" as headword or modifier From 7f46c9384bc42f86b1b9d40671c1504f1c32d7a1 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 17:03:04 +0800 Subject: [PATCH 066/271] remove todo --- src/ast.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 4b187a3..674a65b 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -1,5 +1,3 @@ -// TODO: X ala X constructions - export type Modifier = | { type: "word"; word: string } | { type: "proper words"; words: string } From 0717a0f964cf670211d11cd95e55560c8574e791 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 17:19:28 +0800 Subject: [PATCH 067/271] update translation --- CHANGELOG.md | 2 ++ src/translation.ts | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f31187..1a4ae54 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,8 @@ For this version. The whole code has been rewritten. This makes the code a lot e - New limitation list. - Dropped support for "a" particle. - Error messages are now very inaccurate. +- Update translation list: + - _tonsi_ – change nouns "transgender", "transgenders", "non-binary", and "non-binaries" into "transgender person", "transgender people", "non-binary person", and "non-binary people" (I DIDN'T MEAN TO OBJECTIFY THEM OMFG I'M SO SORRY) Inside update (intended for developers): diff --git a/src/translation.ts b/src/translation.ts index 6231d0d..d2af1d2 100644 --- a/src/translation.ts +++ b/src/translation.ts @@ -100,7 +100,7 @@ export const NOUN = { tenpo: ["time"], toki: ["communication", "communications", "language", "languages", "hello"], tomo: ["house", "houses"], - tonsi: ["transgender", "transgenders", "non-binary", "non-binaries"], + tonsi: ["transgender person", "transgender people", "non-binary person", "non-binary people"], tu: ["pair"], unpa: ["sex"], uta: ["mouth"], From ed8adb52b13f47908b4bdc4d45ba18b92143337a Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 17:20:08 +0800 Subject: [PATCH 068/271] small change --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 1a4ae54..eeea01f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ For this version. The whole code has been rewritten. This makes the code a lot e Inside update (intended for developers): -- Rewritten whole code to use module and typescript. +- Rewritten whole code to use module and TypeScript. - Rewritten parser to use parser combinator. - Add language codes to html. From 43be7016584dbf1a358c913d41b38691f290e70e Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 17:22:02 +0800 Subject: [PATCH 069/271] further establish the sorry --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eeea01f..fdef581 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ For this version. The whole code has been rewritten. This makes the code a lot e - Dropped support for "a" particle. - Error messages are now very inaccurate. - Update translation list: - - _tonsi_ – change nouns "transgender", "transgenders", "non-binary", and "non-binaries" into "transgender person", "transgender people", "non-binary person", and "non-binary people" (I DIDN'T MEAN TO OBJECTIFY THEM OMFG I'M SO SORRY) + - _tonsi_ – change nouns "transgender", "transgenders", "non-binary", and "non-binaries" into "transgender person", "transgender people", "non-binary person", and "non-binary people" (I DIDN'T MEAN TO OBJECTIFY THEM OMFG I'M SO SORRY 😭😭😭) Inside update (intended for developers): From be2ba6a2ca4639c4adc715655ff1ffdd65052da6 Mon Sep 17 00:00:00 2001 From: neverRare Date: Fri, 12 Jan 2024 17:29:19 +0800 Subject: [PATCH 070/271] small formatting edit --- CHANGELOG.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fdef581..23ac8ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,8 +5,8 @@ For this version. The whole code has been rewritten. This makes the code a lot easier to modify. Due to this, there are inevitable changes to the translator. - New limitation list. -- Dropped support for "a" particle. -- Error messages are now very inaccurate. +- Drop support for "a" particle. +- (Downgrade) Error messages are now very inaccurate. - Update translation list: - _tonsi_ – change nouns "transgender", "transgenders", "non-binary", and "non-binaries" into "transgender person", "transgender people", "non-binary person", and "non-binary people" (I DIDN'T MEAN TO OBJECTIFY THEM OMFG I'M SO SORRY 😭😭😭) From 0d5055bb2a37e7a7bedb3e9c8bc6e63bb1e69a34 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 13 Jan 2024 16:37:59 +0800 Subject: [PATCH 071/271] rewritten parser combinators to use more functional style --- src/parser.ts | 118 +++++++++++++++++--------------------------------- 1 file changed, 39 insertions(+), 79 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index bd23e32..8b60d6a 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -22,29 +22,19 @@ type ParserOutput = Output>; class Parser { constructor(public readonly parser: (src: string) => ParserOutput) {} map(mapper: (x: T) => U): Parser { - return new Parser((src) => { - const result = this.parser(src); - if (result.isError()) { - if (result.error) { - return new Output>(result.error); - } else { - return new Output([]); - } - } - const output = new Output>([]); - for (const { value, rest } of result.output) { + return new Parser((src) => + this.parser(src).flatMap(({ value, rest }) => { try { - output.push({ value: mapper(value), rest }); + return new Output([{ value: mapper(value), rest }]); } catch (error) { if (error instanceof Error) { - output.setError(error); + return new Output(error); } else { throw error; } } - } - return output; - }); + }) + ); } with(parser: Parser): Parser { return sequence(this, parser).map(([_, output]) => output); @@ -72,9 +62,7 @@ function match(regex: RegExp): Parser { }); } function nothing(): Parser { - return new Parser((src) => { - return new Output([{ value: null, rest: src }]); - }); + return new Parser((src) => new Output([{ value: null, rest: src }])); } function eol(): Parser { return new Parser((src) => { @@ -89,13 +77,9 @@ function recursive(parser: () => Parser): Parser { return new Parser((src) => parser().parser(src)); } function choice(...choices: Array>): Parser { - return new Parser((src) => { - let output = new Output>([]); - for (const parser of choices) { - output.append(parser.parser(src)); - } - return output; - }); + return new Parser((src) => + new Output(choices).flatMap((parser) => parser.parser(src)) + ); } function optional(parser: Parser): Parser { return choice(parser, nothing()); @@ -103,55 +87,37 @@ function optional(parser: Parser): Parser { function sequence>( ...sequence: { [I in keyof T]: Parser } & { length: T["length"] } ): Parser { - if (sequence.length === 0) { - throw new Error("sequences can't be empty"); - } // We resorted to using `any` types here, make sure it works properly - return new Parser((src) => { - let wholeOutput = new Output>([{ value: [], rest: src }]); - for (const parser of sequence) { - let newOutput = new Output>([]); - for (const { value, rest } of wholeOutput.output) { - const { output, error } = parser.parser(rest); - if (output.length === 0) { - newOutput.setError(error); - } else { - for (const { value: newValue, rest } of output) { - newOutput.push({ - value: [...value, newValue], - rest, - }); - } - } - } - wholeOutput = newOutput; - } - return wholeOutput; - }); + return new Parser((src) => + sequence.reduce( + (output, parser) => + output.flatMap(({ value, rest }) => + parser.parser(rest).map(({ value: newValue, rest }) => ({ + value: [...value, newValue], + rest, + })) + ), + new Output>([{ value: [], rest: src }]), + ) + ); } function many(parser: Parser): Parser> { return new Parser((src) => { - let wholeOutput = new Output>>([ + const wholeOutput = new Output>>([ { value: [], rest: src }, ]); let currentOutput = new Output>>([ { value: [], rest: src }, ]); while (true) { - let newOutput = new Output>>([]); - for (const { value, rest } of currentOutput.output) { - const { output, error } = parser.parser(rest); - if (output.length === 0) { - newOutput.setError(error); - } else { - for (const { value: newValue, rest } of output) { - newOutput.push({ - value: [...value, newValue], - rest, - }); - } - } - } + const newOutput = currentOutput.flatMap(({ value, rest }) => + parser.parser(rest).map(( + { value: newValue, rest }, + ) => ({ + value: [...value, newValue], + rest, + })) + ); if (newOutput.isError()) { break; } else { @@ -168,20 +134,14 @@ function all(parser: Parser): Parser> { { value: [], rest: src }, ]); while (true) { - let newOutput = new Output>>([]); - for (const { value, rest } of wholeOutput.output) { - const { output, error } = parser.parser(rest); - if (output.length === 0) { - newOutput.setError(error); - } else { - for (const { value: newValue, rest } of output) { - newOutput.push({ - value: [...value, newValue], - rest, - }); - } - } - } + const newOutput = wholeOutput.flatMap(({ value, rest }) => + parser.parser(rest).map(( + { value: newValue, rest }, + ) => ({ + value: [...value, newValue], + rest, + })) + ); if (newOutput.isError()) { break; } else { From 6e72d569b41539bfc5ec5d1c697a4bd762f1e706 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 13 Jan 2024 16:59:55 +0800 Subject: [PATCH 072/271] functional programming FTW! --- src/parser.ts | 31 ++++++------------------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 8b60d6a..b49c5c6 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -102,31 +102,12 @@ function sequence>( ); } function many(parser: Parser): Parser> { - return new Parser((src) => { - const wholeOutput = new Output>>([ - { value: [], rest: src }, - ]); - let currentOutput = new Output>>([ - { value: [], rest: src }, - ]); - while (true) { - const newOutput = currentOutput.flatMap(({ value, rest }) => - parser.parser(rest).map(( - { value: newValue, rest }, - ) => ({ - value: [...value, newValue], - rest, - })) - ); - if (newOutput.isError()) { - break; - } else { - wholeOutput.append(newOutput); - currentOutput = newOutput; - } - } - return wholeOutput; - }); + return choice( + nothing().map(() => []), + sequence(parser, recursive(() => many(parser))).map(( + [first, rest], + ) => [first, ...rest]), + ); } function all(parser: Parser): Parser> { return new Parser((src) => { From 4a26288a374ed70398f141f5042fc16379481eb3 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 13 Jan 2024 17:01:39 +0800 Subject: [PATCH 073/271] we don't need this --- src/parser.ts | 33 ++++----------------------------- 1 file changed, 4 insertions(+), 29 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index b49c5c6..9fcc3b9 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -109,38 +109,12 @@ function many(parser: Parser): Parser> { ) => [first, ...rest]), ); } -function all(parser: Parser): Parser> { - return new Parser((src) => { - let wholeOutput = new Output>>([ - { value: [], rest: src }, - ]); - while (true) { - const newOutput = wholeOutput.flatMap(({ value, rest }) => - parser.parser(rest).map(( - { value: newValue, rest }, - ) => ({ - value: [...value, newValue], - rest, - })) - ); - if (newOutput.isError()) { - break; - } else { - wholeOutput = newOutput; - } - } - return wholeOutput; - }); -} function manyAtLeastOnce(parser: Parser): Parser> { return sequence(parser, many(parser)).map(([first, rest]) => [ first, ...rest, ]); } -function allAtLeastOnce(parser: Parser): Parser> { - return sequence(parser, all(parser)).map(([first, rest]) => [first, ...rest]); -} function allSpace(): Parser { return match(/\s*/).map(([space]) => space); } @@ -148,9 +122,10 @@ function word(): Parser { return match(/([a-z]+)\s*/).map(([_, word]) => word); } function properWords(): Parser { - return allAtLeastOnce(match(/([A-Z][a-z]*)\s*/).map(([_, word]) => word)).map( - (array) => array.join(" "), - ); + return manyAtLeastOnce(match(/([A-Z][a-z]*)\s*/).map(([_, word]) => word)) + .map( + (array) => array.join(" "), + ); } function wordFrom(set: Set, description: string): Parser { return word().map((word) => { From 1622cf99542ec28733aa5af7f5f20a290b9d0c80 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 13 Jan 2024 17:39:55 +0800 Subject: [PATCH 074/271] rename recursive to lazy --- src/parser.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 9fcc3b9..4f83227 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -73,7 +73,7 @@ function eol(): Parser { } }); } -function recursive(parser: () => Parser): Parser { +function lazy(parser: () => Parser): Parser { return new Parser((src) => parser().parser(src)); } function choice(...choices: Array>): Parser { @@ -104,7 +104,7 @@ function sequence>( function many(parser: Parser): Parser> { return choice( nothing().map(() => []), - sequence(parser, recursive(() => many(parser))).map(( + sequence(parser, lazy(() => many(parser))).map(( [first, rest], ) => [first, ...rest]), ); @@ -186,7 +186,7 @@ function phrase(): Parser { function fullPhrase(): Parser { return sequence( optional(wordFrom(PREVERB, "preverb")), - recursive(phrase), + lazy(phrase), ).map(([preverb, phrase]) => { if (preverb) { return { @@ -290,7 +290,7 @@ function sentence(): Parser { fullClause().map( (clause) => ({ type: "single clause", clause } as Sentence), ), - sequence(fullClause().skip(specificWord("la")), recursive(sentence)).map( + sequence(fullClause().skip(specificWord("la")), lazy(sentence)).map( ([left, right]) => ({ type: "la clauses", left, right }), ), ); From cb58d590defa318ba5933068923a70a8a2e3f7e1 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 13 Jan 2024 17:41:02 +0800 Subject: [PATCH 075/271] order matters --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index 4f83227..c5698e2 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -103,10 +103,10 @@ function sequence>( } function many(parser: Parser): Parser> { return choice( - nothing().map(() => []), sequence(parser, lazy(() => many(parser))).map(( [first, rest], ) => [first, ...rest]), + nothing().map(() => []), ); } function manyAtLeastOnce(parser: Parser): Parser> { From 3e17a725a3833f481abf598a0d2f603c323fb066 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 13 Jan 2024 17:53:52 +0800 Subject: [PATCH 076/271] reintroduce "all" combinator --- src/parser.ts | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index c5698e2..80df067 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -81,6 +81,17 @@ function choice(...choices: Array>): Parser { new Output(choices).flatMap((parser) => parser.parser(src)) ); } +function choiceOnlyOne(...choices: Array>): Parser { + return new Parser((src) => + choices.reduce((output, parser) => { + if (output.isError()) { + return parser.parser(src); + } else { + return output; + } + }, new Output>([])) + ); +} function optional(parser: Parser): Parser { return choice(parser, nothing()); } @@ -115,6 +126,20 @@ function manyAtLeastOnce(parser: Parser): Parser> { ...rest, ]); } +function all(parser: Parser): Parser> { + return choiceOnlyOne( + sequence(parser, lazy(() => many(parser))).map(( + [first, rest], + ) => [first, ...rest]), + nothing().map(() => []), + ); +} +function allAtLeastOnce(parser: Parser): Parser> { + return sequence(parser, all(parser)).map(([first, rest]) => [ + first, + ...rest, + ]); +} function allSpace(): Parser { return match(/\s*/).map(([space]) => space); } @@ -122,7 +147,7 @@ function word(): Parser { return match(/([a-z]+)\s*/).map(([_, word]) => word); } function properWords(): Parser { - return manyAtLeastOnce(match(/([A-Z][a-z]*)\s*/).map(([_, word]) => word)) + return allAtLeastOnce(match(/([A-Z][a-z]*)\s*/).map(([_, word]) => word)) .map( (array) => array.join(" "), ); From d50eb05db6c312e4dd07293c4501a37ae0a25d89 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 13 Jan 2024 18:00:25 +0800 Subject: [PATCH 077/271] minimize effectful methods --- src/output.ts | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/src/output.ts b/src/output.ts index e8ab72d..cde260d 100644 --- a/src/output.ts +++ b/src/output.ts @@ -12,11 +12,7 @@ export class Output { throw new Error("passed not array nor error"); } } - push(output: T): void { - this.output.push(output); - this.error = null; - } - append({ output, error }: Output): void { + private append({ output, error }: Output): void { this.output = [...this.output, ...output]; if (this.output.length > 0) { this.error = null; @@ -24,11 +20,6 @@ export class Output { this.error = error; } } - setError(error: null | Error): void { - if (!this.error && this.output.length === 0) { - this.error = error; - } - } isError(): boolean { return this.output.length === 0; } From a819649e0ee805e335a5d28deaaf97d6ed319620 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 13 Jan 2024 18:03:03 +0800 Subject: [PATCH 078/271] allow any --- src/parser.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parser.ts b/src/parser.ts index 80df067..b988611 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -108,6 +108,7 @@ function sequence>( rest, })) ), + // deno-lint-ignore no-explicit-any new Output>([{ value: [], rest: src }]), ) ); From 48962d60782c45b30145f9c0780cefb1cac89834 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 13 Jan 2024 18:14:45 +0800 Subject: [PATCH 079/271] move map complexity above --- src/output.ts | 12 +++++++++++- src/parser.ts | 15 ++++----------- 2 files changed, 15 insertions(+), 12 deletions(-) diff --git a/src/output.ts b/src/output.ts index cde260d..8460ab5 100644 --- a/src/output.ts +++ b/src/output.ts @@ -24,7 +24,17 @@ export class Output { return this.output.length === 0; } map(mapper: (value: T) => U): Output { - return new Output(this.output.map(mapper)); + return this.flatMap((value) => { + try { + return new Output([mapper(value)]); + } catch (error) { + if (error instanceof Error) { + return new Output(error); + } else { + throw error; + } + } + }); } flatMap(mapper: (value: T) => Output): Output { if (this.isError()) { diff --git a/src/parser.ts b/src/parser.ts index b988611..74b8359 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -23,17 +23,10 @@ class Parser { constructor(public readonly parser: (src: string) => ParserOutput) {} map(mapper: (x: T) => U): Parser { return new Parser((src) => - this.parser(src).flatMap(({ value, rest }) => { - try { - return new Output([{ value: mapper(value), rest }]); - } catch (error) { - if (error instanceof Error) { - return new Output(error); - } else { - throw error; - } - } - }) + this.parser(src).map(({ value, rest }) => ({ + value: mapper(value), + rest, + })) ); } with(parser: Parser): Parser { From 4d53b4a40298f34f7035cf885da79b4764205b11 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 13 Jan 2024 18:21:12 +0800 Subject: [PATCH 080/271] avoid empty error --- src/output.ts | 4 ++-- src/parser.ts | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/output.ts b/src/output.ts index 8460ab5..1a078c1 100644 --- a/src/output.ts +++ b/src/output.ts @@ -41,10 +41,10 @@ export class Output { if (this.error) { return new Output(this.error); } else { - return new Output([]); + return new Output(new Error("no error provided")); } } - const wholeOutput = new Output([]); + const wholeOutput = new Output(new Error("no error provided")); for (const value of this.output) { wholeOutput.append(mapper(value)); } diff --git a/src/parser.ts b/src/parser.ts index 74b8359..bb626fb 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -82,7 +82,7 @@ function choiceOnlyOne(...choices: Array>): Parser { } else { return output; } - }, new Output>([])) + }, new Output>(new Error("no error provided"))) ); } function optional(parser: Parser): Parser { From 790027f2c59090be1bf3566f6dd18cc4086c1142 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 13 Jan 2024 19:15:56 +0800 Subject: [PATCH 081/271] create unique error type for output --- src/error.ts | 5 +++-- src/output.ts | 12 +++++++----- src/parser.ts | 4 ++-- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/src/error.ts b/src/error.ts index 2c9a819..2f52844 100644 --- a/src/error.ts +++ b/src/error.ts @@ -1,9 +1,10 @@ -export class UnreachableError extends Error { +export class OutputError extends Error {} +export class UnreachableError extends OutputError { constructor() { super("This is an error you shouldn't see... Please report this error."); } } -export class UnrecognizedError extends Error { +export class UnrecognizedError extends OutputError { constructor(token: string) { super(`${token} is unrecognized.`); } diff --git a/src/output.ts b/src/output.ts index 1a078c1..4002fac 100644 --- a/src/output.ts +++ b/src/output.ts @@ -1,7 +1,9 @@ +import { OutputError } from "./error.ts"; + export class Output { output: Array; - error: null | Error; - constructor(output: Array | Error) { + error: null | OutputError; + constructor(output: Array | OutputError) { if (Array.isArray(output)) { this.output = output; this.error = null; @@ -28,7 +30,7 @@ export class Output { try { return new Output([mapper(value)]); } catch (error) { - if (error instanceof Error) { + if (error instanceof OutputError) { return new Output(error); } else { throw error; @@ -41,10 +43,10 @@ export class Output { if (this.error) { return new Output(this.error); } else { - return new Output(new Error("no error provided")); + return new Output(new OutputError("no error provided")); } } - const wholeOutput = new Output(new Error("no error provided")); + const wholeOutput = new Output(new OutputError("no error provided")); for (const value of this.output) { wholeOutput.append(mapper(value)); } diff --git a/src/parser.ts b/src/parser.ts index bb626fb..24f0efe 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -8,7 +8,7 @@ import { Preposition, Sentence, } from "./ast.ts"; -import { UnreachableError, UnrecognizedError } from "./error.ts"; +import { OutputError, UnreachableError, UnrecognizedError } from "./error.ts"; import { Output } from "./output.ts"; import { CONTENT_WORD, @@ -82,7 +82,7 @@ function choiceOnlyOne(...choices: Array>): Parser { } else { return output; } - }, new Output>(new Error("no error provided"))) + }, new Output>(new OutputError("no error provided"))) ); } function optional(parser: Parser): Parser { From 54ed0a1961399f44082226e7c5ede9d470e13366 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 13 Jan 2024 19:18:25 +0800 Subject: [PATCH 082/271] output constructor now have optional parameter --- src/output.ts | 9 +++++---- src/parser.ts | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/output.ts b/src/output.ts index 4002fac..addad13 100644 --- a/src/output.ts +++ b/src/output.ts @@ -3,7 +3,7 @@ import { OutputError } from "./error.ts"; export class Output { output: Array; error: null | OutputError; - constructor(output: Array | OutputError) { + constructor(output?: Array | OutputError) { if (Array.isArray(output)) { this.output = output; this.error = null; @@ -11,7 +11,8 @@ export class Output { this.output = []; this.error = output; } else { - throw new Error("passed not array nor error"); + this.output = []; + this.error = new OutputError("no error provided"); } } private append({ output, error }: Output): void { @@ -43,10 +44,10 @@ export class Output { if (this.error) { return new Output(this.error); } else { - return new Output(new OutputError("no error provided")); + return new Output(new OutputError()); } } - const wholeOutput = new Output(new OutputError("no error provided")); + const wholeOutput = new Output(new OutputError()); for (const value of this.output) { wholeOutput.append(mapper(value)); } diff --git a/src/parser.ts b/src/parser.ts index 24f0efe..1106adc 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -82,7 +82,7 @@ function choiceOnlyOne(...choices: Array>): Parser { } else { return output; } - }, new Output>(new OutputError("no error provided"))) + }, new Output>(new OutputError())) ); } function optional(parser: Parser): Parser { From c8c119a81f6293bb81a4d89b2c6a7ec369f4bc89 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 13 Jan 2024 19:19:10 +0800 Subject: [PATCH 083/271] small edit --- src/output.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/output.ts b/src/output.ts index addad13..f7a5542 100644 --- a/src/output.ts +++ b/src/output.ts @@ -7,7 +7,7 @@ export class Output { if (Array.isArray(output)) { this.output = output; this.error = null; - } else if (output instanceof Error) { + } else if (output instanceof OutputError) { this.output = []; this.error = output; } else { From 2a0dfbee9d4bb7b312188baa504affbf8a11959c Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 13 Jan 2024 19:20:53 +0800 Subject: [PATCH 084/271] allow null and undefined in optional parameter --- src/output.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/output.ts b/src/output.ts index f7a5542..c91a316 100644 --- a/src/output.ts +++ b/src/output.ts @@ -3,7 +3,7 @@ import { OutputError } from "./error.ts"; export class Output { output: Array; error: null | OutputError; - constructor(output?: Array | OutputError) { + constructor(output?: undefined | null | Array | OutputError) { if (Array.isArray(output)) { this.output = output; this.error = null; From 3fa3f8d9e9ad0b2e2fd0285b53c1ceb51e275d9b Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 13 Jan 2024 19:23:53 +0800 Subject: [PATCH 085/271] further establish the parameter is now optional --- src/output.ts | 4 ++-- src/parser.ts | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/output.ts b/src/output.ts index c91a316..9e2621b 100644 --- a/src/output.ts +++ b/src/output.ts @@ -44,10 +44,10 @@ export class Output { if (this.error) { return new Output(this.error); } else { - return new Output(new OutputError()); + return new Output(); } } - const wholeOutput = new Output(new OutputError()); + const wholeOutput = new Output(); for (const value of this.output) { wholeOutput.append(mapper(value)); } diff --git a/src/parser.ts b/src/parser.ts index 1106adc..a73a59f 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -82,7 +82,7 @@ function choiceOnlyOne(...choices: Array>): Parser { } else { return output; } - }, new Output>(new OutputError())) + }, new Output>()) ); } function optional(parser: Parser): Parser { From 427be7ddf099c5e2231c57bb250197a727d56282 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 13 Jan 2024 19:26:34 +0800 Subject: [PATCH 086/271] lint --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index a73a59f..bc51540 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -8,7 +8,7 @@ import { Preposition, Sentence, } from "./ast.ts"; -import { OutputError, UnreachableError, UnrecognizedError } from "./error.ts"; +import { UnreachableError, UnrecognizedError } from "./error.ts"; import { Output } from "./output.ts"; import { CONTENT_WORD, From 8c1cac4437adba69831f09b8b0a88fabec497510 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 13 Jan 2024 19:29:45 +0800 Subject: [PATCH 087/271] small change --- src/output.ts | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/output.ts b/src/output.ts index 9e2621b..9f00129 100644 --- a/src/output.ts +++ b/src/output.ts @@ -41,11 +41,7 @@ export class Output { } flatMap(mapper: (value: T) => Output): Output { if (this.isError()) { - if (this.error) { - return new Output(this.error); - } else { - return new Output(); - } + return new Output(this.error); } const wholeOutput = new Output(); for (const value of this.output) { From a24ca5faa722d22e1a7412cc42371ea9d5fb6ccd Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 13 Jan 2024 19:41:03 +0800 Subject: [PATCH 088/271] fix error --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index bc51540..15202ce 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -122,7 +122,7 @@ function manyAtLeastOnce(parser: Parser): Parser> { } function all(parser: Parser): Parser> { return choiceOnlyOne( - sequence(parser, lazy(() => many(parser))).map(( + sequence(parser, lazy(() => all(parser))).map(( [first, rest], ) => [first, ...rest]), nothing().map(() => []), From 595126176810e25318562f2c68ad5a0c6a7232fe Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 13 Jan 2024 19:46:17 +0800 Subject: [PATCH 089/271] more error fixed --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index 15202ce..9205b30 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -160,7 +160,7 @@ function specificWord(thatWord: string): Parser { if (thatWord === thisWord) { return thisWord; } else { - throw new UnrecognizedError(`"${thisWord}" instead of "${word}"`); + throw new UnrecognizedError(`"${thisWord}" instead of "${thatWord}"`); } }); } From ea4f527b2276a2318eb3f248a8b4eda6ab834fd1 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 14 Jan 2024 08:31:31 +0800 Subject: [PATCH 090/271] ensure error is provided when the array is empty --- src/output.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/output.ts b/src/output.ts index 9f00129..6b649e7 100644 --- a/src/output.ts +++ b/src/output.ts @@ -6,7 +6,11 @@ export class Output { constructor(output?: undefined | null | Array | OutputError) { if (Array.isArray(output)) { this.output = output; - this.error = null; + if (output.length === 0) { + this.error = new OutputError("no error provided"); + } else { + this.error = null; + } } else if (output instanceof OutputError) { this.output = []; this.error = output; From 4aef606115f4fae7e9019fb99bfa30d250214457 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 14 Jan 2024 09:14:13 +0800 Subject: [PATCH 091/271] include documentation comments --- src/ast.ts | 8 +++++++ src/error.ts | 6 +++++ src/output.ts | 13 ++++++++++ src/parser.ts | 60 ++++++++++++++++++++++++++++++++++++++++++++++ src/translation.ts | 3 +++ src/vocabulary.ts | 5 ++++ 6 files changed, 95 insertions(+) diff --git a/src/ast.ts b/src/ast.ts index 674a65b..aa5ab59 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -1,3 +1,4 @@ +/** Represents a single modifier. */ export type Modifier = | { type: "word"; word: string } | { type: "proper words"; words: string } @@ -5,18 +6,23 @@ export type Modifier = | { type: "nanpa ordinal"; phrase: FullPhrase } | { type: "cardinal"; number: Array }; +/** Represents a simple phrase. */ export type Phrase = { headWord: string; modifiers: Array }; +/** Represents a phrase including preverbial phrases. */ export type FullPhrase = | { type: "default"; phrase: Phrase } | { type: "preverb"; preverb: string; phrase: Phrase }; +/** Represents a single prepositional phrase. */ export type Preposition = { preposition: string; phrase: FullPhrase }; +/** Represents a single predicate. */ export type Predicate = | { type: "default"; predicate: FullPhrase } | { type: "preposition"; preposition: Preposition }; +/** Represents a simple clause. */ export type Clause = | { type: "en phrases"; phrases: Array } | { type: "o vocative"; phrases: Array } @@ -34,8 +40,10 @@ export type Clause = } | { type: "prepositions"; prepositions: Array }; +/** Represents a clause including preclause and postclause. */ export type FullClause = { taso: boolean; clause: Clause }; +/** Represents a single full sentence. */ export type Sentence = | { type: "single clause"; clause: FullClause } | { type: "la clauses"; left: FullClause; right: Sentence }; diff --git a/src/error.ts b/src/error.ts index 2f52844..d413bfe 100644 --- a/src/error.ts +++ b/src/error.ts @@ -1,9 +1,15 @@ +/** Represents Error used by `Output`. */ export class OutputError extends Error {} +/** + * Represents errors that cannot be seen. This includes errors expected to be + * unreached as well as errors expected to be covered by non-error outputs. + */ export class UnreachableError extends OutputError { constructor() { super("This is an error you shouldn't see... Please report this error."); } } +/** Represents Error caused by unrecognized elements. */ export class UnrecognizedError extends OutputError { constructor(token: string) { super(`${token} is unrecognized.`); diff --git a/src/output.ts b/src/output.ts index 6b649e7..41829f3 100644 --- a/src/output.ts +++ b/src/output.ts @@ -1,7 +1,12 @@ import { OutputError } from "./error.ts"; +/** Represents possibilities and error. */ export class Output { + /** Represents possibilities, considered error when the array is empty. */ output: Array; + /** + * An optional error, should be supplied if and only if the array is empty. + */ error: null | OutputError; constructor(output?: undefined | null | Array | OutputError) { if (Array.isArray(output)) { @@ -30,6 +35,10 @@ export class Output { isError(): boolean { return this.output.length === 0; } + /** + * Maps all values and returns new Output. For convenience, the mapper + * function can throw OutputError; Other kinds of errors will be ignored. + */ map(mapper: (value: T) => U): Output { return this.flatMap((value) => { try { @@ -43,6 +52,10 @@ export class Output { } }); } + /** + * Accepts mapper function that returns another Output. flatMap takes all + * values and flattens them into single array for Output. + */ flatMap(mapper: (value: T) => Output): Output { if (this.isError()) { return new Output(this.error); diff --git a/src/parser.ts b/src/parser.ts index 9205b30..373d4fe 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -17,10 +17,17 @@ import { SPECIAL_SUBJECT, } from "./vocabulary.ts"; +/** A single parsing result. */ type ValueRest = { value: T; rest: string }; +/** A special kind of Output that parsers returns. */ type ParserOutput = Output>; +/** Wrapper of parser function with added methods for convenience. */ class Parser { constructor(public readonly parser: (src: string) => ParserOutput) {} + /** + * Maps the parsing result. For convenience, the mapper function can throw + * an OutputError; Other kinds of error are ignored. + */ map(mapper: (x: T) => U): Parser { return new Parser((src) => this.parser(src).map(({ value, rest }) => ({ @@ -29,13 +36,19 @@ class Parser { })) ); } + /** Takes another parser and discards the first parsing result. */ with(parser: Parser): Parser { return sequence(this, parser).map(([_, output]) => output); } + /** Takes another parser and discards its parsing result. */ skip(parser: Parser): Parser { return sequence(this, parser).map(([output, _]) => output); } } +/** + * Uses Regular Expression to create parser. The parser outputs + * RegExpMatchArray, which is what `string.match( ... )` returns. + */ function match(regex: RegExp): Parser { const newRegex = new RegExp("^" + regex.source, regex.flags); return new Parser((src) => { @@ -54,9 +67,11 @@ function match(regex: RegExp): Parser { } }); } +/** Parses nothing and leaves the source string intact. */ function nothing(): Parser { return new Parser((src) => new Output([{ value: null, rest: src }])); } +/** Parses the end of line (or the end of sentence in context of Toki Pona) */ function eol(): Parser { return new Parser((src) => { if (src === "") { @@ -66,14 +81,26 @@ function eol(): Parser { } }); } +/** + * Lazily evaluates the parser function only when needed. Useful for recursive + * parsers. + */ function lazy(parser: () => Parser): Parser { return new Parser((src) => parser().parser(src)); } +/** + * Evaluates all parsers on the same source string and sums it all on a single + * Output. + */ function choice(...choices: Array>): Parser { return new Parser((src) => new Output(choices).flatMap((parser) => parser.parser(src)) ); } +/** + * Tries to evaluate each parsers one at a time and only returns the first + * Output without error. + */ function choiceOnlyOne(...choices: Array>): Parser { return new Parser((src) => choices.reduce((output, parser) => { @@ -85,9 +112,11 @@ function choiceOnlyOne(...choices: Array>): Parser { }, new Output>()) ); } +/** Combines `parser` and the `nothing` parser, and output `null | T`. */ function optional(parser: Parser): Parser { return choice(parser, nothing()); } +/** Takes all parsers and applies them one after another. */ function sequence>( ...sequence: { [I in keyof T]: Parser } & { length: T["length"] } ): Parser { @@ -106,6 +135,11 @@ function sequence>( ) ); } +/** + * Parses `parser` multiple times and returns an `Array`. The resulting + * output includes all outputs from parsing nothing to parsing as many as + * possible. + */ function many(parser: Parser): Parser> { return choice( sequence(parser, lazy(() => many(parser))).map(( @@ -114,12 +148,17 @@ function many(parser: Parser): Parser> { nothing().map(() => []), ); } +/** Like `many` but parses at least once. */ function manyAtLeastOnce(parser: Parser): Parser> { return sequence(parser, many(parser)).map(([first, rest]) => [ first, ...rest, ]); } +/** + * Parses `parser` multiple times and returns an `Array`. This function is + * exhaustive. + */ function all(parser: Parser): Parser> { return choiceOnlyOne( sequence(parser, lazy(() => all(parser))).map(( @@ -128,24 +167,32 @@ function all(parser: Parser): Parser> { nothing().map(() => []), ); } +/** Like `all` but parses at least once. */ function allAtLeastOnce(parser: Parser): Parser> { return sequence(parser, all(parser)).map(([first, rest]) => [ first, ...rest, ]); } +/** Parses whitespaces. */ function allSpace(): Parser { return match(/\s*/).map(([space]) => space); } +/** Parses lowercase word. */ function word(): Parser { return match(/([a-z]+)\s*/).map(([_, word]) => word); } +/** + * Parses all at least one uppercase words and combines them all into single + * string. This function is exhaustive like `all`. + */ function properWords(): Parser { return allAtLeastOnce(match(/([A-Z][a-z]*)\s*/).map(([_, word]) => word)) .map( (array) => array.join(" "), ); } +/** Parses word only from `set`. */ function wordFrom(set: Set, description: string): Parser { return word().map((word) => { if (set.has(word)) { @@ -155,6 +202,7 @@ function wordFrom(set: Set, description: string): Parser { } }); } +/** Parses a specific word. */ function specificWord(thatWord: string): Parser { return word().map((thisWord) => { if (thatWord === thisWord) { @@ -164,9 +212,11 @@ function specificWord(thatWord: string): Parser { } }); } +/** Parses headword. */ function headWord(): Parser { return wordFrom(CONTENT_WORD, "headword"); } +/** Parses a single modifier. */ function modifier(): Parser { return choice( specificWord("nanpa") @@ -194,6 +244,7 @@ function modifier(): Parser { // TODO: cardinal modifier ); } +/** Parses phrase. */ function phrase(): Parser { return sequence(headWord(), many(modifier())).map( ([headWord, modifiers]) => ({ @@ -202,6 +253,7 @@ function phrase(): Parser { }), ); } +/** Parses phrases including preverbial phrases. */ function fullPhrase(): Parser { return sequence( optional(wordFrom(PREVERB, "preverb")), @@ -221,6 +273,7 @@ function fullPhrase(): Parser { } }); } +/** Parses prepositional phrase. */ function preposition(): Parser { return sequence(wordFrom(PREPOSITION, "preposition"), fullPhrase()).map( ([preposition, phrase]) => ({ @@ -229,12 +282,14 @@ function preposition(): Parser { }), ); } +/** Parses phrases separated by _en_. */ function enPhrases(): Parser> { return sequence( fullPhrase(), many(specificWord("en").with(fullPhrase())), ).map(([first, rest]) => [first, ...rest]); } +/** Parses a single predicate. */ function predicate(): Parser { return choice( preposition().map((preposition) => ({ type: "preposition", preposition })), @@ -243,6 +298,7 @@ function predicate(): Parser { ), ); } +/** Parses a single clause. */ function clause(): Parser { return choice( sequence( @@ -296,6 +352,7 @@ function clause(): Parser { })), ); } +/** Parses a single clause including precaluse and postclause. */ function fullClause(): Parser { return sequence(optional(specificWord("taso")), clause()).map( ([taso, clause]) => ({ @@ -304,6 +361,7 @@ function fullClause(): Parser { }), ); } +/** Parses a single full sentence without punctuations. */ function sentence(): Parser { return choice( fullClause().map( @@ -314,6 +372,7 @@ function sentence(): Parser { ), ); } +/** The full parser. */ function fullSentence(): Parser { return allSpace() .with(sentence()) @@ -321,6 +380,7 @@ function fullSentence(): Parser { .skip(allSpace()) .skip(eol()); } +/** The full parser. */ export function parser(src: string): Output { return fullSentence() .parser(src) diff --git a/src/translation.ts b/src/translation.ts index d2af1d2..cf56c76 100644 --- a/src/translation.ts +++ b/src/translation.ts @@ -1,3 +1,4 @@ +/** Noun translations. */ export const NOUN = { akesi: ["reptile", "reptiles", "amphibian", "amphibians"], ala: ["nothing", "no"], @@ -112,6 +113,7 @@ export const NOUN = { weka: ["leaving"], wile: ["want", "wants", "need", "needs"], }; +/** Adjective translations. */ export const ADJECTIVE = { akesi: ["reptilian", "amphibian"], ala: ["not", "no"], @@ -225,6 +227,7 @@ export const ADJECTIVE = { weka: ["leaving"], wile: [], }; +/** Adverb translations. */ export const ADVERB = { akesi: [], ala: ["not"], diff --git a/src/vocabulary.ts b/src/vocabulary.ts index e5e683a..bd52bdf 100644 --- a/src/vocabulary.ts +++ b/src/vocabulary.ts @@ -1,3 +1,4 @@ +/** Particles. */ export const PARTICLES = new Set([ "a", "ala", @@ -11,6 +12,7 @@ export const PARTICLES = new Set([ "pi", "taso", ]); +/** Content words. */ export const CONTENT_WORD = new Set([ "akesi", "ala", @@ -125,6 +127,7 @@ export const CONTENT_WORD = new Set([ "weka", "wile", ]); +/** Special subjects that doesn't use _li_ */ export const SPECIAL_SUBJECT = new Set(["mi", "sina"]); export const NUMBER = new Set(["wan", "tu", "luka", "mute", "ale", "ali"]); export const PREVERB = new Set([ @@ -138,7 +141,9 @@ export const PREVERB = new Set([ "sona", "wile", ]); +/** Prepositions. */ export const PREPOSITION = new Set(["kepeken", "lon", "sama", "tan", "tawa"]); +/** Full vocabulary. */ export const VOCABULARY = new Set([ ...PARTICLES, ...CONTENT_WORD, From d93ccb8deb151c9c7e7468345e197b19aaf5d821 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 14 Jan 2024 09:41:27 +0800 Subject: [PATCH 092/271] include kepeken in content word set --- src/translation.ts | 1 + src/vocabulary.ts | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/translation.ts b/src/translation.ts index cf56c76..60db1cf 100644 --- a/src/translation.ts +++ b/src/translation.ts @@ -22,6 +22,7 @@ export const NOUN = { kama: ["arriving"], kasi: ["plant", "plants"], ken: ["ability", "abilities", "possibility", "possibilities"], + kepeken: [], kili: ["fruit", "fruits", "vegetable", "vegetables"], kiwen: ["hard thing", "hard things"], ko: ["soft thing", "soft things", "powder"], diff --git a/src/vocabulary.ts b/src/vocabulary.ts index bd52bdf..fb5e1f9 100644 --- a/src/vocabulary.ts +++ b/src/vocabulary.ts @@ -36,6 +36,7 @@ export const CONTENT_WORD = new Set([ "kama", "kasi", "ken", + "kepeken", "kili", "kiwen", "ko", @@ -147,5 +148,4 @@ export const PREPOSITION = new Set(["kepeken", "lon", "sama", "tan", "tawa"]); export const VOCABULARY = new Set([ ...PARTICLES, ...CONTENT_WORD, - ...PREPOSITION, ]); From 8441913671ff6a56ec491b7b7d4ef246aba58e12 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 14 Jan 2024 09:43:29 +0800 Subject: [PATCH 093/271] update limitations --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index e2812e5..443a976 100644 --- a/README.md +++ b/README.md @@ -78,6 +78,8 @@ The whole code is being rewritten and there will be new different limitations. - ✏️ "X ala X" constructions - ✏️ Extended numbering system - ✏️ Commas +- Non-pu vocabulary minus "pu" plus "tonsi" - Multiple sentences - Clause with both "li" and "o" - "kepeken" as headword or modifier +- "taso" as headword (it can be used as modifier) From c00fc0a25d66bc1584340d5b0b1218a09db07f6a Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 14 Jan 2024 13:49:53 +0800 Subject: [PATCH 094/271] include kepeken for consistency --- src/translation.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/translation.ts b/src/translation.ts index 60db1cf..4a34d1e 100644 --- a/src/translation.ts +++ b/src/translation.ts @@ -138,6 +138,7 @@ export const ADJECTIVE = { kama: ["arriving"], kasi: ["plant-like"], ken: [], + kepeken: [], kili: [], kiwen: ["hard"], ko: ["soft"], @@ -252,6 +253,7 @@ export const ADVERB = { kama: [], kasi: [], ken: [], + kepeken: [], kili: [], kiwen: [], ko: [], From 2f92a2d83f950123cb22aaf12def6eb868a0b245 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 14 Jan 2024 14:01:01 +0800 Subject: [PATCH 095/271] add parser for ordinal modifier --- src/parser.ts | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index 373d4fe..b811af2 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -216,6 +216,23 @@ function specificWord(thatWord: string): Parser { function headWord(): Parser { return wordFrom(CONTENT_WORD, "headword"); } +/** Parses number words in order. */ +function number(): Parser> { + return sequence( + all(specificWord("ale")), + all(specificWord("mute")), + all(specificWord("luka")), + all(specificWord("tu")), + all(specificWord("wan")), + ).map((array) => { + const output = array.flat(); + if (output.length === 0) { + throw new UnreachableError(); + } else { + return output; + } + }); +} /** Parses a single modifier. */ function modifier(): Parser { return choice( @@ -241,7 +258,7 @@ function modifier(): Parser { type: "pi", phrase, })), - // TODO: cardinal modifier + number().map((number) => ({ type: "cardinal", number })), ); } /** Parses phrase. */ From 5d548746b3b118525e5d2b09cf0808b6291187a1 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 14 Jan 2024 14:15:00 +0800 Subject: [PATCH 096/271] implement cardinal for phrases --- src/ast.ts | 28 ++++++++++++++++------------ src/parser.ts | 44 +++++++++++++++++++++++++++----------------- 2 files changed, 43 insertions(+), 29 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index aa5ab59..e0b9282 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -2,39 +2,43 @@ export type Modifier = | { type: "word"; word: string } | { type: "proper words"; words: string } - | { type: "pi"; phrase: FullPhrase } - | { type: "nanpa ordinal"; phrase: FullPhrase } + | { type: "pi"; phrase: Phrase } + | { type: "nanpa ordinal"; phrase: Phrase } | { type: "cardinal"; number: Array }; /** Represents a simple phrase. */ -export type Phrase = { headWord: string; modifiers: Array }; +export type SimplePhrase = { + type: "default"; + headWord: string; + modifiers: Array; +} | { type: "cardinal"; number: Array }; /** Represents a phrase including preverbial phrases. */ -export type FullPhrase = - | { type: "default"; phrase: Phrase } - | { type: "preverb"; preverb: string; phrase: Phrase }; +export type Phrase = + | { type: "default"; phrase: SimplePhrase } + | { type: "preverb"; preverb: string; phrase: SimplePhrase }; /** Represents a single prepositional phrase. */ -export type Preposition = { preposition: string; phrase: FullPhrase }; +export type Preposition = { preposition: string; phrase: Phrase }; /** Represents a single predicate. */ export type Predicate = - | { type: "default"; predicate: FullPhrase } + | { type: "default"; predicate: Phrase } | { type: "preposition"; preposition: Preposition }; /** Represents a simple clause. */ export type Clause = - | { type: "en phrases"; phrases: Array } - | { type: "o vocative"; phrases: Array } + | { type: "en phrases"; phrases: Array } + | { type: "o vocative"; phrases: Array } | { type: "li clause"; - subjects: Array; + subjects: Array; predicates: Array; prepositions: Array; } | { type: "o clause"; - subjects: Array; + subjects: Array; predicates: Array; prepositions: Array; } diff --git a/src/parser.ts b/src/parser.ts index b811af2..91827a0 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1,12 +1,12 @@ import { Clause, FullClause, - FullPhrase, Modifier, Phrase, Predicate, Preposition, Sentence, + SimplePhrase, } from "./ast.ts"; import { UnreachableError, UnrecognizedError } from "./error.ts"; import { Output } from "./output.ts"; @@ -237,7 +237,7 @@ function number(): Parser> { function modifier(): Parser { return choice( specificWord("nanpa") - .with(fullPhrase()) + .with(phrase()) .map((phrase) => ({ type: "nanpa ordinal", phrase, @@ -253,7 +253,7 @@ function modifier(): Parser { words, })), specificWord("pi") - .with(fullPhrase()) + .with(phrase()) .map((phrase) => ({ type: "pi", phrase, @@ -262,19 +262,26 @@ function modifier(): Parser { ); } /** Parses phrase. */ -function phrase(): Parser { - return sequence(headWord(), many(modifier())).map( - ([headWord, modifiers]) => ({ - headWord, - modifiers, - }), +function simplePhrase(): Parser { + return choice( + number().map((number) => ({ + type: "cardinal", + number, + } as SimplePhrase)), + sequence(headWord(), many(modifier())).map( + ([headWord, modifiers]) => ({ + type: "default", + headWord, + modifiers, + }), + ), ); } /** Parses phrases including preverbial phrases. */ -function fullPhrase(): Parser { +function phrase(): Parser { return sequence( optional(wordFrom(PREVERB, "preverb")), - lazy(phrase), + lazy(simplePhrase), ).map(([preverb, phrase]) => { if (preverb) { return { @@ -292,7 +299,7 @@ function fullPhrase(): Parser { } /** Parses prepositional phrase. */ function preposition(): Parser { - return sequence(wordFrom(PREPOSITION, "preposition"), fullPhrase()).map( + return sequence(wordFrom(PREPOSITION, "preposition"), phrase()).map( ([preposition, phrase]) => ({ preposition, phrase, @@ -300,17 +307,17 @@ function preposition(): Parser { ); } /** Parses phrases separated by _en_. */ -function enPhrases(): Parser> { +function enPhrases(): Parser> { return sequence( - fullPhrase(), - many(specificWord("en").with(fullPhrase())), + phrase(), + many(specificWord("en").with(phrase())), ).map(([first, rest]) => [first, ...rest]); } /** Parses a single predicate. */ function predicate(): Parser { return choice( preposition().map((preposition) => ({ type: "preposition", preposition })), - fullPhrase().map( + phrase().map( (predicate) => ({ type: "default", predicate } as Predicate), ), ); @@ -326,7 +333,10 @@ function clause(): Parser { ).map(([subject, predicate, morePredicates, prepositions]) => ({ type: "li clause", subjects: [ - { type: "default", phrase: { headWord: subject, modifiers: [] } }, + { + type: "default", + phrase: { type: "default", headWord: subject, modifiers: [] }, + }, ], predicates: [predicate, ...morePredicates], prepositions, From e808ed3d4f689756ee599a03305f7a83273db31e Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 14 Jan 2024 14:21:13 +0800 Subject: [PATCH 097/271] implement anu seme as special postclause --- src/ast.ts | 2 +- src/parser.ts | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index e0b9282..a16b3e7 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -45,7 +45,7 @@ export type Clause = | { type: "prepositions"; prepositions: Array }; /** Represents a clause including preclause and postclause. */ -export type FullClause = { taso: boolean; clause: Clause }; +export type FullClause = { taso: boolean; anuSeme: boolean; clause: Clause }; /** Represents a single full sentence. */ export type Sentence = diff --git a/src/parser.ts b/src/parser.ts index 91827a0..9de6f08 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -381,9 +381,14 @@ function clause(): Parser { } /** Parses a single clause including precaluse and postclause. */ function fullClause(): Parser { - return sequence(optional(specificWord("taso")), clause()).map( - ([taso, clause]) => ({ + return sequence( + optional(specificWord("taso")), + clause(), + optional(sequence(specificWord("anu"), specificWord("seme"))), + ).map( + ([taso, clause, anuSeme]) => ({ taso: !!taso, + anuSeme: !!anuSeme, clause, }), ); From 4801d2510d6e43cc30ca5fb90e1760d1ff18a852 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 14 Jan 2024 14:21:24 +0800 Subject: [PATCH 098/271] ass anu seme as special clause --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 443a976..6ff3754 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ These are the terminology used in [limitations]. **These are not official gramma - Modifier – A part of speech that modifies headword or another modifier. - Phrase – Headword and its modifiers. - Preclause – "taso" or "a" particle before clauses. -- Postclause – "a" particle after clauses. +- Postclause – "a" particle or "anu seme" phrase after clauses. - Clause – Phrase or sentence found before and after "la". - Proper Word – Proper name; Capitalized in Toki Pona. From edfec29ade400678591775e7be43f71114870e71 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 14 Jan 2024 14:31:19 +0800 Subject: [PATCH 099/271] implement "e" object parser --- src/ast.ts | 4 ++-- src/parser.ts | 15 ++++++++++++--- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index a16b3e7..7616b82 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -23,8 +23,8 @@ export type Preposition = { preposition: string; phrase: Phrase }; /** Represents a single predicate. */ export type Predicate = - | { type: "default"; predicate: Phrase } - | { type: "preposition"; preposition: Preposition }; + | { type: "default"; predicate: Phrase; objects: Array } + | { type: "preposition"; preposition: Preposition; objects: Array }; /** Represents a simple clause. */ export type Clause = diff --git a/src/parser.ts b/src/parser.ts index 9de6f08..da847ed 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -313,12 +313,21 @@ function enPhrases(): Parser> { many(specificWord("en").with(phrase())), ).map(([first, rest]) => [first, ...rest]); } +function objects(): Parser> { + return manyAtLeastOnce(specificWord("e").with(phrase())); +} /** Parses a single predicate. */ function predicate(): Parser { return choice( - preposition().map((preposition) => ({ type: "preposition", preposition })), - phrase().map( - (predicate) => ({ type: "default", predicate } as Predicate), + sequence(preposition(), objects()).map(([preposition, objects]) => ({ + type: "preposition", + preposition, + objects, + })), + sequence(phrase(), objects()).map( + ( + [predicate, objects], + ) => ({ type: "default", predicate, objects } as Predicate), ), ); } From 62e6b12a83723d9df84f8f742ac016bd50178575 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 14 Jan 2024 14:36:59 +0800 Subject: [PATCH 100/271] improve comments --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index da847ed..f41982e 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -316,7 +316,7 @@ function enPhrases(): Parser> { function objects(): Parser> { return manyAtLeastOnce(specificWord("e").with(phrase())); } -/** Parses a single predicate. */ +/** Parses a single predicate without _li_ nor _o_. */ function predicate(): Parser { return choice( sequence(preposition(), objects()).map(([preposition, objects]) => ({ From da540800940ac2b5706bb429c07f5390acec2caa Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 14 Jan 2024 15:05:35 +0800 Subject: [PATCH 101/271] improve comment --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index f41982e..242584d 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -157,7 +157,7 @@ function manyAtLeastOnce(parser: Parser): Parser> { } /** * Parses `parser` multiple times and returns an `Array`. This function is - * exhaustive. + * exhaustive unlike `many`. */ function all(parser: Parser): Parser> { return choiceOnlyOne( From 9f2219b761490e243651b7119decf625a2bc3d2f Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 14 Jan 2024 15:48:51 +0800 Subject: [PATCH 102/271] implement X ala X parser --- src/ast.ts | 22 +++++++++++--- src/parser.ts | 84 ++++++++++++++++++++++++++++++--------------------- 2 files changed, 68 insertions(+), 38 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 7616b82..89d0bea 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -1,6 +1,6 @@ /** Represents a single modifier. */ export type Modifier = - | { type: "word"; word: string } + | { type: "word"; word: string; alaQuestion: boolean } | { type: "proper words"; words: string } | { type: "pi"; phrase: Phrase } | { type: "nanpa ordinal"; phrase: Phrase } @@ -10,21 +10,35 @@ export type Modifier = export type SimplePhrase = { type: "default"; headWord: string; + alaQuestion: boolean; modifiers: Array; } | { type: "cardinal"; number: Array }; /** Represents a phrase including preverbial phrases. */ export type Phrase = | { type: "default"; phrase: SimplePhrase } - | { type: "preverb"; preverb: string; phrase: SimplePhrase }; + | { + type: "preverb"; + preverb: string; + alaQuestion: boolean; + phrase: SimplePhrase; + }; /** Represents a single prepositional phrase. */ -export type Preposition = { preposition: string; phrase: Phrase }; +export type Preposition = { + preposition: string; + alaQuestion: boolean; + phrase: Phrase; +}; /** Represents a single predicate. */ export type Predicate = | { type: "default"; predicate: Phrase; objects: Array } - | { type: "preposition"; preposition: Preposition; objects: Array }; + | { + type: "preposition"; + preposition: Preposition; + objects: Array; + }; /** Represents a simple clause. */ export type Clause = diff --git a/src/parser.ts b/src/parser.ts index 242584d..cdf5b51 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -212,9 +212,18 @@ function specificWord(thatWord: string): Parser { } }); } -/** Parses headword. */ -function headWord(): Parser { - return wordFrom(CONTENT_WORD, "headword"); +/** Parses X ala X construction as well as just X */ +function alaQuestion(parser: Parser): Parser<[string, boolean]> { + return choice( + sequence(parser.skip(specificWord("ala")), parser).map(([left, right]) => { + if (left === right) { + return [left, true] as [string, boolean]; + } else { + throw new UnreachableError(); + } + }), + parser.map((word) => [word, false]), + ); } /** Parses number words in order. */ function number(): Parser> { @@ -268,43 +277,45 @@ function simplePhrase(): Parser { type: "cardinal", number, } as SimplePhrase)), - sequence(headWord(), many(modifier())).map( - ([headWord, modifiers]) => ({ - type: "default", - headWord, - modifiers, - }), - ), + sequence(alaQuestion(wordFrom(CONTENT_WORD, "headword")), many(modifier())) + .map( + ([[headWord, alaQuestion], modifiers]) => ({ + type: "default", + headWord, + alaQuestion, + modifiers, + }), + ), ); } /** Parses phrases including preverbial phrases. */ function phrase(): Parser { - return sequence( - optional(wordFrom(PREVERB, "preverb")), - lazy(simplePhrase), - ).map(([preverb, phrase]) => { - if (preverb) { - return { - type: "preverb", - preverb, - phrase, - }; - } else { - return { - type: "default", - phrase, - }; - } - }); + return choice( + sequence( + alaQuestion(wordFrom(PREVERB, "preverb")), + lazy(simplePhrase), + ).map(([[preverb, alaQuestion], phrase]) => ({ + type: "preverb", + preverb, + alaQuestion, + phrase, + } as Phrase)), + lazy(simplePhrase).map((phrase) => ({ + type: "default", + phrase, + })), + ); } /** Parses prepositional phrase. */ function preposition(): Parser { - return sequence(wordFrom(PREPOSITION, "preposition"), phrase()).map( - ([preposition, phrase]) => ({ - preposition, - phrase, - }), - ); + return sequence(alaQuestion(wordFrom(PREPOSITION, "preposition")), phrase()) + .map( + ([[preposition, alaQuestion], phrase]) => ({ + preposition, + alaQuestion, + phrase, + }), + ); } /** Parses phrases separated by _en_. */ function enPhrases(): Parser> { @@ -344,7 +355,12 @@ function clause(): Parser { subjects: [ { type: "default", - phrase: { type: "default", headWord: subject, modifiers: [] }, + phrase: { + type: "default", + headWord: subject, + alaQuestion: false, + modifiers: [], + }, }, ], predicates: [predicate, ...morePredicates], From 87a421d779089685de7ef94e77082d174d259690 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 14 Jan 2024 15:56:46 +0800 Subject: [PATCH 103/271] fix error --- src/parser.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index cdf5b51..6a909f2 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -325,7 +325,7 @@ function enPhrases(): Parser> { ).map(([first, rest]) => [first, ...rest]); } function objects(): Parser> { - return manyAtLeastOnce(specificWord("e").with(phrase())); + return many(specificWord("e").with(phrase())); } /** Parses a single predicate without _li_ nor _o_. */ function predicate(): Parser { @@ -443,3 +443,5 @@ export function parser(src: string): Output { .parser(src) .map(({ value }) => value); } +console.log(predicate().parser("pona")); +debugger; From 50dffa886937bda0dbfe71619d033fc65a3fd7b3 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 14 Jan 2024 15:57:49 +0800 Subject: [PATCH 104/271] whoops, I forgot to remove this --- src/parser.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 6a909f2..627873b 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -443,5 +443,3 @@ export function parser(src: string): Output { .parser(src) .map(({ value }) => value); } -console.log(predicate().parser("pona")); -debugger; From 8b3eef4d285091b040234aa8f1d5fc3114289968 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 14 Jan 2024 16:01:33 +0800 Subject: [PATCH 105/271] allow other punctuation --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index 627873b..3dfb8f6 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -433,7 +433,7 @@ function sentence(): Parser { function fullSentence(): Parser { return allSpace() .with(sentence()) - .skip(optional(match(/\./))) + .skip(optional(match(/[\.?!:]/))) .skip(allSpace()) .skip(eol()); } From 246fb7412fd6e7eb680271e8a2a269578684f7da Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 14 Jan 2024 16:03:56 +0800 Subject: [PATCH 106/271] improve comments --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index 3dfb8f6..2d875bf 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -437,7 +437,7 @@ function fullSentence(): Parser { .skip(allSpace()) .skip(eol()); } -/** The full parser. */ +/** A Toki Pona sentence parser. */ export function parser(src: string): Output { return fullSentence() .parser(src) From b373735641cc1e4069acfd2caf696fc67b7e6fbb Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 14 Jan 2024 16:10:37 +0800 Subject: [PATCH 107/271] allow prepositions to be modified --- src/ast.ts | 1 + src/parser.ts | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 89d0bea..02b22c3 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -28,6 +28,7 @@ export type Phrase = export type Preposition = { preposition: string; alaQuestion: boolean; + modifiers: Array; phrase: Phrase; }; diff --git a/src/parser.ts b/src/parser.ts index 2d875bf..fe3ab90 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -308,11 +308,16 @@ function phrase(): Parser { } /** Parses prepositional phrase. */ function preposition(): Parser { - return sequence(alaQuestion(wordFrom(PREPOSITION, "preposition")), phrase()) + return sequence( + alaQuestion(wordFrom(PREPOSITION, "preposition")), + many(modifier()), + phrase(), + ) .map( - ([[preposition, alaQuestion], phrase]) => ({ + ([[preposition, alaQuestion], modifiers, phrase]) => ({ preposition, alaQuestion, + modifiers, phrase, }), ); From 50ea32fe2f15ae488175f45c29e6cd2523d3b075 Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 07:43:03 +0800 Subject: [PATCH 108/271] revert map function implementation for speed --- src/output.ts | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/output.ts b/src/output.ts index 41829f3..b87715e 100644 --- a/src/output.ts +++ b/src/output.ts @@ -24,6 +24,15 @@ export class Output { this.error = new OutputError("no error provided"); } } + private setError(error: OutputError) { + if (this.output.length === 0 && !this.error) { + this.error = error; + } + } + private push(value: T): void { + this.output.push(value); + this.error = null; + } private append({ output, error }: Output): void { this.output = [...this.output, ...output]; if (this.output.length > 0) { @@ -40,17 +49,22 @@ export class Output { * function can throw OutputError; Other kinds of errors will be ignored. */ map(mapper: (value: T) => U): Output { - return this.flatMap((value) => { + if (this.isError()) { + return new Output(this.error); + } + const wholeOutput = new Output(); + for (const value of this.output) { try { - return new Output([mapper(value)]); + wholeOutput.push(mapper(value)); } catch (error) { if (error instanceof OutputError) { - return new Output(error); + this.setError(error); } else { throw error; } } - }); + } + return wholeOutput; } /** * Accepts mapper function that returns another Output. flatMap takes all From 87b94d16b97a4b8132b8abc35521c9fb7db2cc4c Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 07:47:39 +0800 Subject: [PATCH 109/271] add missing comment --- src/output.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/output.ts b/src/output.ts index b87715e..08b088b 100644 --- a/src/output.ts +++ b/src/output.ts @@ -41,6 +41,7 @@ export class Output { this.error = error; } } + /** Returns true when the output array is empty */ isError(): boolean { return this.output.length === 0; } From aee9a2ffb6e7c8ee1b3c177a84f8517866357009 Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 07:53:32 +0800 Subject: [PATCH 110/271] improve error handling in test-parser.ts --- test-parser.ts | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/test-parser.ts b/test-parser.ts index 6b7c415..2d8ed0f 100644 --- a/test-parser.ts +++ b/test-parser.ts @@ -1,4 +1,13 @@ +import { OutputError } from "./src/error.ts"; import { parser } from "./src/parser.ts"; const input = await Deno.readTextFile("./test.txt"); -console.log(JSON.stringify(parser(input), null, 2)); +console.log( + JSON.stringify(parser(input), (key, value) => { + if (key === "error") { + return (value as OutputError).message; + } else { + return value; + } + }, 2), +); From 677ccc40bb96e16a6e8d2087d1cbae9ee4f92d91 Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 07:55:01 +0800 Subject: [PATCH 111/271] update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 23ac8ec..b8c9168 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,7 +12,7 @@ For this version. The whole code has been rewritten. This makes the code a lot e Inside update (intended for developers): -- Rewritten whole code to use module and TypeScript. +- Rewritten whole code to use TypeScript, module, and functional programming. - Rewritten parser to use parser combinator. - Add language codes to html. From c140d37df7e4b7538685d69d488db8565a292c3e Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 07:57:04 +0800 Subject: [PATCH 112/271] improve function naming --- src/parser.ts | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index fe3ab90..95e4427 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -213,7 +213,9 @@ function specificWord(thatWord: string): Parser { }); } /** Parses X ala X construction as well as just X */ -function alaQuestion(parser: Parser): Parser<[string, boolean]> { +function optionalAlaQuestion( + parser: Parser, +): Parser<[string, boolean]> { return choice( sequence(parser.skip(specificWord("ala")), parser).map(([left, right]) => { if (left === right) { @@ -277,7 +279,10 @@ function simplePhrase(): Parser { type: "cardinal", number, } as SimplePhrase)), - sequence(alaQuestion(wordFrom(CONTENT_WORD, "headword")), many(modifier())) + sequence( + optionalAlaQuestion(wordFrom(CONTENT_WORD, "headword")), + many(modifier()), + ) .map( ([[headWord, alaQuestion], modifiers]) => ({ type: "default", @@ -292,7 +297,7 @@ function simplePhrase(): Parser { function phrase(): Parser { return choice( sequence( - alaQuestion(wordFrom(PREVERB, "preverb")), + optionalAlaQuestion(wordFrom(PREVERB, "preverb")), lazy(simplePhrase), ).map(([[preverb, alaQuestion], phrase]) => ({ type: "preverb", @@ -309,7 +314,7 @@ function phrase(): Parser { /** Parses prepositional phrase. */ function preposition(): Parser { return sequence( - alaQuestion(wordFrom(PREPOSITION, "preposition")), + optionalAlaQuestion(wordFrom(PREPOSITION, "preposition")), many(modifier()), phrase(), ) From 100fec34de54b10df7299372049c122c0ca99572 Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 08:06:47 +0800 Subject: [PATCH 113/271] add modifiers for preverb --- src/ast.ts | 1 + src/parser.ts | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/ast.ts b/src/ast.ts index 02b22c3..e971f0b 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -21,6 +21,7 @@ export type Phrase = type: "preverb"; preverb: string; alaQuestion: boolean; + modifiers: Array; phrase: SimplePhrase; }; diff --git a/src/parser.ts b/src/parser.ts index 95e4427..a67fcfb 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -298,11 +298,13 @@ function phrase(): Parser { return choice( sequence( optionalAlaQuestion(wordFrom(PREVERB, "preverb")), + many(lazy(modifier)), lazy(simplePhrase), - ).map(([[preverb, alaQuestion], phrase]) => ({ + ).map(([[preverb, alaQuestion], modifiers, phrase]) => ({ type: "preverb", preverb, alaQuestion, + modifiers, phrase, } as Phrase)), lazy(simplePhrase).map((phrase) => ({ From 45f0267da9d29712f355259ac0523e56a1e562d8 Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 08:42:57 +0800 Subject: [PATCH 114/271] support ali for number words --- src/parser.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index a67fcfb..0e649aa 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -230,7 +230,12 @@ function optionalAlaQuestion( /** Parses number words in order. */ function number(): Parser> { return sequence( - all(specificWord("ale")), + all(choice( + specificWord("ale"), + specificWord( + "ali", + ), + )), all(specificWord("mute")), all(specificWord("luka")), all(specificWord("tu")), From ad595791c0d708bc0e512a9d6b1371cff9d93134 Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 09:06:14 +0800 Subject: [PATCH 115/271] Move limitation to wiki --- README.md | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/README.md b/README.md index 6ff3754..bebb8f6 100644 --- a/README.md +++ b/README.md @@ -66,20 +66,3 @@ The following are currently unrecognized (non-definitive but pedantic). ✏️ m - "o a" - "e a" - "pi a" - -## New Limitations - -The whole code is being rewritten and there will be new different limitations. - - - -- ✏️ "a" particle -- ✏️ "anu" particle -- ✏️ "X ala X" constructions -- ✏️ Extended numbering system -- ✏️ Commas -- Non-pu vocabulary minus "pu" plus "tonsi" -- Multiple sentences -- Clause with both "li" and "o" -- "kepeken" as headword or modifier -- "taso" as headword (it can be used as modifier) From 799214d367bf954acef8ba77e94db6d8f3bde5e2 Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 09:36:33 +0800 Subject: [PATCH 116/271] implement multiple sentence parser --- src/ast.ts | 7 ++++--- src/parser.ts | 39 +++++++++++++++------------------------ src/translator.ts | 4 +++- 3 files changed, 22 insertions(+), 28 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index e971f0b..48cbb93 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -64,6 +64,7 @@ export type Clause = export type FullClause = { taso: boolean; anuSeme: boolean; clause: Clause }; /** Represents a single full sentence. */ -export type Sentence = - | { type: "single clause"; clause: FullClause } - | { type: "la clauses"; left: FullClause; right: Sentence }; +export type Sentence = { + laClauses: Array; + punctuation: string; +}; diff --git a/src/parser.ts b/src/parser.ts index 0e649aa..dc9696e 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -174,10 +174,6 @@ function allAtLeastOnce(parser: Parser): Parser> { ...rest, ]); } -/** Parses whitespaces. */ -function allSpace(): Parser { - return match(/\s*/).map(([space]) => space); -} /** Parses lowercase word. */ function word(): Parser { return match(/([a-z]+)\s*/).map(([_, word]) => word); @@ -435,28 +431,23 @@ function fullClause(): Parser { }), ); } -/** Parses a single full sentence without punctuations. */ +/** Parses a single full sentence with optional punctuations. */ function sentence(): Parser { - return choice( - fullClause().map( - (clause) => ({ type: "single clause", clause } as Sentence), - ), - sequence(fullClause().skip(specificWord("la")), lazy(sentence)).map( - ([left, right]) => ({ type: "la clauses", left, right }), + return sequence( + fullClause(), + many(specificWord("la").with(fullClause())), + choice( + eol().map((_) => ""), + match(/([\.,:?!])\s*/).map(([_, punctuation]) => punctuation), ), - ); -} -/** The full parser. */ -function fullSentence(): Parser { - return allSpace() - .with(sentence()) - .skip(optional(match(/[\.?!:]/))) - .skip(allSpace()) - .skip(eol()); -} -/** A Toki Pona sentence parser. */ -export function parser(src: string): Output { - return fullSentence() + ).map(([clause, moreClauses, punctuation]) => ({ + laClauses: [clause, ...moreClauses], + punctuation, + })); +} +/** A multiple Toki Pona sentence parser. */ +export function parser(src: string): Output> { + return match(/\s*/).with(allAtLeastOnce(sentence())) .parser(src) .map(({ value }) => value); } diff --git a/src/translator.ts b/src/translator.ts index 9c4b57a..6dba73d 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -8,5 +8,7 @@ function translateSentence(output: Sentence): TranslationOutput { throw new Error("todo"); } function translate(src: string): TranslationOutput { - return parser(src).flatMap(translateSentence); + return parser(src).flatMap((sentences) => + new Output(sentences).flatMap(translateSentence) + ); } From 773610b8ba427c4ec39fb194175daaadc16eb732 Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 09:38:25 +0800 Subject: [PATCH 117/271] fix error handler --- test-parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test-parser.ts b/test-parser.ts index 2d8ed0f..6ab0da2 100644 --- a/test-parser.ts +++ b/test-parser.ts @@ -5,7 +5,7 @@ const input = await Deno.readTextFile("./test.txt"); console.log( JSON.stringify(parser(input), (key, value) => { if (key === "error") { - return (value as OutputError).message; + return (value as null | OutputError)?.message; } else { return value; } From 0c6c456a6c9f0ebe7317b0401dbb23403b2dfa7e Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 09:40:06 +0800 Subject: [PATCH 118/271] this turns out to be unnecessary --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index dc9696e..9fcf208 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -438,7 +438,7 @@ function sentence(): Parser { many(specificWord("la").with(fullClause())), choice( eol().map((_) => ""), - match(/([\.,:?!])\s*/).map(([_, punctuation]) => punctuation), + match(/([.,:?!])\s*/).map(([_, punctuation]) => punctuation), ), ).map(([clause, moreClauses, punctuation]) => ({ laClauses: [clause, ...moreClauses], From 1fa932c49cc49f7f0f29568204436ae410016f8e Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 09:41:23 +0800 Subject: [PATCH 119/271] small update --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index 9fcf208..fc61e52 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -437,7 +437,7 @@ function sentence(): Parser { fullClause(), many(specificWord("la").with(fullClause())), choice( - eol().map((_) => ""), + eol().map(() => ""), match(/([.,:?!])\s*/).map(([_, punctuation]) => punctuation), ), ).map(([clause, moreClauses, punctuation]) => ({ From 832bdea33c7d0b83df6debfe8bc56739fbffed1d Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 09:52:13 +0800 Subject: [PATCH 120/271] allow sentences to end in semicolon --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index fc61e52..07aa7ea 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -438,7 +438,7 @@ function sentence(): Parser { many(specificWord("la").with(fullClause())), choice( eol().map(() => ""), - match(/([.,:?!])\s*/).map(([_, punctuation]) => punctuation), + match(/([.,:;?!])\s*/).map(([_, punctuation]) => punctuation), ), ).map(([clause, moreClauses, punctuation]) => ({ laClauses: [clause, ...moreClauses], From 8b2afb2df0ed002a20186b0468a1c1312fefcb44 Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 10:46:52 +0800 Subject: [PATCH 121/271] implement comma --- src/parser.ts | 55 ++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 12 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 07aa7ea..830c272 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -174,6 +174,10 @@ function allAtLeastOnce(parser: Parser): Parser> { ...rest, ]); } +/** Parses comma. */ +function optionalComma(): Parser { + return optional(match(/,\s*/).map(() => ",")); +} /** Parses lowercase word. */ function word(): Parser { return match(/([a-z]+)\s*/).map(([_, word]) => word); @@ -334,11 +338,11 @@ function preposition(): Parser { function enPhrases(): Parser> { return sequence( phrase(), - many(specificWord("en").with(phrase())), + many(optionalComma().with(specificWord("en")).with(phrase())), ).map(([first, rest]) => [first, ...rest]); } function objects(): Parser> { - return many(specificWord("e").with(phrase())); + return many(optionalComma().with(specificWord("e")).with(phrase())); } /** Parses a single predicate without _li_ nor _o_. */ function predicate(): Parser { @@ -361,8 +365,8 @@ function clause(): Parser { sequence( wordFrom(SPECIAL_SUBJECT, "mi/sina subject"), predicate(), - many(specificWord("li").with(predicate())), - many(preposition()), + many(optionalComma().with(specificWord("li")).with(predicate())), + many(optionalComma().with(preposition())), ).map(([subject, predicate, morePredicates, prepositions]) => ({ type: "li clause", subjects: [ @@ -379,7 +383,9 @@ function clause(): Parser { predicates: [predicate, ...morePredicates], prepositions, })), - manyAtLeastOnce(preposition()).map((prepositions) => ({ + manyAtLeastOnce(optionalComma().with(preposition())).map(( + prepositions, + ) => ({ type: "prepositions", prepositions, })), @@ -397,18 +403,34 @@ function clause(): Parser { })), sequence( enPhrases(), - manyAtLeastOnce(specificWord("li").with(predicate())), - many(preposition()), + manyAtLeastOnce( + optionalComma().with(specificWord("li")).with(predicate()), + ), + many(optionalComma().with(preposition())), ).map(([subjects, predicates, prepositions]) => ({ type: "li clause", subjects, predicates, prepositions, })), + sequence( + specificWord("o").with(predicate()), + manyAtLeastOnce( + optionalComma().with(specificWord("o")).with(predicate()), + ), + many(optionalComma().with(preposition())), + ).map(([predicate, morePredicates, prepositions]) => ({ + type: "o clause", + subjects: [], + predicates: [predicate, ...morePredicates], + prepositions, + })), sequence( optional(enPhrases()), - manyAtLeastOnce(specificWord("o").with(predicate())), - many(preposition()), + manyAtLeastOnce( + optionalComma().with(specificWord("o")).with(predicate()), + ), + many(optionalComma().with(preposition())), ).map(([subjects, predicates, prepositions]) => ({ type: "o clause", subjects: subjects ?? [], @@ -420,9 +442,11 @@ function clause(): Parser { /** Parses a single clause including precaluse and postclause. */ function fullClause(): Parser { return sequence( - optional(specificWord("taso")), + optional(specificWord("taso").skip(optionalComma())), clause(), - optional(sequence(specificWord("anu"), specificWord("seme"))), + optional( + sequence(optionalComma(), specificWord("anu"), specificWord("seme")), + ), ).map( ([taso, clause, anuSeme]) => ({ taso: !!taso, @@ -431,11 +455,18 @@ function fullClause(): Parser { }), ); } +// parses _la_ with optional comma around +function la(): Parser { + return choice( + optionalComma().with(specificWord("la")), + specificWord("la").skip(optionalComma()), + ); +} /** Parses a single full sentence with optional punctuations. */ function sentence(): Parser { return sequence( fullClause(), - many(specificWord("la").with(fullClause())), + many(la().with(fullClause())), choice( eol().map(() => ""), match(/([.,:;?!])\s*/).map(([_, punctuation]) => punctuation), From df85e2ab0f54ff0a3d237ca7b6c9ad085738a9d1 Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 10:48:33 +0800 Subject: [PATCH 122/271] turns out I still need this --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index 830c272..f37293f 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -478,7 +478,7 @@ function sentence(): Parser { } /** A multiple Toki Pona sentence parser. */ export function parser(src: string): Output> { - return match(/\s*/).with(allAtLeastOnce(sentence())) + return match(/\s*/).with(allAtLeastOnce(sentence())).skip(eol()) .parser(src) .map(({ value }) => value); } From ce8f757124a0a2f4d2486d384bdc295ff3ea47db Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 12:23:06 +0800 Subject: [PATCH 123/271] add todo --- src/parser.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parser.ts b/src/parser.ts index f37293f..4bddd37 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -56,6 +56,7 @@ function match(regex: RegExp): Parser { if (match) { return new Output([{ value: match, rest: src.slice(match[0].length) }]); } else if (src === "") { + // TODO: replace this error, this isn't exactly unreachable return new Output(new UnreachableError()); } else { const token = src.match(/(.*)(?:\s|$)/)?.[1]; From 810f99b4af804f3c0c2d944225dd1758e457fd3d Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 12:39:32 +0800 Subject: [PATCH 124/271] improve la parser, avoid duplicate --- src/parser.ts | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 4bddd37..f779eae 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -176,8 +176,12 @@ function allAtLeastOnce(parser: Parser): Parser> { ]); } /** Parses comma. */ +function comma(): Parser { + return match(/,\s*/).map(() => ","); +} +/** Parses an optional comma. */ function optionalComma(): Parser { - return optional(match(/,\s*/).map(() => ",")); + return optional(comma()); } /** Parses lowercase word. */ function word(): Parser { @@ -459,8 +463,9 @@ function fullClause(): Parser { // parses _la_ with optional comma around function la(): Parser { return choice( - optionalComma().with(specificWord("la")), - specificWord("la").skip(optionalComma()), + comma().with(specificWord("la")), + specificWord("la").skip(comma()), + specificWord("la"), ); } /** Parses a single full sentence with optional punctuations. */ From 2f7855b748f58ac2a1ccc9ad5298af32f1f7d088 Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 13:23:06 +0800 Subject: [PATCH 125/271] implement quotation --- src/ast.ts | 8 +++++--- src/parser.ts | 16 ++++++++++++++++ 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 48cbb93..0d3aac2 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -4,7 +4,8 @@ export type Modifier = | { type: "proper words"; words: string } | { type: "pi"; phrase: Phrase } | { type: "nanpa ordinal"; phrase: Phrase } - | { type: "cardinal"; number: Array }; + | { type: "cardinal"; number: Array } + | { type: "quotation"; quotation: Array }; /** Represents a simple phrase. */ export type SimplePhrase = { @@ -14,7 +15,7 @@ export type SimplePhrase = { modifiers: Array; } | { type: "cardinal"; number: Array }; -/** Represents a phrase including preverbial phrases. */ +/** Represents a phrase including preverbial phrases and quotations. */ export type Phrase = | { type: "default"; phrase: SimplePhrase } | { @@ -23,7 +24,8 @@ export type Phrase = alaQuestion: boolean; modifiers: Array; phrase: SimplePhrase; - }; + } + | { type: "quotation"; quotation: Array }; /** Represents a single prepositional phrase. */ export type Preposition = { diff --git a/src/parser.ts b/src/parser.ts index f779eae..e95fcd0 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -82,6 +82,12 @@ function eol(): Parser { } }); } +/** Parses without consuming the source string */ +function lookAhead(parser: Parser): Parser { + return new Parser((src) => + parser.parser(src).map(({ value }) => ({ value, rest: src })) + ); +} /** * Lazily evaluates the parser function only when needed. Useful for recursive * parsers. @@ -183,6 +189,9 @@ function comma(): Parser { function optionalComma(): Parser { return optional(comma()); } +function quotationMark(): Parser { + return match(/"\s*/).map(() => '"'); +} /** Parses lowercase word. */ function word(): Parser { return match(/([a-z]+)\s*/).map(([_, word]) => word); @@ -280,6 +289,7 @@ function modifier(): Parser { phrase, })), number().map((number) => ({ type: "cardinal", number })), + quotation().map((quotation) => ({ type: "quotation", quotation })), ); } /** Parses phrase. */ @@ -321,6 +331,7 @@ function phrase(): Parser { type: "default", phrase, })), + quotation().map((quotation) => ({ type: "quotation", quotation })), ); } /** Parses prepositional phrase. */ @@ -475,6 +486,7 @@ function sentence(): Parser { many(la().with(fullClause())), choice( eol().map(() => ""), + lookAhead(quotationMark()).map(() => ""), match(/([.,:;?!])\s*/).map(([_, punctuation]) => punctuation), ), ).map(([clause, moreClauses, punctuation]) => ({ @@ -482,6 +494,10 @@ function sentence(): Parser { punctuation, })); } +/** Parses multiple sentences inside quotation mark */ +function quotation(): Parser> { + return quotationMark().with(many(sentence())).skip(quotationMark()); +} /** A multiple Toki Pona sentence parser. */ export function parser(src: string): Output> { return match(/\s*/).with(allAtLeastOnce(sentence())).skip(eol()) From 60ab8e5bc97660a87a46eb6f324cd495a832c00c Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 13:29:20 +0800 Subject: [PATCH 126/271] avoid infinite recursion --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index e95fcd0..0ad1523 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -496,7 +496,7 @@ function sentence(): Parser { } /** Parses multiple sentences inside quotation mark */ function quotation(): Parser> { - return quotationMark().with(many(sentence())).skip(quotationMark()); + return quotationMark().with(many(lazy(sentence))).skip(quotationMark()); } /** A multiple Toki Pona sentence parser. */ export function parser(src: string): Output> { From 002b30cc541ea6ee855c2378e7e21d9b4824f411 Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 19:10:18 +0800 Subject: [PATCH 127/271] small update reducing redundancies --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index 0ad1523..d164fa8 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -442,7 +442,7 @@ function clause(): Parser { prepositions, })), sequence( - optional(enPhrases()), + enPhrases(), manyAtLeastOnce( optionalComma().with(specificWord("o")).with(predicate()), ), From ec63f3f7fa1fc2831a1653cef0324e6c028aaf10 Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 19:11:40 +0800 Subject: [PATCH 128/271] small update --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index d164fa8..424044b 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -449,7 +449,7 @@ function clause(): Parser { many(optionalComma().with(preposition())), ).map(([subjects, predicates, prepositions]) => ({ type: "o clause", - subjects: subjects ?? [], + subjects: subjects, predicates, prepositions, })), From 32544a5fc5f32a758e6cebede4b4cbf137785a5a Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 19:55:00 +0800 Subject: [PATCH 129/271] allow cardinal phrases to be modified --- src/ast.ts | 2 +- src/parser.ts | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 0d3aac2..498f981 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -13,7 +13,7 @@ export type SimplePhrase = { headWord: string; alaQuestion: boolean; modifiers: Array; -} | { type: "cardinal"; number: Array }; +} | { type: "cardinal"; number: Array; modifiers: Array }; /** Represents a phrase including preverbial phrases and quotations. */ export type Phrase = diff --git a/src/parser.ts b/src/parser.ts index 424044b..f3a18ff 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -295,9 +295,10 @@ function modifier(): Parser { /** Parses phrase. */ function simplePhrase(): Parser { return choice( - number().map((number) => ({ + sequence(number(), many(modifier())).map(([number, modifiers]) => ({ type: "cardinal", number, + modifiers, } as SimplePhrase)), sequence( optionalAlaQuestion(wordFrom(CONTENT_WORD, "headword")), From 0e0c357fd5409a2cb6cf6b839688fc6dc92f76f4 Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 19:55:58 +0800 Subject: [PATCH 130/271] only allow cardinal to be more than 1 words --- src/parser.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index f3a18ff..a9684c6 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -256,10 +256,10 @@ function number(): Parser> { all(specificWord("wan")), ).map((array) => { const output = array.flat(); - if (output.length === 0) { - throw new UnreachableError(); - } else { + if (output.length >= 2) { return output; + } else { + throw new UnreachableError(); } }); } From 2f83ba9df97c5fcbbada19aafba0eff5134b5397 Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 15 Jan 2024 19:57:28 +0800 Subject: [PATCH 131/271] use `many` in number parser --- src/parser.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index a9684c6..463adec 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -244,16 +244,16 @@ function optionalAlaQuestion( /** Parses number words in order. */ function number(): Parser> { return sequence( - all(choice( + many(choice( specificWord("ale"), specificWord( "ali", ), )), - all(specificWord("mute")), - all(specificWord("luka")), - all(specificWord("tu")), - all(specificWord("wan")), + many(specificWord("mute")), + many(specificWord("luka")), + many(specificWord("tu")), + many(specificWord("wan")), ).map((array) => { const output = array.flat(); if (output.length >= 2) { From 904e8580289d373c1f49ee3ece49dcd6ca3b19a4 Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 16 Jan 2024 08:08:19 +0800 Subject: [PATCH 132/271] implement other kinds of quotation marks --- src/ast.ts | 14 ++++++++++++-- src/parser.ts | 43 +++++++++++++++++++++++++++++++++++++------ 2 files changed, 49 insertions(+), 8 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 498f981..d547369 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -5,7 +5,17 @@ export type Modifier = | { type: "pi"; phrase: Phrase } | { type: "nanpa ordinal"; phrase: Phrase } | { type: "cardinal"; number: Array } - | { type: "quotation"; quotation: Array }; + | { + type: "quotation"; + quotation: Quotation; + }; + +/** Represents quotation. */ +export type Quotation = { + sentences: Array; + leftMark: string; + rightMark: string; +}; /** Represents a simple phrase. */ export type SimplePhrase = { @@ -25,7 +35,7 @@ export type Phrase = modifiers: Array; phrase: SimplePhrase; } - | { type: "quotation"; quotation: Array }; + | { type: "quotation"; quotation: Quotation }; /** Represents a single prepositional phrase. */ export type Preposition = { diff --git a/src/parser.ts b/src/parser.ts index 463adec..db3fce5 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -5,6 +5,7 @@ import { Phrase, Predicate, Preposition, + Quotation, Sentence, SimplePhrase, } from "./ast.ts"; @@ -189,9 +190,6 @@ function comma(): Parser { function optionalComma(): Parser { return optional(comma()); } -function quotationMark(): Parser { - return match(/"\s*/).map(() => '"'); -} /** Parses lowercase word. */ function word(): Parser { return match(/([a-z]+)\s*/).map(([_, word]) => word); @@ -487,7 +485,7 @@ function sentence(): Parser { many(la().with(fullClause())), choice( eol().map(() => ""), - lookAhead(quotationMark()).map(() => ""), + lookAhead(closeQuotationMark()).map(() => ""), match(/([.,:;?!])\s*/).map(([_, punctuation]) => punctuation), ), ).map(([clause, moreClauses, punctuation]) => ({ @@ -495,9 +493,42 @@ function sentence(): Parser { punctuation, })); } +/** Parses opening quotation mark */ +function openQuotationMark(): Parser { + return match(/(["“«「])\s*/).map(([_, mark]) => mark); +} +/** Parses closing quotation mark */ +function closeQuotationMark(): Parser { + return match(/(["”»」])\s*/).map(([_, mark]) => mark); +} /** Parses multiple sentences inside quotation mark */ -function quotation(): Parser> { - return quotationMark().with(many(lazy(sentence))).skip(quotationMark()); +function quotation(): Parser { + return sequence( + openQuotationMark(), + many(lazy(sentence)), + closeQuotationMark(), + ).map(([leftMark, sentences, rightMark]) => { + if (leftMark === '"' || leftMark === "“") { + if (rightMark !== '"' && rightMark !== "”") { + throw new UnrecognizedError("Mismatched quotation marks"); + } + } else if (leftMark === "«") { + if (rightMark !== "»") { + throw new UnrecognizedError("Mismatched quotation marks"); + } + } else if (leftMark === "「") { + if (rightMark !== "」") { + throw new UnrecognizedError("Mismatched quotation marks"); + } + } else { + throw new UnreachableError(); + } + return { + sentences, + leftMark, + rightMark, + }; + }); } /** A multiple Toki Pona sentence parser. */ export function parser(src: string): Output> { From 441b5832db29755a9267d3fdb5b1307f560f95ef Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 16 Jan 2024 08:25:26 +0800 Subject: [PATCH 133/271] improve error --- src/parser.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index db3fce5..5444edc 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -57,8 +57,7 @@ function match(regex: RegExp): Parser { if (match) { return new Output([{ value: match, rest: src.slice(match[0].length) }]); } else if (src === "") { - // TODO: replace this error, this isn't exactly unreachable - return new Output(new UnreachableError()); + return new Output(new UnrecognizedError("Unexpected end of sentence")); } else { const token = src.match(/(.*)(?:\s|$)/)?.[1]; if (token) { From 486723da0dba5815b226a6b1be7baf74d1767b89 Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 16 Jan 2024 17:46:02 +0800 Subject: [PATCH 134/271] implement fuzzer for phrases --- src/fuzzer.ts | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 src/fuzzer.ts diff --git a/src/fuzzer.ts b/src/fuzzer.ts new file mode 100644 index 0000000..6be2528 --- /dev/null +++ b/src/fuzzer.ts @@ -0,0 +1,84 @@ +import { PREVERB } from "./vocabulary.ts"; +import { CONTENT_WORD } from "./vocabulary.ts"; + +const CONSONANTS = "p t k s m n l j w".split(" "); +const VOWELS = "a e i o u".split(" "); + +function randomIn(...items: Array): T { + if (items.length === 0) { + throw new Error("passed empty arguments"); + } + return items[randomNumber(items.length - 1)]; +} +function randomNumber(max: number): number { + return Math.floor(Math.random() * (max + 1)); +} +function randomWord(set: Set): string { + return randomIn(...set); +} +function fill(number: number, mapper: () => T): Array { + return new Array(number).fill(undefined).map(mapper); +} +function randomName(): string { + const first = randomIn(...CONSONANTS).toUpperCase() + + randomIn(...VOWELS); + const more = fill( + randomNumber(2), + () => randomIn(...CONSONANTS) + randomIn(...VOWELS), + ); + return first + more.join(""); +} +function asAlaQuestion(word: string): Array { + return [word, "ala", word]; +} +function randomModifier(): Array { + return randomIn( + () => [randomWord(CONTENT_WORD)], + () => asAlaQuestion(randomWord(CONTENT_WORD)), + () => [randomName()], + () => ["pi", ...randomPhrase()], + () => ["nanpa", ...randomPhrase()], + randomNumberWords, + )(); +} +function randomNumberWords(): Array { + const words = []; + let number = 1 + randomNumber(400); + while (number > 0) { + if (number >= 100) { + words.push(randomIn("ale", "ali")); + number -= 100; + } else if (number >= 20) { + words.push("mute"); + number -= 20; + } else if (number >= 5) { + words.push("luka"); + number -= 5; + } else if (number >= 2) { + words.push("tu"); + number -= 2; + } else { + words.push("wan"); + number--; + } + } + return words; +} +function randomPhrase(): Array { + const modifiers = fill(randomNumber(2), randomModifier).flat(); + const phrase = randomIn( + () => { + const headWord = randomIn( + () => [randomWord(CONTENT_WORD)], + () => asAlaQuestion(randomWord(CONTENT_WORD)), + )(); + return [...headWord, ...modifiers]; + }, + () => [...randomNumberWords(), ...modifiers], + )(); + return randomIn( + () => phrase, + () => [...asAlaQuestion(randomWord(PREVERB)), ...phrase], + () => [randomWord(PREVERB), ...phrase], + )(); +} From c17fe7e9d69bb83db979072ae5252f81e5e9bec5 Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 16 Jan 2024 19:07:05 +0800 Subject: [PATCH 135/271] full format --- src/ast.ts | 100 ++++++++++---------- src/fuzzer.ts | 24 ++--- src/output.ts | 39 +++----- src/parser.ts | 223 +++++++++++++++------------------------------ src/translation.ts | 7 +- src/translator.ts | 2 - src/vocabulary.ts | 13 ++- test-parser.ts | 13 +-- 8 files changed, 160 insertions(+), 261 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index d547369..14c0df7 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -5,38 +5,35 @@ export type Modifier = | { type: "pi"; phrase: Phrase } | { type: "nanpa ordinal"; phrase: Phrase } | { type: "cardinal"; number: Array } - | { - type: "quotation"; - quotation: Quotation; - }; - + | { type: "quotation"; quotation: Quotation }; /** Represents quotation. */ export type Quotation = { sentences: Array; leftMark: string; rightMark: string; }; - /** Represents a simple phrase. */ export type SimplePhrase = { type: "default"; headWord: string; alaQuestion: boolean; modifiers: Array; -} | { type: "cardinal"; number: Array; modifiers: Array }; - +} | { + type: "cardinal"; + number: Array; + modifiers: Array; +}; /** Represents a phrase including preverbial phrases and quotations. */ -export type Phrase = - | { type: "default"; phrase: SimplePhrase } - | { - type: "preverb"; - preverb: string; - alaQuestion: boolean; - modifiers: Array; - phrase: SimplePhrase; - } - | { type: "quotation"; quotation: Quotation }; - +export type Phrase = { type: "default"; phrase: SimplePhrase } | { + type: "preverb"; + preverb: string; + alaQuestion: boolean; + modifiers: Array; + phrase: SimplePhrase; +} | { + type: "quotation"; + quotation: Quotation; +}; /** Represents a single prepositional phrase. */ export type Preposition = { preposition: string; @@ -44,39 +41,38 @@ export type Preposition = { modifiers: Array; phrase: Phrase; }; - /** Represents a single predicate. */ -export type Predicate = - | { type: "default"; predicate: Phrase; objects: Array } - | { - type: "preposition"; - preposition: Preposition; - objects: Array; - }; - +export type Predicate = { + type: "default"; + predicate: Phrase; + objects: Array; +} | { + type: "preposition"; + preposition: Preposition; + objects: Array; +}; /** Represents a simple clause. */ -export type Clause = - | { type: "en phrases"; phrases: Array } - | { type: "o vocative"; phrases: Array } - | { - type: "li clause"; - subjects: Array; - predicates: Array; - prepositions: Array; - } - | { - type: "o clause"; - subjects: Array; - predicates: Array; - prepositions: Array; - } - | { type: "prepositions"; prepositions: Array }; - -/** Represents a clause including preclause and postclause. */ -export type FullClause = { taso: boolean; anuSeme: boolean; clause: Clause }; - -/** Represents a single full sentence. */ -export type Sentence = { - laClauses: Array; - punctuation: string; +export type Clause = { type: "en phrases"; phrases: Array } | { + type: "o vocative"; + phrases: Array; +} | { + type: "li clause"; + subjects: Array; + predicates: Array; + prepositions: Array; +} | { + type: "o clause"; + subjects: Array; + predicates: Array; + prepositions: Array; +} | { + type: "prepositions"; + prepositions: Array; +}; /** Represents a clause including preclause and postclause. */ +export type FullClause = { + taso: boolean; + anuSeme: boolean; + clause: Clause; }; +/** Represents a single full sentence. */ +export type Sentence = { laClauses: Array; punctuation: string }; diff --git a/src/fuzzer.ts b/src/fuzzer.ts index 6be2528..fc30e82 100644 --- a/src/fuzzer.ts +++ b/src/fuzzer.ts @@ -5,9 +5,7 @@ const CONSONANTS = "p t k s m n l j w".split(" "); const VOWELS = "a e i o u".split(" "); function randomIn(...items: Array): T { - if (items.length === 0) { - throw new Error("passed empty arguments"); - } + if (items.length === 0) throw new Error("passed empty arguments"); return items[randomNumber(items.length - 1)]; } function randomNumber(max: number): number { @@ -20,8 +18,7 @@ function fill(number: number, mapper: () => T): Array { return new Array(number).fill(undefined).map(mapper); } function randomName(): string { - const first = randomIn(...CONSONANTS).toUpperCase() + - randomIn(...VOWELS); + const first = randomIn(...CONSONANTS).toUpperCase() + randomIn(...VOWELS); const more = fill( randomNumber(2), () => randomIn(...CONSONANTS) + randomIn(...VOWELS), @@ -66,16 +63,13 @@ function randomNumberWords(): Array { } function randomPhrase(): Array { const modifiers = fill(randomNumber(2), randomModifier).flat(); - const phrase = randomIn( - () => { - const headWord = randomIn( - () => [randomWord(CONTENT_WORD)], - () => asAlaQuestion(randomWord(CONTENT_WORD)), - )(); - return [...headWord, ...modifiers]; - }, - () => [...randomNumberWords(), ...modifiers], - )(); + const phrase = randomIn(() => { + const headWord = randomIn( + () => [randomWord(CONTENT_WORD)], + () => asAlaQuestion(randomWord(CONTENT_WORD)), + )(); + return [...headWord, ...modifiers]; + }, () => [...randomNumberWords(), ...modifiers])(); return randomIn( () => phrase, () => [...asAlaQuestion(randomWord(PREVERB)), ...phrase], diff --git a/src/output.ts b/src/output.ts index 08b088b..818dfdf 100644 --- a/src/output.ts +++ b/src/output.ts @@ -1,5 +1,4 @@ import { OutputError } from "./error.ts"; - /** Represents possibilities and error. */ export class Output { /** Represents possibilities, considered error when the array is empty. */ @@ -13,9 +12,7 @@ export class Output { this.output = output; if (output.length === 0) { this.error = new OutputError("no error provided"); - } else { - this.error = null; - } + } else this.error = null; } else if (output instanceof OutputError) { this.output = []; this.error = output; @@ -25,9 +22,7 @@ export class Output { } } private setError(error: OutputError) { - if (this.output.length === 0 && !this.error) { - this.error = error; - } + if (this.output.length === 0 && !this.error) this.error = error; } private push(value: T): void { this.output.push(value); @@ -35,11 +30,8 @@ export class Output { } private append({ output, error }: Output): void { this.output = [...this.output, ...output]; - if (this.output.length > 0) { - this.error = null; - } else { - this.error = error; - } + if (this.output.length > 0) this.error = null; + else this.error = error; } /** Returns true when the output array is empty */ isError(): boolean { @@ -50,35 +42,26 @@ export class Output { * function can throw OutputError; Other kinds of errors will be ignored. */ map(mapper: (value: T) => U): Output { - if (this.isError()) { - return new Output(this.error); - } + if (this.isError()) return new Output(this.error); const wholeOutput = new Output(); for (const value of this.output) { try { wholeOutput.push(mapper(value)); } catch (error) { - if (error instanceof OutputError) { - this.setError(error); - } else { - throw error; - } + if (error instanceof OutputError) this.setError(error); + else throw error; } } return wholeOutput; } - /** - * Accepts mapper function that returns another Output. flatMap takes all + /** + * Accepts mapper function that returns another Output. flatMap takes all * values and flattens them into single array for Output. */ flatMap(mapper: (value: T) => Output): Output { - if (this.isError()) { - return new Output(this.error); - } + if (this.isError()) return new Output(this.error); const wholeOutput = new Output(); - for (const value of this.output) { - wholeOutput.append(mapper(value)); - } + for (const value of this.output) wholeOutput.append(mapper(value)); return wholeOutput; } } diff --git a/src/parser.ts b/src/parser.ts index 5444edc..8bdf6aa 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -22,6 +22,7 @@ import { type ValueRest = { value: T; rest: string }; /** A special kind of Output that parsers returns. */ type ParserOutput = Output>; + /** Wrapper of parser function with added methods for convenience. */ class Parser { constructor(public readonly parser: (src: string) => ParserOutput) {} @@ -60,11 +61,8 @@ function match(regex: RegExp): Parser { return new Output(new UnrecognizedError("Unexpected end of sentence")); } else { const token = src.match(/(.*)(?:\s|$)/)?.[1]; - if (token) { - return new Output(new UnrecognizedError(`"${token}"`)); - } else { - return new Output(new UnreachableError()); - } + if (token) return new Output(new UnrecognizedError(`"${token}"`)); + else return new Output(new UnreachableError()); } }); } @@ -75,11 +73,8 @@ function nothing(): Parser { /** Parses the end of line (or the end of sentence in context of Toki Pona) */ function eol(): Parser { return new Parser((src) => { - if (src === "") { - return new Output([{ value: null, rest: "" }]); - } else { - return new Output(new UnrecognizedError(`"${src}"`)); - } + if (src === "") return new Output([{ value: null, rest: "" }]); + else return new Output(new UnrecognizedError(`"${src}"`)); }); } /** Parses without consuming the source string */ @@ -111,11 +106,8 @@ function choice(...choices: Array>): Parser { function choiceOnlyOne(...choices: Array>): Parser { return new Parser((src) => choices.reduce((output, parser) => { - if (output.isError()) { - return parser.parser(src); - } else { - return output; - } + if (output.isError()) return parser.parser(src); + else return output; }, new Output>()) ); } @@ -157,10 +149,9 @@ function many(parser: Parser): Parser> { } /** Like `many` but parses at least once. */ function manyAtLeastOnce(parser: Parser): Parser> { - return sequence(parser, many(parser)).map(([first, rest]) => [ - first, - ...rest, - ]); + return sequence(parser, many(parser)).map(( + [first, rest], + ) => [first, ...rest]); } /** * Parses `parser` multiple times and returns an `Array`. This function is @@ -176,10 +167,7 @@ function all(parser: Parser): Parser> { } /** Like `all` but parses at least once. */ function allAtLeastOnce(parser: Parser): Parser> { - return sequence(parser, all(parser)).map(([first, rest]) => [ - first, - ...rest, - ]); + return sequence(parser, all(parser)).map(([first, rest]) => [first, ...rest]); } /** Parses comma. */ function comma(): Parser { @@ -198,29 +186,22 @@ function word(): Parser { * string. This function is exhaustive like `all`. */ function properWords(): Parser { - return allAtLeastOnce(match(/([A-Z][a-z]*)\s*/).map(([_, word]) => word)) - .map( - (array) => array.join(" "), - ); + return allAtLeastOnce(match(/([A-Z][a-z]*)\s*/).map(([_, word]) => word)).map( + (array) => array.join(" "), + ); } /** Parses word only from `set`. */ function wordFrom(set: Set, description: string): Parser { return word().map((word) => { - if (set.has(word)) { - return word; - } else { - throw new UnrecognizedError(`"${word}" as ${description}`); - } + if (set.has(word)) return word; + else throw new UnrecognizedError(`"${word}" as ${description}`); }); } /** Parses a specific word. */ function specificWord(thatWord: string): Parser { return word().map((thisWord) => { - if (thatWord === thisWord) { - return thisWord; - } else { - throw new UnrecognizedError(`"${thisWord}" instead of "${thatWord}"`); - } + if (thatWord === thisWord) return thisWord; + else throw new UnrecognizedError(`"${thisWord}" instead of "${thatWord}"`); }); } /** Parses X ala X construction as well as just X */ @@ -229,11 +210,8 @@ function optionalAlaQuestion( ): Parser<[string, boolean]> { return choice( sequence(parser.skip(specificWord("ala")), parser).map(([left, right]) => { - if (left === right) { - return [left, true] as [string, boolean]; - } else { - throw new UnreachableError(); - } + if (left === right) return [left, true] as [string, boolean]; + else throw new UnreachableError(); }), parser.map((word) => [word, false]), ); @@ -241,50 +219,29 @@ function optionalAlaQuestion( /** Parses number words in order. */ function number(): Parser> { return sequence( - many(choice( - specificWord("ale"), - specificWord( - "ali", - ), - )), + many(choice(specificWord("ale"), specificWord("ali"))), many(specificWord("mute")), many(specificWord("luka")), many(specificWord("tu")), many(specificWord("wan")), ).map((array) => { const output = array.flat(); - if (output.length >= 2) { - return output; - } else { - throw new UnreachableError(); - } + if (output.length >= 2) return output; + else throw new UnreachableError(); }); } /** Parses a single modifier. */ function modifier(): Parser { return choice( - specificWord("nanpa") - .with(phrase()) - .map((phrase) => ({ - type: "nanpa ordinal", - phrase, - })), - wordFrom(CONTENT_WORD, "modifier").map( - (word) => ({ - type: "word", - word, - } as Modifier), - ), - properWords().map((words) => ({ - type: "proper words", - words, + specificWord("nanpa").with(phrase()).map((phrase) => ({ + type: "nanpa ordinal", + phrase, })), - specificWord("pi") - .with(phrase()) - .map((phrase) => ({ - type: "pi", - phrase, - })), + wordFrom(CONTENT_WORD, "modifier").map(( + word, + ) => ({ type: "word", word } as Modifier)), + properWords().map((words) => ({ type: "proper words", words })), + specificWord("pi").with(phrase()).map((phrase) => ({ type: "pi", phrase })), number().map((number) => ({ type: "cardinal", number })), quotation().map((quotation) => ({ type: "quotation", quotation })), ); @@ -292,23 +249,18 @@ function modifier(): Parser { /** Parses phrase. */ function simplePhrase(): Parser { return choice( - sequence(number(), many(modifier())).map(([number, modifiers]) => ({ - type: "cardinal", - number, - modifiers, - } as SimplePhrase)), + sequence(number(), many(modifier())).map(( + [number, modifiers], + ) => ({ type: "cardinal", number, modifiers } as SimplePhrase)), sequence( optionalAlaQuestion(wordFrom(CONTENT_WORD, "headword")), many(modifier()), - ) - .map( - ([[headWord, alaQuestion], modifiers]) => ({ - type: "default", - headWord, - alaQuestion, - modifiers, - }), - ), + ).map(([[headWord, alaQuestion], modifiers]) => ({ + type: "default", + headWord, + alaQuestion, + modifiers, + })), ); } /** Parses phrases including preverbial phrases. */ @@ -318,17 +270,16 @@ function phrase(): Parser { optionalAlaQuestion(wordFrom(PREVERB, "preverb")), many(lazy(modifier)), lazy(simplePhrase), - ).map(([[preverb, alaQuestion], modifiers, phrase]) => ({ + ).map(( + [[preverb, alaQuestion], modifiers, phrase], + ) => ({ type: "preverb", preverb, alaQuestion, modifiers, phrase, } as Phrase)), - lazy(simplePhrase).map((phrase) => ({ - type: "default", - phrase, - })), + lazy(simplePhrase).map((phrase) => ({ type: "default", phrase })), quotation().map((quotation) => ({ type: "quotation", quotation })), ); } @@ -338,15 +289,12 @@ function preposition(): Parser { optionalAlaQuestion(wordFrom(PREPOSITION, "preposition")), many(modifier()), phrase(), - ) - .map( - ([[preposition, alaQuestion], modifiers, phrase]) => ({ - preposition, - alaQuestion, - modifiers, - phrase, - }), - ); + ).map(([[preposition, alaQuestion], modifiers, phrase]) => ({ + preposition, + alaQuestion, + modifiers, + phrase, + })); } /** Parses phrases separated by _en_. */ function enPhrases(): Parser> { @@ -366,11 +314,9 @@ function predicate(): Parser { preposition, objects, })), - sequence(phrase(), objects()).map( - ( - [predicate, objects], - ) => ({ type: "default", predicate, objects } as Predicate), - ), + sequence(phrase(), objects()).map(( + [predicate, objects], + ) => ({ type: "default", predicate, objects } as Predicate)), ); } /** Parses a single clause. */ @@ -383,38 +329,26 @@ function clause(): Parser { many(optionalComma().with(preposition())), ).map(([subject, predicate, morePredicates, prepositions]) => ({ type: "li clause", - subjects: [ - { + subjects: [{ + type: "default", + phrase: { type: "default", - phrase: { - type: "default", - headWord: subject, - alaQuestion: false, - modifiers: [], - }, + headWord: subject, + alaQuestion: false, + modifiers: [], }, - ], + }], predicates: [predicate, ...morePredicates], prepositions, })), manyAtLeastOnce(optionalComma().with(preposition())).map(( prepositions, - ) => ({ - type: "prepositions", - prepositions, + ) => ({ type: "prepositions", prepositions })), + enPhrases().map((phrases) => ({ type: "en phrases", phrases } as Clause)), + enPhrases().skip(specificWord("o")).map((phrases) => ({ + type: "o vocative", + phrases, })), - enPhrases().map( - (phrases) => ({ - type: "en phrases", - phrases, - } as Clause), - ), - enPhrases() - .skip(specificWord("o")) - .map((phrases) => ({ - type: "o vocative", - phrases, - })), sequence( enPhrases(), manyAtLeastOnce( @@ -461,15 +395,13 @@ function fullClause(): Parser { optional( sequence(optionalComma(), specificWord("anu"), specificWord("seme")), ), - ).map( - ([taso, clause, anuSeme]) => ({ - taso: !!taso, - anuSeme: !!anuSeme, - clause, - }), - ); + ).map(([taso, clause, anuSeme]) => ({ + taso: !!taso, + anuSeme: !!anuSeme, + clause, + })); } -// parses _la_ with optional comma around +/** parses _la_ with optional comma around. */ function la(): Parser { return choice( comma().with(specificWord("la")), @@ -519,19 +451,12 @@ function quotation(): Parser { if (rightMark !== "」") { throw new UnrecognizedError("Mismatched quotation marks"); } - } else { - throw new UnreachableError(); - } - return { - sentences, - leftMark, - rightMark, - }; + } else throw new UnreachableError(); + return { sentences, leftMark, rightMark }; }); } /** A multiple Toki Pona sentence parser. */ export function parser(src: string): Output> { - return match(/\s*/).with(allAtLeastOnce(sentence())).skip(eol()) - .parser(src) + return match(/\s*/).with(allAtLeastOnce(sentence())).skip(eol()).parser(src) .map(({ value }) => value); } diff --git a/src/translation.ts b/src/translation.ts index 4a34d1e..8e3fafa 100644 --- a/src/translation.ts +++ b/src/translation.ts @@ -102,7 +102,12 @@ export const NOUN = { tenpo: ["time"], toki: ["communication", "communications", "language", "languages", "hello"], tomo: ["house", "houses"], - tonsi: ["transgender person", "transgender people", "non-binary person", "non-binary people"], + tonsi: [ + "transgender person", + "transgender people", + "non-binary person", + "non-binary people", + ], tu: ["pair"], unpa: ["sex"], uta: ["mouth"], diff --git a/src/translator.ts b/src/translator.ts index 6dba73d..72c886b 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -1,9 +1,7 @@ import { Sentence } from "./ast.ts"; import { Output } from "./output.ts"; import { parser } from "./parser.ts"; - type TranslationOutput = Output; - function translateSentence(output: Sentence): TranslationOutput { throw new Error("todo"); } diff --git a/src/vocabulary.ts b/src/vocabulary.ts index fb5e1f9..890a5b5 100644 --- a/src/vocabulary.ts +++ b/src/vocabulary.ts @@ -143,9 +143,12 @@ export const PREVERB = new Set([ "wile", ]); /** Prepositions. */ -export const PREPOSITION = new Set(["kepeken", "lon", "sama", "tan", "tawa"]); -/** Full vocabulary. */ -export const VOCABULARY = new Set([ - ...PARTICLES, - ...CONTENT_WORD, +export const PREPOSITION = new Set([ + "kepeken", + "lon", + "sama", + "tan", + "tawa", ]); +/** Full vocabulary. */ +export const VOCABULARY = new Set([...PARTICLES, ...CONTENT_WORD]); diff --git a/test-parser.ts b/test-parser.ts index 6ab0da2..dd3f533 100644 --- a/test-parser.ts +++ b/test-parser.ts @@ -2,12 +2,7 @@ import { OutputError } from "./src/error.ts"; import { parser } from "./src/parser.ts"; const input = await Deno.readTextFile("./test.txt"); -console.log( - JSON.stringify(parser(input), (key, value) => { - if (key === "error") { - return (value as null | OutputError)?.message; - } else { - return value; - } - }, 2), -); +console.log(JSON.stringify(parser(input), (key, value) => { + if (key === "error") return (value as null | OutputError)?.message; + else return value; +}, 2)); From e2570ff9746518ab7809bf1c51e4c29188b4f914 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 17 Jan 2024 09:03:59 +0800 Subject: [PATCH 136/271] allow preverbs to be nested --- src/ast.ts | 10 ++++------ src/parser.ts | 44 +++++++++++++++++--------------------------- 2 files changed, 21 insertions(+), 33 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 14c0df7..db53cf1 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -12,8 +12,8 @@ export type Quotation = { leftMark: string; rightMark: string; }; -/** Represents a simple phrase. */ -export type SimplePhrase = { +/** Represents a phrase including preverbial phrases and quotations. */ +export type Phrase = { type: "default"; headWord: string; alaQuestion: boolean; @@ -22,14 +22,12 @@ export type SimplePhrase = { type: "cardinal"; number: Array; modifiers: Array; -}; -/** Represents a phrase including preverbial phrases and quotations. */ -export type Phrase = { type: "default"; phrase: SimplePhrase } | { +} | { type: "preverb"; preverb: string; alaQuestion: boolean; modifiers: Array; - phrase: SimplePhrase; + phrase: Phrase; } | { type: "quotation"; quotation: Quotation; diff --git a/src/parser.ts b/src/parser.ts index 8bdf6aa..530c410 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -7,7 +7,6 @@ import { Preposition, Quotation, Sentence, - SimplePhrase, } from "./ast.ts"; import { UnreachableError, UnrecognizedError } from "./error.ts"; import { Output } from "./output.ts"; @@ -246,30 +245,16 @@ function modifier(): Parser { quotation().map((quotation) => ({ type: "quotation", quotation })), ); } -/** Parses phrase. */ -function simplePhrase(): Parser { - return choice( - sequence(number(), many(modifier())).map(( - [number, modifiers], - ) => ({ type: "cardinal", number, modifiers } as SimplePhrase)), - sequence( - optionalAlaQuestion(wordFrom(CONTENT_WORD, "headword")), - many(modifier()), - ).map(([[headWord, alaQuestion], modifiers]) => ({ - type: "default", - headWord, - alaQuestion, - modifiers, - })), - ); -} /** Parses phrases including preverbial phrases. */ function phrase(): Parser { return choice( + sequence(number(), many(modifier())).map(( + [number, modifiers], + ) => ({ type: "cardinal", number, modifiers } as Phrase)), sequence( optionalAlaQuestion(wordFrom(PREVERB, "preverb")), many(lazy(modifier)), - lazy(simplePhrase), + lazy(phrase), ).map(( [[preverb, alaQuestion], modifiers, phrase], ) => ({ @@ -279,7 +264,15 @@ function phrase(): Parser { modifiers, phrase, } as Phrase)), - lazy(simplePhrase).map((phrase) => ({ type: "default", phrase })), + sequence( + optionalAlaQuestion(wordFrom(CONTENT_WORD, "headword")), + many(modifier()), + ).map(([[headWord, alaQuestion], modifiers]) => ({ + type: "default", + headWord, + alaQuestion, + modifiers, + })), quotation().map((quotation) => ({ type: "quotation", quotation })), ); } @@ -331,16 +324,13 @@ function clause(): Parser { type: "li clause", subjects: [{ type: "default", - phrase: { - type: "default", - headWord: subject, - alaQuestion: false, - modifiers: [], - }, + headWord: subject, + alaQuestion: false, + modifiers: [], }], predicates: [predicate, ...morePredicates], prepositions, - })), + } as Clause)), manyAtLeastOnce(optionalComma().with(preposition())).map(( prepositions, ) => ({ type: "prepositions", prepositions })), From cfee8349e5563944725fc2cef8405081956c141e Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 17 Jan 2024 10:45:02 +0800 Subject: [PATCH 137/271] phrases can now be prepositional --- src/ast.ts | 21 +++++++++++---------- src/parser.ts | 22 ++++++++++------------ 2 files changed, 21 insertions(+), 22 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index db53cf1..e385783 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -12,7 +12,10 @@ export type Quotation = { leftMark: string; rightMark: string; }; -/** Represents a phrase including preverbial phrases and quotations. */ +/** + * Represents a phrase including preverbial phrases, quotations, and + * prepositional phrases intended for predicate. + */ export type Phrase = { type: "default"; headWord: string; @@ -28,6 +31,9 @@ export type Phrase = { alaQuestion: boolean; modifiers: Array; phrase: Phrase; +} | { + type: "preposition"; + preposition: Preposition; } | { type: "quotation"; quotation: Quotation; @@ -39,15 +45,10 @@ export type Preposition = { modifiers: Array; phrase: Phrase; }; -/** Represents a single predicate. */ -export type Predicate = { - type: "default"; +/** Represents a single predicate and multiple objects. */ +export type PredicateObjects = { predicate: Phrase; objects: Array; -} | { - type: "preposition"; - preposition: Preposition; - objects: Array; }; /** Represents a simple clause. */ export type Clause = { type: "en phrases"; phrases: Array } | { @@ -56,12 +57,12 @@ export type Clause = { type: "en phrases"; phrases: Array } | { } | { type: "li clause"; subjects: Array; - predicates: Array; + predicates: Array; prepositions: Array; } | { type: "o clause"; subjects: Array; - predicates: Array; + predicates: Array; prepositions: Array; } | { type: "prepositions"; diff --git a/src/parser.ts b/src/parser.ts index 530c410..85fbf1c 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -3,7 +3,7 @@ import { FullClause, Modifier, Phrase, - Predicate, + PredicateObjects, Preposition, Quotation, Sentence, @@ -264,6 +264,11 @@ function phrase(): Parser { modifiers, phrase, } as Phrase)), + sequence(preposition(), objects()).map(([preposition, objects]) => ({ + type: "preposition", + preposition, + objects, + })), sequence( optionalAlaQuestion(wordFrom(CONTENT_WORD, "headword")), many(modifier()), @@ -300,17 +305,10 @@ function objects(): Parser> { return many(optionalComma().with(specificWord("e")).with(phrase())); } /** Parses a single predicate without _li_ nor _o_. */ -function predicate(): Parser { - return choice( - sequence(preposition(), objects()).map(([preposition, objects]) => ({ - type: "preposition", - preposition, - objects, - })), - sequence(phrase(), objects()).map(( - [predicate, objects], - ) => ({ type: "default", predicate, objects } as Predicate)), - ); +function predicate(): Parser { + return sequence(phrase(), objects()).map(( + [predicate, objects], + ) => ({ predicate, objects } as PredicateObjects)); } /** Parses a single clause. */ function clause(): Parser { From 7db1122c5dd06b8bd329fdda04a59ad8b25eb910 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 17 Jan 2024 11:43:27 +0800 Subject: [PATCH 138/271] implement associated predicates --- src/ast.ts | 17 ++++++++++------- src/parser.ts | 53 ++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 48 insertions(+), 22 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index e385783..5fa6966 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -45,10 +45,15 @@ export type Preposition = { modifiers: Array; phrase: Phrase; }; -/** Represents a single predicate and multiple objects. */ -export type PredicateObjects = { - predicate: Phrase; +/** + * Represents a single simple predicate or multiple predicates associated with + * the same objects or prepositions. + */ +export type AssociatedPredicates = { type: "simple"; predicate: Phrase } | { + type: "associated"; + predicates: Array; objects: Array; + prepositions: Array; }; /** Represents a simple clause. */ export type Clause = { type: "en phrases"; phrases: Array } | { @@ -57,13 +62,11 @@ export type Clause = { type: "en phrases"; phrases: Array } | { } | { type: "li clause"; subjects: Array; - predicates: Array; - prepositions: Array; + predicates: Array; } | { type: "o clause"; subjects: Array; - predicates: Array; - prepositions: Array; + predicates: Array; } | { type: "prepositions"; prepositions: Array; diff --git a/src/parser.ts b/src/parser.ts index 85fbf1c..c5e3d36 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1,9 +1,9 @@ import { + AssociatedPredicates, Clause, FullClause, Modifier, Phrase, - PredicateObjects, Preposition, Quotation, Sentence, @@ -304,19 +304,42 @@ function enPhrases(): Parser> { function objects(): Parser> { return many(optionalComma().with(specificWord("e")).with(phrase())); } -/** Parses a single predicate without _li_ nor _o_. */ -function predicate(): Parser { - return sequence(phrase(), objects()).map(( - [predicate, objects], - ) => ({ predicate, objects } as PredicateObjects)); +/** Parses a single associated predicates without _li_ nor _o_ at first. */ +function associatedPredicates(particle: string): Parser { + return choice( + phrase().map(( + predicate, + ) => ({ type: "simple", predicate } as AssociatedPredicates)), + sequence( + phrase(), + many(optionalComma().with(specificWord(particle)).with(phrase())), + objects(), + many(preposition()), + ).map(([predicate, morePredicates, objects, prepositions]) => { + if (objects.length === 0 && prepositions.length === 0) { + throw new UnreachableError(); + } else { + return { + type: "associated", + predicates: [predicate, ...morePredicates], + objects, + prepositions, + }; + } + }), + ); } /** Parses a single clause. */ function clause(): Parser { return choice( sequence( wordFrom(SPECIAL_SUBJECT, "mi/sina subject"), - predicate(), - many(optionalComma().with(specificWord("li")).with(predicate())), + associatedPredicates("li"), + many( + optionalComma().with(specificWord("li")).with( + associatedPredicates("li"), + ), + ), many(optionalComma().with(preposition())), ).map(([subject, predicate, morePredicates, prepositions]) => ({ type: "li clause", @@ -340,19 +363,19 @@ function clause(): Parser { sequence( enPhrases(), manyAtLeastOnce( - optionalComma().with(specificWord("li")).with(predicate()), + optionalComma().with(specificWord("li")).with( + associatedPredicates("li"), + ), ), - many(optionalComma().with(preposition())), - ).map(([subjects, predicates, prepositions]) => ({ + ).map(([subjects, predicates]) => ({ type: "li clause", subjects, predicates, - prepositions, })), sequence( - specificWord("o").with(predicate()), + specificWord("o").with(associatedPredicates("o")), manyAtLeastOnce( - optionalComma().with(specificWord("o")).with(predicate()), + optionalComma().with(specificWord("o")).with(associatedPredicates("o")), ), many(optionalComma().with(preposition())), ).map(([predicate, morePredicates, prepositions]) => ({ @@ -364,7 +387,7 @@ function clause(): Parser { sequence( enPhrases(), manyAtLeastOnce( - optionalComma().with(specificWord("o")).with(predicate()), + optionalComma().with(specificWord("o")).with(associatedPredicates("o")), ), many(optionalComma().with(preposition())), ).map(([subjects, predicates, prepositions]) => ({ From 1c7551950f7d61b4ddb219a3ac20d3a7cd8b8cb8 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 17 Jan 2024 11:52:05 +0800 Subject: [PATCH 139/271] cleanup and fix parsers --- src/parser.ts | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index c5e3d36..9186114 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -264,10 +264,9 @@ function phrase(): Parser { modifiers, phrase, } as Phrase)), - sequence(preposition(), objects()).map(([preposition, objects]) => ({ + preposition().map((preposition) => ({ type: "preposition", preposition, - objects, })), sequence( optionalAlaQuestion(wordFrom(CONTENT_WORD, "headword")), @@ -301,9 +300,6 @@ function enPhrases(): Parser> { many(optionalComma().with(specificWord("en")).with(phrase())), ).map(([first, rest]) => [first, ...rest]); } -function objects(): Parser> { - return many(optionalComma().with(specificWord("e")).with(phrase())); -} /** Parses a single associated predicates without _li_ nor _o_ at first. */ function associatedPredicates(particle: string): Parser { return choice( @@ -313,7 +309,7 @@ function associatedPredicates(particle: string): Parser { sequence( phrase(), many(optionalComma().with(specificWord(particle)).with(phrase())), - objects(), + many(optionalComma().with(specificWord("e")).with(phrase())), many(preposition()), ).map(([predicate, morePredicates, objects, prepositions]) => { if (objects.length === 0 && prepositions.length === 0) { @@ -340,8 +336,7 @@ function clause(): Parser { associatedPredicates("li"), ), ), - many(optionalComma().with(preposition())), - ).map(([subject, predicate, morePredicates, prepositions]) => ({ + ).map(([subject, predicate, morePredicates]) => ({ type: "li clause", subjects: [{ type: "default", @@ -350,7 +345,6 @@ function clause(): Parser { modifiers: [], }], predicates: [predicate, ...morePredicates], - prepositions, } as Clause)), manyAtLeastOnce(optionalComma().with(preposition())).map(( prepositions, @@ -377,24 +371,20 @@ function clause(): Parser { manyAtLeastOnce( optionalComma().with(specificWord("o")).with(associatedPredicates("o")), ), - many(optionalComma().with(preposition())), - ).map(([predicate, morePredicates, prepositions]) => ({ + ).map(([predicate, morePredicates]) => ({ type: "o clause", subjects: [], predicates: [predicate, ...morePredicates], - prepositions, })), sequence( enPhrases(), manyAtLeastOnce( optionalComma().with(specificWord("o")).with(associatedPredicates("o")), ), - many(optionalComma().with(preposition())), - ).map(([subjects, predicates, prepositions]) => ({ + ).map(([subjects, predicates]) => ({ type: "o clause", subjects: subjects, predicates, - prepositions, })), ); } From 45be05b7c9be9bfbbef43d3381b7675441295f08 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 17 Jan 2024 12:16:42 +0800 Subject: [PATCH 140/271] avoid infinite recursion --- src/parser.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 9186114..4889e3c 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -248,7 +248,7 @@ function modifier(): Parser { /** Parses phrases including preverbial phrases. */ function phrase(): Parser { return choice( - sequence(number(), many(modifier())).map(( + sequence(number(), many(lazy(modifier))).map(( [number, modifiers], ) => ({ type: "cardinal", number, modifiers } as Phrase)), sequence( @@ -264,13 +264,13 @@ function phrase(): Parser { modifiers, phrase, } as Phrase)), - preposition().map((preposition) => ({ + lazy(preposition).map((preposition) => ({ type: "preposition", preposition, })), sequence( optionalAlaQuestion(wordFrom(CONTENT_WORD, "headword")), - many(modifier()), + many(lazy(modifier)), ).map(([[headWord, alaQuestion], modifiers]) => ({ type: "default", headWord, From cd24e9b32fbfad4a881dc73f3619a9f70e0c8026 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 17 Jan 2024 12:39:19 +0800 Subject: [PATCH 141/271] fix formatting --- src/ast.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ast.ts b/src/ast.ts index 5fa6966..0509b2f 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -70,7 +70,8 @@ export type Clause = { type: "en phrases"; phrases: Array } | { } | { type: "prepositions"; prepositions: Array; -}; /** Represents a clause including preclause and postclause. */ +}; +/** Represents a clause including preclause and postclause. */ export type FullClause = { taso: boolean; anuSeme: boolean; From b42815820d91600c30caf5b168b395cab15eb5a6 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 20 Jan 2024 08:58:14 +0800 Subject: [PATCH 142/271] implement filter for malformed pi/nanpa nesting --- src/output.ts | 13 +++++++++++-- src/parser.ts | 35 +++++++++++++++++++++++++++++++---- 2 files changed, 42 insertions(+), 6 deletions(-) diff --git a/src/output.ts b/src/output.ts index 818dfdf..5f15691 100644 --- a/src/output.ts +++ b/src/output.ts @@ -37,6 +37,15 @@ export class Output { isError(): boolean { return this.output.length === 0; } + filter(mapper: (value: T) => boolean): Output { + return this.map((value) => { + if (mapper(value)) { + return value; + } else { + throw new OutputError("no error provided"); + } + }); + } /** * Maps all values and returns new Output. For convenience, the mapper * function can throw OutputError; Other kinds of errors will be ignored. @@ -54,8 +63,8 @@ export class Output { } return wholeOutput; } - /** - * Accepts mapper function that returns another Output. flatMap takes all + /** + * Accepts mapper function that returns another Output. flatMap takes all * values and flattens them into single array for Output. */ flatMap(mapper: (value: T) => Output): Output { diff --git a/src/parser.ts b/src/parser.ts index 4889e3c..bcd8242 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -37,6 +37,11 @@ class Parser { })) ); } + filter(mapper: (x: T) => boolean): Parser { + return new Parser((src) => + this.parser(src).filter(({ value }) => mapper(value)) + ); + } /** Takes another parser and discards the first parsing result. */ with(parser: Parser): Parser { return sequence(this, parser).map(([_, output]) => output); @@ -245,15 +250,37 @@ function modifier(): Parser { quotation().map((quotation) => ({ type: "quotation", quotation })), ); } +function modifiers(): Parser> { + return many(modifier()).filter((modifiers) => { + // Filter out malformed nesting with nanpa or pi + const noPi = modifiers.reduceRight((array, modifier) => { + if (array.length === 0 && modifier.type === "pi") { + return []; + } else { + return [modifier, ...array]; + } + }, [] as Array); + const noNanpa = noPi.reduceRight((array, modifier) => { + if (array.length === 0 && modifier.type === "nanpa ordinal") { + return []; + } else { + return [modifier, ...array]; + } + }, [] as Array); + return noNanpa.every((modifier) => + modifier.type !== "pi" && modifier.type !== "nanpa ordinal" + ); + }); +} /** Parses phrases including preverbial phrases. */ function phrase(): Parser { return choice( - sequence(number(), many(lazy(modifier))).map(( + sequence(number(), lazy(modifiers)).map(( [number, modifiers], ) => ({ type: "cardinal", number, modifiers } as Phrase)), sequence( optionalAlaQuestion(wordFrom(PREVERB, "preverb")), - many(lazy(modifier)), + lazy(modifiers), lazy(phrase), ).map(( [[preverb, alaQuestion], modifiers, phrase], @@ -270,7 +297,7 @@ function phrase(): Parser { })), sequence( optionalAlaQuestion(wordFrom(CONTENT_WORD, "headword")), - many(lazy(modifier)), + lazy(modifiers), ).map(([[headWord, alaQuestion], modifiers]) => ({ type: "default", headWord, @@ -284,7 +311,7 @@ function phrase(): Parser { function preposition(): Parser { return sequence( optionalAlaQuestion(wordFrom(PREPOSITION, "preposition")), - many(modifier()), + modifiers(), phrase(), ).map(([[preposition, alaQuestion], modifiers, phrase]) => ({ preposition, From c0335c145306a1233cadeb48725dcb5c6ba85374 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 20 Jan 2024 09:00:08 +0800 Subject: [PATCH 143/271] use filter instead --- src/parser.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index bcd8242..aa68e50 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -196,15 +196,15 @@ function properWords(): Parser { } /** Parses word only from `set`. */ function wordFrom(set: Set, description: string): Parser { - return word().map((word) => { - if (set.has(word)) return word; + return word().filter((word) => { + if (set.has(word)) return true; else throw new UnrecognizedError(`"${word}" as ${description}`); }); } /** Parses a specific word. */ function specificWord(thatWord: string): Parser { - return word().map((thisWord) => { - if (thatWord === thisWord) return thisWord; + return word().filter((thisWord) => { + if (thatWord === thisWord) return true; else throw new UnrecognizedError(`"${thisWord}" instead of "${thatWord}"`); }); } From a3fcb2a824abdb567b03dc2f4621c8e95f453811 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 20 Jan 2024 09:34:33 +0800 Subject: [PATCH 144/271] implement duplicate checker --- src/duplicate-checker.ts | 23 +++++++++++++++++++++++ src/fuzzer.ts | 4 +++- 2 files changed, 26 insertions(+), 1 deletion(-) create mode 100644 src/duplicate-checker.ts diff --git a/src/duplicate-checker.ts b/src/duplicate-checker.ts new file mode 100644 index 0000000..d38c5a5 --- /dev/null +++ b/src/duplicate-checker.ts @@ -0,0 +1,23 @@ +import { randomPhrase } from "./fuzzer.ts"; +import { parser } from "./parser.ts"; + +const timeStart = +new Date(); +const duration = 10 * 1000; +let count = 0; + +while (+new Date() < timeStart + duration) { + const words = randomPhrase(); + if (words.length > 10) continue; + const src = words.join(" "); + const set = new Set(); + for (const ast of parser(src).output) { + const json = JSON.stringify(ast); + if (set.has(json)) { + throw new Error(`Duplicate found when parsing "${src}".`); + } else { + set.add(json); + } + } + count++; +} +console.log(`Tested ${count} random sentences.`); diff --git a/src/fuzzer.ts b/src/fuzzer.ts index fc30e82..d76c806 100644 --- a/src/fuzzer.ts +++ b/src/fuzzer.ts @@ -61,7 +61,9 @@ function randomNumberWords(): Array { } return words; } -function randomPhrase(): Array { +// TODO: nested preverbs and preposition +// TODO: remove export when randomSentence is defined +export function randomPhrase(): Array { const modifiers = fill(randomNumber(2), randomModifier).flat(); const phrase = randomIn(() => { const headWord = randomIn( From aad5ca72202b5c8774e680528ae9a1d9be7883a0 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 20 Jan 2024 09:42:47 +0800 Subject: [PATCH 145/271] simplify modifier parser --- src/parser.ts | 67 ++++++++++++++++++++++++--------------------------- 1 file changed, 31 insertions(+), 36 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index aa68e50..dca4b32 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -234,43 +234,38 @@ function number(): Parser> { else throw new UnreachableError(); }); } -/** Parses a single modifier. */ -function modifier(): Parser { - return choice( - specificWord("nanpa").with(phrase()).map((phrase) => ({ - type: "nanpa ordinal", - phrase, - })), - wordFrom(CONTENT_WORD, "modifier").map(( - word, - ) => ({ type: "word", word } as Modifier)), - properWords().map((words) => ({ type: "proper words", words })), - specificWord("pi").with(phrase()).map((phrase) => ({ type: "pi", phrase })), - number().map((number) => ({ type: "cardinal", number })), - quotation().map((quotation) => ({ type: "quotation", quotation })), - ); -} +/** Parses multiple modifiers */ function modifiers(): Parser> { - return many(modifier()).filter((modifiers) => { - // Filter out malformed nesting with nanpa or pi - const noPi = modifiers.reduceRight((array, modifier) => { - if (array.length === 0 && modifier.type === "pi") { - return []; - } else { - return [modifier, ...array]; - } - }, [] as Array); - const noNanpa = noPi.reduceRight((array, modifier) => { - if (array.length === 0 && modifier.type === "nanpa ordinal") { - return []; - } else { - return [modifier, ...array]; - } - }, [] as Array); - return noNanpa.every((modifier) => - modifier.type !== "pi" && modifier.type !== "nanpa ordinal" - ); - }); + return sequence( + many( + choice( + wordFrom(CONTENT_WORD, "modifier").map(( + word, + ) => ({ type: "word", word } as Modifier)), + properWords().map(( + words, + ) => ({ type: "proper words", words } as Modifier)), + number().map((number) => ({ type: "cardinal", number } as Modifier)), + quotation().map(( + quotation, + ) => ({ type: "quotation", quotation } as Modifier)), + ), + ), + many( + specificWord("nanpa").with(phrase()).map((phrase) => ({ + type: "nanpa ordinal", + phrase, + } as Modifier)), + ), + many( + specificWord("pi").with(phrase()).map((phrase) => ({ + type: "pi", + phrase, + } as Modifier)), + ), + ).map(( + [modifiers, nanpaModifiers, piModifiers], + ) => [...modifiers, ...nanpaModifiers, ...piModifiers]); } /** Parses phrases including preverbial phrases. */ function phrase(): Parser { From 83c9de12c1100c69033884c61951313ca0dfdd30 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 20 Jan 2024 14:17:30 +0800 Subject: [PATCH 146/271] implement "anu" --- src/ast.ts | 41 ++++++++------ src/parser.ts | 152 +++++++++++++++++++++++++++++++++----------------- 2 files changed, 125 insertions(+), 68 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 0509b2f..cc19730 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -38,35 +38,42 @@ export type Phrase = { type: "quotation"; quotation: Quotation; }; +/** Represents multiple phrases separated by repeated particle or _anu_. */ +export type MultiplePhrases = { type: "single"; phrase: Phrase } | { + type: "and conjunction"; + phrases: Array; +} | { type: "anu"; phrases: Array }; /** Represents a single prepositional phrase. */ export type Preposition = { preposition: string; alaQuestion: boolean; modifiers: Array; - phrase: Phrase; -}; -/** - * Represents a single simple predicate or multiple predicates associated with - * the same objects or prepositions. - */ -export type AssociatedPredicates = { type: "simple"; predicate: Phrase } | { - type: "associated"; - predicates: Array; - objects: Array; - prepositions: Array; + /** This cannot be an "and conjunction": only "anu" or "single". */ + phrases: MultiplePhrases; }; +/** Represents multiple predicates. */ +export type MultiplePredicates = + | { type: "single"; predicate: Phrase } + | { + type: "associated"; + predicates: MultiplePhrases; + objects: null | MultiplePhrases; + prepositions: Array; + } + | { type: "and conjunction"; predicates: Array } + | { type: "anu"; predicates: Array }; /** Represents a simple clause. */ -export type Clause = { type: "en phrases"; phrases: Array } | { +export type Clause = { type: "en phrases"; phrases: MultiplePhrases } | { type: "o vocative"; - phrases: Array; + phrases: MultiplePhrases; } | { type: "li clause"; - subjects: Array; - predicates: Array; + subjects: MultiplePhrases; + predicates: MultiplePredicates; } | { type: "o clause"; - subjects: Array; - predicates: Array; + subjects: null | MultiplePhrases; + predicates: MultiplePredicates; } | { type: "prepositions"; prepositions: Array; diff --git a/src/parser.ts b/src/parser.ts index dca4b32..3dd003a 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -1,8 +1,9 @@ import { - AssociatedPredicates, Clause, FullClause, Modifier, + MultiplePhrases, + MultiplePredicates, Phrase, Preposition, Quotation, @@ -302,44 +303,98 @@ function phrase(): Parser { quotation().map((quotation) => ({ type: "quotation", quotation })), ); } +/** Parses nested phrases with given nesting rule. */ +function nestedPhrases( + nestingRule: Array<"en" | "li" | "o" | "e" | "anu">, +): Parser { + if (nestedPhrases.length === 0) { + return phrase().map( + (phrase) => ({ type: "single", phrase } as MultiplePhrases), + ); + } else { + const [first, ...rest] = nestingRule; + let type: "and conjunction" | "anu"; + if (["en", "li", "o", "e"].indexOf(first) !== 0) { + type = "and conjunction"; + } else { + type = "anu"; + } + return choice( + sequence( + lazy(() => nestedPhrases(rest)), + manyAtLeastOnce( + optionalComma().with(specificWord(first)).with( + lazy(() => nestedPhrases(rest)), + ), + ), + ).map(([group, moreGroups]) => ({ + type, + phrases: [group, ...moreGroups], + })), + phrase().map((phrase) => ({ type: "single", phrase } as MultiplePhrases)), + ); + } +} +function subjectPhrases(): Parser { + return choice( + nestedPhrases(["en", "anu"]), + nestedPhrases(["anu", "en"]).filter((phrase) => phrase.type !== "single"), + ); +} /** Parses prepositional phrase. */ function preposition(): Parser { return sequence( optionalAlaQuestion(wordFrom(PREPOSITION, "preposition")), modifiers(), - phrase(), - ).map(([[preposition, alaQuestion], modifiers, phrase]) => ({ + nestedPhrases(["anu"]), + ).map(([[preposition, alaQuestion], modifiers, phrases]) => ({ preposition, alaQuestion, modifiers, - phrase, + phrases, })); } -/** Parses phrases separated by _en_. */ -function enPhrases(): Parser> { - return sequence( - phrase(), - many(optionalComma().with(specificWord("en")).with(phrase())), - ).map(([first, rest]) => [first, ...rest]); -} -/** Parses a single associated predicates without _li_ nor _o_ at first. */ -function associatedPredicates(particle: string): Parser { +/** Parses multiple predicates without _li_, _o_, nor _anu_ at the beginning. */ +function multiplePredicates( + nestingRule: Array<"li" | "o" | "anu">, +): Parser { + const [first, ...rest] = nestingRule; + let type: "and conjunction" | "anu"; + if (first === "li" || first === "o") { + type = "and conjunction"; + } else { + type = "anu"; + } return choice( + sequence( + lazy(() => multiplePredicates(rest)), + manyAtLeastOnce( + optionalComma().with(specificWord(first)).with( + lazy(() => multiplePredicates(rest)), + ), + ), + ).map(([group, moreGroups]) => ({ + type, + predicates: [group, ...moreGroups], + } as MultiplePredicates)), phrase().map(( predicate, - ) => ({ type: "simple", predicate } as AssociatedPredicates)), + ) => ({ type: "single", predicate } as MultiplePredicates)), sequence( - phrase(), - many(optionalComma().with(specificWord(particle)).with(phrase())), - many(optionalComma().with(specificWord("e")).with(phrase())), + nestedPhrases(nestingRule), + optional( + optionalComma().with(specificWord("e")).with( + nestedPhrases(["e", "anu"]), + ), + ), many(preposition()), - ).map(([predicate, morePredicates, objects, prepositions]) => { - if (objects.length === 0 && prepositions.length === 0) { + ).map(([predicates, objects, prepositions]) => { + if (prepositions.length === 0) { throw new UnreachableError(); } else { return { type: "associated", - predicates: [predicate, ...morePredicates], + predicates, objects, prepositions, }; @@ -352,36 +407,34 @@ function clause(): Parser { return choice( sequence( wordFrom(SPECIAL_SUBJECT, "mi/sina subject"), - associatedPredicates("li"), - many( - optionalComma().with(specificWord("li")).with( - associatedPredicates("li"), - ), - ), - ).map(([subject, predicate, morePredicates]) => ({ + multiplePredicates(["li", "anu"]), + ).map(([subject, predicates]) => ({ type: "li clause", - subjects: [{ - type: "default", - headWord: subject, - alaQuestion: false, - modifiers: [], - }], - predicates: [predicate, ...morePredicates], + subjects: { + type: "single", + phrase: { + type: "default", + headWord: subject, + alaQuestion: false, + modifiers: [], + }, + }, + predicates, } as Clause)), manyAtLeastOnce(optionalComma().with(preposition())).map(( prepositions, ) => ({ type: "prepositions", prepositions })), - enPhrases().map((phrases) => ({ type: "en phrases", phrases } as Clause)), - enPhrases().skip(specificWord("o")).map((phrases) => ({ + subjectPhrases().map(( + phrases, + ) => ({ type: "en phrases", phrases } as Clause)), + subjectPhrases().skip(specificWord("o")).map((phrases) => ({ type: "o vocative", phrases, })), sequence( - enPhrases(), - manyAtLeastOnce( - optionalComma().with(specificWord("li")).with( - associatedPredicates("li"), - ), + subjectPhrases(), + optionalComma().with(specificWord("li")).with( + multiplePredicates(["li", "anu"]), ), ).map(([subjects, predicates]) => ({ type: "li clause", @@ -389,19 +442,16 @@ function clause(): Parser { predicates, })), sequence( - specificWord("o").with(associatedPredicates("o")), - manyAtLeastOnce( - optionalComma().with(specificWord("o")).with(associatedPredicates("o")), - ), - ).map(([predicate, morePredicates]) => ({ + specificWord("o").with(multiplePredicates(["o", "anu"])), + ).map(([predicates]) => ({ type: "o clause", - subjects: [], - predicates: [predicate, ...morePredicates], + subjects: null, + predicates, })), sequence( - enPhrases(), - manyAtLeastOnce( - optionalComma().with(specificWord("o")).with(associatedPredicates("o")), + subjectPhrases(), + optionalComma().with(specificWord("o")).with( + multiplePredicates(["o", "anu"]), ), ).map(([subjects, predicates]) => ({ type: "o clause", From f214e3cb6b4adad0f3da59b8422248ab82dbd617 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 20 Jan 2024 14:32:59 +0800 Subject: [PATCH 147/271] fix error --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index 3dd003a..6ab2568 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -307,7 +307,7 @@ function phrase(): Parser { function nestedPhrases( nestingRule: Array<"en" | "li" | "o" | "e" | "anu">, ): Parser { - if (nestedPhrases.length === 0) { + if (nestingRule.length === 0) { return phrase().map( (phrase) => ({ type: "single", phrase } as MultiplePhrases), ); From 301d90a49644e457225cf662cf358c8542ab3454 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 20 Jan 2024 15:54:24 +0800 Subject: [PATCH 148/271] add base case --- src/parser.ts | 52 +++++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 6ab2568..9dc9f12 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -354,29 +354,11 @@ function preposition(): Parser { phrases, })); } -/** Parses multiple predicates without _li_, _o_, nor _anu_ at the beginning. */ -function multiplePredicates( +/** Parses a single predicate or a single associated predicates */ +function singlePredicate( nestingRule: Array<"li" | "o" | "anu">, ): Parser { - const [first, ...rest] = nestingRule; - let type: "and conjunction" | "anu"; - if (first === "li" || first === "o") { - type = "and conjunction"; - } else { - type = "anu"; - } return choice( - sequence( - lazy(() => multiplePredicates(rest)), - manyAtLeastOnce( - optionalComma().with(specificWord(first)).with( - lazy(() => multiplePredicates(rest)), - ), - ), - ).map(([group, moreGroups]) => ({ - type, - predicates: [group, ...moreGroups], - } as MultiplePredicates)), phrase().map(( predicate, ) => ({ type: "single", predicate } as MultiplePredicates)), @@ -402,6 +384,36 @@ function multiplePredicates( }), ); } +/** Parses multiple predicates without _li_, _o_, nor _anu_ at the beginning. */ +function multiplePredicates( + nestingRule: Array<"li" | "o" | "anu">, +): Parser { + if (nestingRule.length === 0) { + return singlePredicate([]); + } else { + const [first, ...rest] = nestingRule; + let type: "and conjunction" | "anu"; + if (first === "li" || first === "o") { + type = "and conjunction"; + } else { + type = "anu"; + } + return choice( + sequence( + lazy(() => multiplePredicates(rest)), + manyAtLeastOnce( + optionalComma().with(specificWord(first)).with( + lazy(() => multiplePredicates(rest)), + ), + ), + ).map(([group, moreGroups]) => ({ + type, + predicates: [group, ...moreGroups], + } as MultiplePredicates)), + singlePredicate(nestingRule), + ); + } +} /** Parses a single clause. */ function clause(): Parser { return choice( From 7556b3f86187ad623072a4a9caad62f3874c5f5e Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 20 Jan 2024 15:59:58 +0800 Subject: [PATCH 149/271] fix conditional on associated predicates --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index 9dc9f12..cc95f06 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -371,7 +371,7 @@ function singlePredicate( ), many(preposition()), ).map(([predicates, objects, prepositions]) => { - if (prepositions.length === 0) { + if (!objects && prepositions.length === 0) { throw new UnreachableError(); } else { return { From 255a8172a2943ee7ec51d10db78cc4ed8413d216 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 20 Jan 2024 16:01:01 +0800 Subject: [PATCH 150/271] fix conditional in nestedPhrases --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index cc95f06..d65e367 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -314,7 +314,7 @@ function nestedPhrases( } else { const [first, ...rest] = nestingRule; let type: "and conjunction" | "anu"; - if (["en", "li", "o", "e"].indexOf(first) !== 0) { + if (["en", "li", "o", "e"].indexOf(first) !== -1) { type = "and conjunction"; } else { type = "anu"; From 80f8ffade60e95be72f3681be50460e8da7b7571 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 20 Jan 2024 16:24:18 +0800 Subject: [PATCH 151/271] order matters --- src/parser.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index d65e367..0a1eb29 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -359,9 +359,6 @@ function singlePredicate( nestingRule: Array<"li" | "o" | "anu">, ): Parser { return choice( - phrase().map(( - predicate, - ) => ({ type: "single", predicate } as MultiplePredicates)), sequence( nestedPhrases(nestingRule), optional( @@ -382,6 +379,9 @@ function singlePredicate( }; } }), + phrase().map(( + predicate, + ) => ({ type: "single", predicate } as MultiplePredicates)), ); } /** Parses multiple predicates without _li_, _o_, nor _anu_ at the beginning. */ From 9dc2165269a265aec58ee202df16056791f41081 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 20 Jan 2024 16:40:31 +0800 Subject: [PATCH 152/271] fix "anu" not parsing --- src/parser.ts | 62 ++++++++++++++++++++++++--------------------------- 1 file changed, 29 insertions(+), 33 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 0a1eb29..0314185 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -331,7 +331,7 @@ function nestedPhrases( type, phrases: [group, ...moreGroups], })), - phrase().map((phrase) => ({ type: "single", phrase } as MultiplePhrases)), + lazy(() => nestedPhrases(rest)), ); } } @@ -354,42 +354,36 @@ function preposition(): Parser { phrases, })); } -/** Parses a single predicate or a single associated predicates */ -function singlePredicate( - nestingRule: Array<"li" | "o" | "anu">, -): Parser { - return choice( - sequence( - nestedPhrases(nestingRule), - optional( - optionalComma().with(specificWord("e")).with( - nestedPhrases(["e", "anu"]), - ), - ), - many(preposition()), - ).map(([predicates, objects, prepositions]) => { - if (!objects && prepositions.length === 0) { - throw new UnreachableError(); - } else { - return { - type: "associated", - predicates, - objects, - prepositions, - }; - } - }), - phrase().map(( - predicate, - ) => ({ type: "single", predicate } as MultiplePredicates)), - ); -} /** Parses multiple predicates without _li_, _o_, nor _anu_ at the beginning. */ function multiplePredicates( nestingRule: Array<"li" | "o" | "anu">, ): Parser { if (nestingRule.length === 0) { - return singlePredicate([]); + return choice( + sequence( + nestedPhrases([]), + optional( + optionalComma().with(specificWord("e")).with( + nestedPhrases(["e", "anu"]), + ), + ), + many(preposition()), + ).map(([predicates, objects, prepositions]) => { + if (!objects && prepositions.length === 0) { + throw new UnreachableError(); + } else { + return { + type: "associated", + predicates, + objects, + prepositions, + }; + } + }), + phrase().map(( + predicate, + ) => ({ type: "single", predicate } as MultiplePredicates)), + ); } else { const [first, ...rest] = nestingRule; let type: "and conjunction" | "anu"; @@ -410,7 +404,9 @@ function multiplePredicates( type, predicates: [group, ...moreGroups], } as MultiplePredicates)), - singlePredicate(nestingRule), + lazy(() => multiplePredicates(rest)).filter((predicate) => + predicate.type !== "single" && predicate.type !== "associated" + ), ); } } From a89c463d3b2e4ba3c2d02331dcb1d39436da5113 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 20 Jan 2024 16:42:22 +0800 Subject: [PATCH 153/271] remove unnecessary filter --- src/parser.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 0314185..b4e72d3 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -404,9 +404,7 @@ function multiplePredicates( type, predicates: [group, ...moreGroups], } as MultiplePredicates)), - lazy(() => multiplePredicates(rest)).filter((predicate) => - predicate.type !== "single" && predicate.type !== "associated" - ), + lazy(() => multiplePredicates(rest)), ); } } From 465bd63c7febb4138a77fecc10de0d09e16d9f1e Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 20 Jan 2024 17:00:44 +0800 Subject: [PATCH 154/271] hoist preposition and quotation definition as clause type --- src/ast.ts | 5 ++++- src/parser.ts | 18 +++++++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index cc19730..854fbaa 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -63,7 +63,7 @@ export type MultiplePredicates = | { type: "and conjunction"; predicates: Array } | { type: "anu"; predicates: Array }; /** Represents a simple clause. */ -export type Clause = { type: "en phrases"; phrases: MultiplePhrases } | { +export type Clause = { type: "phrases"; phrases: MultiplePhrases } | { type: "o vocative"; phrases: MultiplePhrases; } | { @@ -77,6 +77,9 @@ export type Clause = { type: "en phrases"; phrases: MultiplePhrases } | { } | { type: "prepositions"; prepositions: Array; +} | { + type: "quotation"; + quotation: Quotation; }; /** Represents a clause including preclause and postclause. */ export type FullClause = { diff --git a/src/parser.ts b/src/parser.ts index b4e72d3..ef2edc5 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -430,9 +430,17 @@ function clause(): Parser { manyAtLeastOnce(optionalComma().with(preposition())).map(( prepositions, ) => ({ type: "prepositions", prepositions })), - subjectPhrases().map(( - phrases, - ) => ({ type: "en phrases", phrases } as Clause)), + subjectPhrases().map((phrases) => { + if ( + phrases.type === "single" && + (phrases.phrase.type === "preposition" || + phrases.phrase.type === "quotation") + ) { + throw new UnreachableError(); + } else { + return { type: "phrases", phrases } as Clause; + } + }), subjectPhrases().skip(specificWord("o")).map((phrases) => ({ type: "o vocative", phrases, @@ -464,6 +472,10 @@ function clause(): Parser { subjects: subjects, predicates, })), + quotation().map((quotation) => ({ + type: "quotation", + quotation, + })), ); } /** Parses a single clause including precaluse and postclause. */ From 7aa6686bfd8ae04bce43446b2c6910b12cfd18bc Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 20 Jan 2024 17:08:13 +0800 Subject: [PATCH 155/271] rearrange definitions --- src/ast.ts | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 854fbaa..a129372 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -6,12 +6,6 @@ export type Modifier = | { type: "nanpa ordinal"; phrase: Phrase } | { type: "cardinal"; number: Array } | { type: "quotation"; quotation: Quotation }; -/** Represents quotation. */ -export type Quotation = { - sentences: Array; - leftMark: string; - rightMark: string; -}; /** * Represents a phrase including preverbial phrases, quotations, and * prepositional phrases intended for predicate. @@ -89,3 +83,9 @@ export type FullClause = { }; /** Represents a single full sentence. */ export type Sentence = { laClauses: Array; punctuation: string }; +/** Represents quotation. */ +export type Quotation = { + sentences: Array; + leftMark: string; + rightMark: string; +}; From c727bc5091706d2b3624496090babc04b49b3505 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 20 Jan 2024 17:43:43 +0800 Subject: [PATCH 156/271] update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b8c9168..f399bce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,7 +6,7 @@ For this version. The whole code has been rewritten. This makes the code a lot e - New limitation list. - Drop support for "a" particle. -- (Downgrade) Error messages are now very inaccurate. +- (Downgrade) Error messages are now very unreliable. - Update translation list: - _tonsi_ – change nouns "transgender", "transgenders", "non-binary", and "non-binaries" into "transgender person", "transgender people", "non-binary person", and "non-binary people" (I DIDN'T MEAN TO OBJECTIFY THEM OMFG I'M SO SORRY 😭😭😭) From 3054ffc428890c680a421cfd4de9aa0a7f0074b9 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 20 Jan 2024 17:47:54 +0800 Subject: [PATCH 157/271] remove discord dm as contact option --- CHANGELOG.md | 1 + index.html | 1 - 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f399bce..889fd6e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ For this version. The whole code has been rewritten. This makes the code a lot e - New limitation list. - Drop support for "a" particle. - (Downgrade) Error messages are now very unreliable. +- Remove Discord DM as contact option. - Update translation list: - _tonsi_ – change nouns "transgender", "transgenders", "non-binary", and "non-binaries" into "transgender person", "transgender people", "non-binary person", and "non-binary people" (I DIDN'T MEAN TO OBJECTIFY THEM OMFG I'M SO SORRY 😭😭😭) diff --git a/index.html b/index.html index 8e5e01a..19eff16 100644 --- a/index.html +++ b/index.html @@ -51,7 +51,6 @@

    Toki Pona Translator

    >Toki Pona to multiple English sentence translator. -
  • Dm me on Discord: never_rare
  • Email me: Date: Sat, 20 Jan 2024 19:03:44 +0800 Subject: [PATCH 158/271] make formatting more consistent --- src/ast.ts | 102 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 58 insertions(+), 44 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index a129372..5b92bcb 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -10,33 +10,41 @@ export type Modifier = * Represents a phrase including preverbial phrases, quotations, and * prepositional phrases intended for predicate. */ -export type Phrase = { - type: "default"; - headWord: string; - alaQuestion: boolean; - modifiers: Array; -} | { - type: "cardinal"; - number: Array; - modifiers: Array; -} | { - type: "preverb"; - preverb: string; - alaQuestion: boolean; - modifiers: Array; - phrase: Phrase; -} | { - type: "preposition"; - preposition: Preposition; -} | { - type: "quotation"; - quotation: Quotation; -}; +export type Phrase = + | { + type: "default"; + headWord: string; + alaQuestion: boolean; + modifiers: Array; + } + | { + type: "cardinal"; + number: Array; + modifiers: Array; + } + | { + type: "preverb"; + preverb: string; + alaQuestion: boolean; + modifiers: Array; + phrase: Phrase; + } + | { + type: "preposition"; + preposition: Preposition; + } + | { + type: "quotation"; + quotation: Quotation; + }; /** Represents multiple phrases separated by repeated particle or _anu_. */ -export type MultiplePhrases = { type: "single"; phrase: Phrase } | { - type: "and conjunction"; - phrases: Array; -} | { type: "anu"; phrases: Array }; +export type MultiplePhrases = + | { type: "single"; phrase: Phrase } + | { + type: "and conjunction"; + phrases: Array; + } + | { type: "anu"; phrases: Array }; /** Represents a single prepositional phrase. */ export type Preposition = { preposition: string; @@ -57,24 +65,30 @@ export type MultiplePredicates = | { type: "and conjunction"; predicates: Array } | { type: "anu"; predicates: Array }; /** Represents a simple clause. */ -export type Clause = { type: "phrases"; phrases: MultiplePhrases } | { - type: "o vocative"; - phrases: MultiplePhrases; -} | { - type: "li clause"; - subjects: MultiplePhrases; - predicates: MultiplePredicates; -} | { - type: "o clause"; - subjects: null | MultiplePhrases; - predicates: MultiplePredicates; -} | { - type: "prepositions"; - prepositions: Array; -} | { - type: "quotation"; - quotation: Quotation; -}; +export type Clause = + | { type: "phrases"; phrases: MultiplePhrases } + | { + type: "o vocative"; + phrases: MultiplePhrases; + } + | { + type: "li clause"; + subjects: MultiplePhrases; + predicates: MultiplePredicates; + } + | { + type: "o clause"; + subjects: null | MultiplePhrases; + predicates: MultiplePredicates; + } + | { + type: "prepositions"; + prepositions: Array; + } + | { + type: "quotation"; + quotation: Quotation; + }; /** Represents a clause including preclause and postclause. */ export type FullClause = { taso: boolean; From 5bcc908a8baf5ee873b06815712be9029abad083 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 20 Jan 2024 19:41:43 +0800 Subject: [PATCH 159/271] Allow associated predicates to be nested --- src/parser.ts | 47 +++++++++++++++++++++++++++-------------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index ef2edc5..9e9aa66 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -354,32 +354,38 @@ function preposition(): Parser { phrases, })); } +function associatedPredicates( + nestingRule: Array<"li" | "o" | "anu">, +): Parser { + return sequence( + nestedPhrases(nestingRule), + optional( + optionalComma().with(specificWord("e")).with( + nestedPhrases(["e", "anu"]), + ), + ), + many(preposition()), + ).map(([predicates, objects, prepositions]) => { + if (!objects && prepositions.length === 0) { + throw new UnreachableError(); + } else { + return { + type: "associated", + predicates, + objects, + prepositions, + }; + } + }); +} /** Parses multiple predicates without _li_, _o_, nor _anu_ at the beginning. */ +// TODO: ensure there's no duplicates function multiplePredicates( nestingRule: Array<"li" | "o" | "anu">, ): Parser { if (nestingRule.length === 0) { return choice( - sequence( - nestedPhrases([]), - optional( - optionalComma().with(specificWord("e")).with( - nestedPhrases(["e", "anu"]), - ), - ), - many(preposition()), - ).map(([predicates, objects, prepositions]) => { - if (!objects && prepositions.length === 0) { - throw new UnreachableError(); - } else { - return { - type: "associated", - predicates, - objects, - prepositions, - }; - } - }), + associatedPredicates([]), phrase().map(( predicate, ) => ({ type: "single", predicate } as MultiplePredicates)), @@ -404,6 +410,7 @@ function multiplePredicates( type, predicates: [group, ...moreGroups], } as MultiplePredicates)), + associatedPredicates(nestingRule), lazy(() => multiplePredicates(rest)), ); } From 380d38f1979774729169f93225c3092291c4b9fb Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 20 Jan 2024 19:42:41 +0800 Subject: [PATCH 160/271] output AST count --- test-parser.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test-parser.ts b/test-parser.ts index dd3f533..a243936 100644 --- a/test-parser.ts +++ b/test-parser.ts @@ -2,7 +2,9 @@ import { OutputError } from "./src/error.ts"; import { parser } from "./src/parser.ts"; const input = await Deno.readTextFile("./test.txt"); -console.log(JSON.stringify(parser(input), (key, value) => { +const output = parser(input); +console.log(JSON.stringify(output, (key, value) => { if (key === "error") return (value as null | OutputError)?.message; else return value; }, 2)); +console.log(`The output has ${output.output.length} AST's`); From f75479d570b83d5177cfa5dc29e6a5cc286851ff Mon Sep 17 00:00:00 2001 From: neverRare Date: Sat, 20 Jan 2024 19:57:54 +0800 Subject: [PATCH 161/271] add FIXME for multiple predicate parser --- src/parser.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parser.ts b/src/parser.ts index 9e9aa66..9d4feef 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -380,6 +380,7 @@ function associatedPredicates( } /** Parses multiple predicates without _li_, _o_, nor _anu_ at the beginning. */ // TODO: ensure there's no duplicates +// FIXME: This always outputs 3 AST's function multiplePredicates( nestingRule: Array<"li" | "o" | "anu">, ): Parser { From 68e8a153b3e763f0e227eeb92268c5dbdbfcbffe Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 08:16:16 +0800 Subject: [PATCH 162/271] add warning to documentation comments --- src/parser.ts | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 9d4feef..2233d6a 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -143,6 +143,10 @@ function sequence>( * Parses `parser` multiple times and returns an `Array`. The resulting * output includes all outputs from parsing nothing to parsing as many as * possible. + * + * ## ⚠️ Warning + * + * Will cause infinite recursion if the parser can parse nothing. */ function many(parser: Parser): Parser> { return choice( @@ -152,7 +156,13 @@ function many(parser: Parser): Parser> { nothing().map(() => []), ); } -/** Like `many` but parses at least once. */ +/** + * Like `many` but parses at least once. + * + * ## ⚠️ Warning + * + * Will cause infinite recursion if the parser can parse nothing. + */ function manyAtLeastOnce(parser: Parser): Parser> { return sequence(parser, many(parser)).map(( [first, rest], @@ -161,6 +171,10 @@ function manyAtLeastOnce(parser: Parser): Parser> { /** * Parses `parser` multiple times and returns an `Array`. This function is * exhaustive unlike `many`. + * + * ## ⚠️ Warning + * + * Will cause infinite recursion if the parser can parse nothing. */ function all(parser: Parser): Parser> { return choiceOnlyOne( @@ -170,7 +184,13 @@ function all(parser: Parser): Parser> { nothing().map(() => []), ); } -/** Like `all` but parses at least once. */ +/** + * Like `all` but parses at least once. + * + * ## ⚠️ Warning + * + * Will cause infinite recursion if the parser can parse nothing. + */ function allAtLeastOnce(parser: Parser): Parser> { return sequence(parser, all(parser)).map(([first, rest]) => [first, ...rest]); } From b5c7629f02f0ffc506d98f9e13237d28430a8d5f Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 08:41:09 +0800 Subject: [PATCH 163/271] fix multiplePredicate parser --- src/parser.ts | 58 ++++++++++++++++++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 19 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 2233d6a..13ced2b 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -323,8 +323,11 @@ function phrase(): Parser { quotation().map((quotation) => ({ type: "quotation", quotation })), ); } -/** Parses nested phrases with given nesting rule. */ -function nestedPhrases( +/** + * Parses nested phrases with given nesting rule, only accepting the top level + * operation. + */ +function nestedPhrasesOnly( nestingRule: Array<"en" | "li" | "o" | "e" | "anu">, ): Parser { if (nestingRule.length === 0) { @@ -339,18 +342,31 @@ function nestedPhrases( } else { type = "anu"; } - return choice( - sequence( - lazy(() => nestedPhrases(rest)), - manyAtLeastOnce( - optionalComma().with(specificWord(first)).with( - lazy(() => nestedPhrases(rest)), - ), + return sequence( + nestedPhrases(rest), + manyAtLeastOnce( + optionalComma().with(specificWord(first)).with( + nestedPhrases(rest), ), - ).map(([group, moreGroups]) => ({ - type, - phrases: [group, ...moreGroups], - })), + ), + ).map(([group, moreGroups]) => ({ + type, + phrases: [group, ...moreGroups], + })); + } +} +/** Parses nested phrases with given nesting rule. */ +function nestedPhrases( + nestingRule: Array<"en" | "li" | "o" | "e" | "anu">, +): Parser { + if (nestingRule.length === 0) { + return phrase().map( + (phrase) => ({ type: "single", phrase } as MultiplePhrases), + ); + } else { + const [_, ...rest] = nestingRule; + return choice( + nestedPhrasesOnly(nestingRule), lazy(() => nestedPhrases(rest)), ); } @@ -378,7 +394,7 @@ function associatedPredicates( nestingRule: Array<"li" | "o" | "anu">, ): Parser { return sequence( - nestedPhrases(nestingRule), + nestedPhrasesOnly(nestingRule), optional( optionalComma().with(specificWord("e")).with( nestedPhrases(["e", "anu"]), @@ -399,8 +415,6 @@ function associatedPredicates( }); } /** Parses multiple predicates without _li_, _o_, nor _anu_ at the beginning. */ -// TODO: ensure there's no duplicates -// FIXME: This always outputs 3 AST's function multiplePredicates( nestingRule: Array<"li" | "o" | "anu">, ): Parser { @@ -420,18 +434,24 @@ function multiplePredicates( type = "anu"; } return choice( + associatedPredicates(nestingRule), sequence( - lazy(() => multiplePredicates(rest)), + choice( + associatedPredicates(nestingRule), + lazy(() => multiplePredicates(rest)), + ), manyAtLeastOnce( optionalComma().with(specificWord(first)).with( - lazy(() => multiplePredicates(rest)), + choice( + associatedPredicates(nestingRule), + lazy(() => multiplePredicates(rest)), + ), ), ), ).map(([group, moreGroups]) => ({ type, predicates: [group, ...moreGroups], } as MultiplePredicates)), - associatedPredicates(nestingRule), lazy(() => multiplePredicates(rest)), ); } From e57b4d8a2eed3ea8d5b94e57624a37f009886ea1 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 08:41:25 +0800 Subject: [PATCH 164/271] fix comment --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index 13ced2b..dd318ed 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -414,7 +414,7 @@ function associatedPredicates( } }); } -/** Parses multiple predicates without _li_, _o_, nor _anu_ at the beginning. */ +/** Parses multiple predicates without _li_ nor _o_ at the beginning. */ function multiplePredicates( nestingRule: Array<"li" | "o" | "anu">, ): Parser { From abafe0fbe424de0a14c0e383b7428ffa50fa833f Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 08:43:22 +0800 Subject: [PATCH 165/271] add and fix comments --- src/parser.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/parser.ts b/src/parser.ts index dd318ed..ccbb59b 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -371,6 +371,7 @@ function nestedPhrases( ); } } +/** Parses phrases separated by _en_ or _anu_. */ function subjectPhrases(): Parser { return choice( nestedPhrases(["en", "anu"]), @@ -390,6 +391,9 @@ function preposition(): Parser { phrases, })); } +/** + * Parses associated predicates whose predicates only uses top level operator. + */ function associatedPredicates( nestingRule: Array<"li" | "o" | "anu">, ): Parser { From 94e307245ed54c09f7ebf2d269278a949662615b Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 08:45:26 +0800 Subject: [PATCH 166/271] simplify operations --- src/parser.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index ccbb59b..bb8a8ba 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -364,10 +364,9 @@ function nestedPhrases( (phrase) => ({ type: "single", phrase } as MultiplePhrases), ); } else { - const [_, ...rest] = nestingRule; return choice( nestedPhrasesOnly(nestingRule), - lazy(() => nestedPhrases(rest)), + lazy(() => nestedPhrases(nestingRule.slice(1))), ); } } From a93ecdca1df02f27c19dddf9a4298e1f2d3f2d5d Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 09:06:46 +0800 Subject: [PATCH 167/271] prevent infinite recursion --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index bb8a8ba..23a7e72 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -365,7 +365,7 @@ function nestedPhrases( ); } else { return choice( - nestedPhrasesOnly(nestingRule), + lazy(() => nestedPhrasesOnly(nestingRule)), lazy(() => nestedPhrases(nestingRule.slice(1))), ); } From b2a6e31a499fb41f44e6c8f8ecc264c0a7d64038 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 10:09:17 +0800 Subject: [PATCH 168/271] implement reduplicate and restructure AST --- src/ast.ts | 43 ++++++++++++++++++++++----------- src/parser.ts | 67 ++++++++++++++++++++++++++++++++------------------- 2 files changed, 71 insertions(+), 39 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 5b92bcb..6fe345e 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -1,10 +1,33 @@ +/** Represents a word unit. */ +export type WordUnit = + | { + type: "default"; + word: string; + // emphasis: Array; + } + | { + type: "x ala x"; + word: string; + // firstEmphasis: Array; + // secondEmphasis: Array; + } + | { + type: "reduplication"; + word: string; + count: number; + // emphasis: Array; + } + | { + type: "numbers"; + numbers: Array; + // emphasis: Array; + }; /** Represents a single modifier. */ export type Modifier = - | { type: "word"; word: string; alaQuestion: boolean } + | { type: "default"; word: WordUnit } | { type: "proper words"; words: string } | { type: "pi"; phrase: Phrase } - | { type: "nanpa ordinal"; phrase: Phrase } - | { type: "cardinal"; number: Array } + | { type: "nanpa"; phrase: Phrase } | { type: "quotation"; quotation: Quotation }; /** * Represents a phrase including preverbial phrases, quotations, and @@ -13,19 +36,12 @@ export type Modifier = export type Phrase = | { type: "default"; - headWord: string; - alaQuestion: boolean; - modifiers: Array; - } - | { - type: "cardinal"; - number: Array; + headWord: WordUnit; modifiers: Array; } | { type: "preverb"; - preverb: string; - alaQuestion: boolean; + preverb: WordUnit; modifiers: Array; phrase: Phrase; } @@ -47,8 +63,7 @@ export type MultiplePhrases = | { type: "anu"; phrases: Array }; /** Represents a single prepositional phrase. */ export type Preposition = { - preposition: string; - alaQuestion: boolean; + preposition: WordUnit; modifiers: Array; /** This cannot be an "and conjunction": only "anu" or "single". */ phrases: MultiplePhrases; diff --git a/src/parser.ts b/src/parser.ts index 23a7e72..856f11e 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -8,6 +8,7 @@ import { Preposition, Quotation, Sentence, + WordUnit, } from "./ast.ts"; import { UnreachableError, UnrecognizedError } from "./error.ts"; import { Output } from "./output.ts"; @@ -43,6 +44,11 @@ class Parser { this.parser(src).filter(({ value }) => mapper(value)) ); } + then(mapper: (x: T) => Parser): Parser { + return new Parser((src) => + this.parser(src).flatMap(({ value, rest }) => mapper(value).parser(rest)) + ); + } /** Takes another parser and discards the first parsing result. */ with(parser: Parser): Parser { return sequence(this, parser).map(([_, output]) => output); @@ -229,16 +235,20 @@ function specificWord(thatWord: string): Parser { else throw new UnrecognizedError(`"${thisWord}" instead of "${thatWord}"`); }); } -/** Parses X ala X construction as well as just X */ -function optionalAlaQuestion( - parser: Parser, -): Parser<[string, boolean]> { +/** Parses word unit without numbers. */ +function wordUnit(word: Parser): Parser { return choice( - sequence(parser.skip(specificWord("ala")), parser).map(([left, right]) => { - if (left === right) return [left, true] as [string, boolean]; - else throw new UnreachableError(); - }), - parser.map((word) => [word, false]), + word.map((word) => ({ type: "default", word } as WordUnit)), + word.then((word) => specificWord("ala").with(specificWord(word))).map(( + word, + ) => ({ type: "x ala x", word })), + word.then((word) => + all(specificWord(word)).map((words) => ({ + type: "reduplication", + word, + count: words.length + 1, + })) + ), ); } /** Parses number words in order. */ @@ -260,13 +270,19 @@ function modifiers(): Parser> { return sequence( many( choice( - wordFrom(CONTENT_WORD, "modifier").map(( + wordUnit(wordFrom(CONTENT_WORD, "modifier")).map((word) => ({ + type: "default", word, - ) => ({ type: "word", word } as Modifier)), + } as Modifier)), properWords().map(( words, ) => ({ type: "proper words", words } as Modifier)), - number().map((number) => ({ type: "cardinal", number } as Modifier)), + number().map(( + numbers, + ) => ({ + type: "default", + word: { type: "numbers", numbers }, + } as Modifier)), quotation().map(( quotation, ) => ({ type: "quotation", quotation } as Modifier)), @@ -274,7 +290,7 @@ function modifiers(): Parser> { ), many( specificWord("nanpa").with(phrase()).map((phrase) => ({ - type: "nanpa ordinal", + type: "nanpa", phrase, } as Modifier)), ), @@ -292,18 +308,21 @@ function modifiers(): Parser> { function phrase(): Parser { return choice( sequence(number(), lazy(modifiers)).map(( - [number, modifiers], - ) => ({ type: "cardinal", number, modifiers } as Phrase)), + [numbers, modifiers], + ) => ({ + type: "default", + headWord: { type: "numbers", numbers }, + modifiers, + } as Phrase)), sequence( - optionalAlaQuestion(wordFrom(PREVERB, "preverb")), + wordUnit(wordFrom(PREVERB, "preverb")), lazy(modifiers), lazy(phrase), ).map(( - [[preverb, alaQuestion], modifiers, phrase], + [preverb, modifiers, phrase], ) => ({ type: "preverb", preverb, - alaQuestion, modifiers, phrase, } as Phrase)), @@ -312,12 +331,11 @@ function phrase(): Parser { preposition, })), sequence( - optionalAlaQuestion(wordFrom(CONTENT_WORD, "headword")), + wordUnit(wordFrom(CONTENT_WORD, "headword")), lazy(modifiers), - ).map(([[headWord, alaQuestion], modifiers]) => ({ + ).map(([headWord, modifiers]) => ({ type: "default", headWord, - alaQuestion, modifiers, })), quotation().map((quotation) => ({ type: "quotation", quotation })), @@ -380,12 +398,11 @@ function subjectPhrases(): Parser { /** Parses prepositional phrase. */ function preposition(): Parser { return sequence( - optionalAlaQuestion(wordFrom(PREPOSITION, "preposition")), + wordUnit(wordFrom(PREPOSITION, "preposition")), modifiers(), nestedPhrases(["anu"]), - ).map(([[preposition, alaQuestion], modifiers, phrases]) => ({ + ).map(([preposition, modifiers, phrases]) => ({ preposition, - alaQuestion, modifiers, phrases, })); @@ -471,7 +488,7 @@ function clause(): Parser { type: "single", phrase: { type: "default", - headWord: subject, + headWord: { type: "default", word: subject }, alaQuestion: false, modifiers: [], }, From 36e9a3ff68f93f585a79ce786a6b283262241d94 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 10:10:01 +0800 Subject: [PATCH 169/271] use all when parsing tu and wan --- src/parser.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 856f11e..1178d62 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -257,8 +257,8 @@ function number(): Parser> { many(choice(specificWord("ale"), specificWord("ali"))), many(specificWord("mute")), many(specificWord("luka")), - many(specificWord("tu")), - many(specificWord("wan")), + all(specificWord("tu")), + all(specificWord("wan")), ).map((array) => { const output = array.flat(); if (output.length >= 2) return output; From 2528622f2a33f973723f1947db1109df4016feca Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 10:13:51 +0800 Subject: [PATCH 170/271] add document comments --- src/parser.ts | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/parser.ts b/src/parser.ts index 1178d62..42c2479 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -39,11 +39,19 @@ class Parser { })) ); } + /** + * Filters outputs. The mapper may throw OutputError as well in place of + * returning false. + */ filter(mapper: (x: T) => boolean): Parser { return new Parser((src) => this.parser(src).filter(({ value }) => mapper(value)) ); } + /** + * Parses `this` then passes the parsing result in the mapper. The resulting + * parser is then also parsed. + */ then(mapper: (x: T) => Parser): Parser { return new Parser((src) => this.parser(src).flatMap(({ value, rest }) => mapper(value).parser(rest)) From b3a59ea0f47d363d96e19e7ab7ef1de7a2c414fd Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 10:14:17 +0800 Subject: [PATCH 171/271] revert using all when parsing tu and wan --- src/parser.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 42c2479..33f4cbc 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -265,8 +265,8 @@ function number(): Parser> { many(choice(specificWord("ale"), specificWord("ali"))), many(specificWord("mute")), many(specificWord("luka")), - all(specificWord("tu")), - all(specificWord("wan")), + many(specificWord("tu")), + many(specificWord("wan")), ).map((array) => { const output = array.flat(); if (output.length >= 2) return output; From b3f9db305669590d8fe58021617dd653431103ed Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 10:19:52 +0800 Subject: [PATCH 172/271] improve reduplication parser --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index 33f4cbc..8ec607d 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -251,7 +251,7 @@ function wordUnit(word: Parser): Parser { word, ) => ({ type: "x ala x", word })), word.then((word) => - all(specificWord(word)).map((words) => ({ + allAtLeastOnce(specificWord(word)).map((words) => ({ type: "reduplication", word, count: words.length + 1, From 8c49daa27f1e667be2696a955b673dbeedb280f5 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 10:22:00 +0800 Subject: [PATCH 173/271] improve documentation --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index 8ec607d..edb8873 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -57,7 +57,7 @@ class Parser { this.parser(src).flatMap(({ value, rest }) => mapper(value).parser(rest)) ); } - /** Takes another parser and discards the first parsing result. */ + /** Takes another parser and discards the parsing result of `this`. */ with(parser: Parser): Parser { return sequence(this, parser).map(([_, output]) => output); } From a0b9f6dee6974732b4880b44a578069448c6ba4d Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 10:22:54 +0800 Subject: [PATCH 174/271] order matters --- src/parser.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index edb8873..78fdd67 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -246,10 +246,6 @@ function specificWord(thatWord: string): Parser { /** Parses word unit without numbers. */ function wordUnit(word: Parser): Parser { return choice( - word.map((word) => ({ type: "default", word } as WordUnit)), - word.then((word) => specificWord("ala").with(specificWord(word))).map(( - word, - ) => ({ type: "x ala x", word })), word.then((word) => allAtLeastOnce(specificWord(word)).map((words) => ({ type: "reduplication", @@ -257,6 +253,10 @@ function wordUnit(word: Parser): Parser { count: words.length + 1, })) ), + word.then((word) => specificWord("ala").with(specificWord(word))).map(( + word, + ) => ({ type: "x ala x", word })), + word.map((word) => ({ type: "default", word } as WordUnit)), ); } /** Parses number words in order. */ From ca28c7e53fb899ca8d944f7d87ab574cbb9f7121 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 10:31:24 +0800 Subject: [PATCH 175/271] nanpa particle can now be reduplicated --- src/ast.ts | 2 +- src/parser.ts | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 6fe345e..59391c7 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -27,7 +27,7 @@ export type Modifier = | { type: "default"; word: WordUnit } | { type: "proper words"; words: string } | { type: "pi"; phrase: Phrase } - | { type: "nanpa"; phrase: Phrase } + | { type: "nanpa"; nanpa: WordUnit; phrase: Phrase } | { type: "quotation"; quotation: Quotation }; /** * Represents a phrase including preverbial phrases, quotations, and diff --git a/src/parser.ts b/src/parser.ts index 78fdd67..034529d 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -297,8 +297,11 @@ function modifiers(): Parser> { ), ), many( - specificWord("nanpa").with(phrase()).map((phrase) => ({ + sequence(wordUnit(specificWord("nanpa")), phrase()).map(( + [nanpa, phrase], + ) => ({ type: "nanpa", + nanpa, phrase, } as Modifier)), ), From daafc15963ada106b4ba944a2538706fd75b6bce Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 10:33:28 +0800 Subject: [PATCH 176/271] reduplication is now non-exhaustive --- src/parser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index 034529d..046db75 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -247,7 +247,7 @@ function specificWord(thatWord: string): Parser { function wordUnit(word: Parser): Parser { return choice( word.then((word) => - allAtLeastOnce(specificWord(word)).map((words) => ({ + manyAtLeastOnce(specificWord(word)).map((words) => ({ type: "reduplication", word, count: words.length + 1, From 9f93e8ea5ed381bdffd10f8eae111622ebbf17a7 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 10:42:16 +0800 Subject: [PATCH 177/271] allow taso and anu seme to be reduplicated --- src/ast.ts | 4 ++-- src/parser.ts | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 59391c7..b96f931 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -106,8 +106,8 @@ export type Clause = }; /** Represents a clause including preclause and postclause. */ export type FullClause = { - taso: boolean; - anuSeme: boolean; + taso: null | WordUnit; + anuSeme: null | WordUnit; clause: Clause; }; /** Represents a single full sentence. */ diff --git a/src/parser.ts b/src/parser.ts index 046db75..b586958 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -560,14 +560,16 @@ function clause(): Parser { /** Parses a single clause including precaluse and postclause. */ function fullClause(): Parser { return sequence( - optional(specificWord("taso").skip(optionalComma())), + optional(wordUnit(specificWord("taso")).skip(optionalComma())), clause(), optional( - sequence(optionalComma(), specificWord("anu"), specificWord("seme")), + optionalComma().with(specificWord("anu")).with( + wordUnit(specificWord("seme")), + ), ), ).map(([taso, clause, anuSeme]) => ({ - taso: !!taso, - anuSeme: !!anuSeme, + taso, + anuSeme, clause, })); } From 088cc7c791fc1bbe0283f55901f1e2183e7ffb99 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 11:16:02 +0800 Subject: [PATCH 178/271] remove comments about "a" particles --- src/ast.ts | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index b96f931..1992d4b 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -1,27 +1,9 @@ /** Represents a word unit. */ export type WordUnit = - | { - type: "default"; - word: string; - // emphasis: Array; - } - | { - type: "x ala x"; - word: string; - // firstEmphasis: Array; - // secondEmphasis: Array; - } - | { - type: "reduplication"; - word: string; - count: number; - // emphasis: Array; - } - | { - type: "numbers"; - numbers: Array; - // emphasis: Array; - }; + | { type: "default"; word: string } + | { type: "x ala x"; word: string } + | { type: "reduplication"; word: string; count: number } + | { type: "numbers"; numbers: Array }; /** Represents a single modifier. */ export type Modifier = | { type: "default"; word: WordUnit } From da37a088615f8b73340413c53a24ed3d4f71f62d Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 11:20:17 +0800 Subject: [PATCH 179/271] rename x to value --- src/parser.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index b586958..f1537db 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -31,7 +31,7 @@ class Parser { * Maps the parsing result. For convenience, the mapper function can throw * an OutputError; Other kinds of error are ignored. */ - map(mapper: (x: T) => U): Parser { + map(mapper: (value: T) => U): Parser { return new Parser((src) => this.parser(src).map(({ value, rest }) => ({ value: mapper(value), @@ -43,7 +43,7 @@ class Parser { * Filters outputs. The mapper may throw OutputError as well in place of * returning false. */ - filter(mapper: (x: T) => boolean): Parser { + filter(mapper: (value: T) => boolean): Parser { return new Parser((src) => this.parser(src).filter(({ value }) => mapper(value)) ); @@ -52,7 +52,7 @@ class Parser { * Parses `this` then passes the parsing result in the mapper. The resulting * parser is then also parsed. */ - then(mapper: (x: T) => Parser): Parser { + then(mapper: (value: T) => Parser): Parser { return new Parser((src) => this.parser(src).flatMap(({ value, rest }) => mapper(value).parser(rest)) ); From 921fa80525a9937f8e1dad83ec6bdc4b62d57bd0 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 12:53:04 +0800 Subject: [PATCH 180/271] first implementation of filters --- src/filter.ts | 88 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/parser.ts | 17 ++++++---- 2 files changed, 99 insertions(+), 6 deletions(-) create mode 100644 src/filter.ts diff --git a/src/filter.ts b/src/filter.ts new file mode 100644 index 0000000..3faf7bb --- /dev/null +++ b/src/filter.ts @@ -0,0 +1,88 @@ +import { Modifier, MultiplePhrases, Phrase } from "./ast.ts"; +import { UnrecognizedError } from "./error.ts"; + +/** Array of filter rules for a single modifier. */ +export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [ + // quotation modifier cannot exist + (modifier) => { + if (modifier.type === "quotation") { + throw new UnrecognizedError("quotation as modifier"); + } + return true; + }, + // pi cannot contain preposition + (modifier) => { + if (modifier.type === "pi" && modifier.phrase.type === "preposition") { + throw new UnrecognizedError("preposition inside pi"); + } + return true; + }, + // pi must follow phrases with modifier + (modifier) => { + if (modifier.type === "pi") { + const phrase = modifier.phrase; + if (phrase.type === "default" && phrase.modifiers.length === 0) { + throw new UnrecognizedError("pi followed by one word"); + } + } + return true; + }, + // pi cannot be nested + (modifier) => { + if (modifier.type === "pi") { + if (phraseHasPi(modifier.phrase)) { + throw new UnrecognizedError("nested pi"); + } + } + return true; + }, +]; +/** Helper function for generating filter function. */ +export function filter( + rules: Array<(value: T) => boolean>, +): (value: T) => boolean { + return (value) => rules.every((rule) => rule(value)); +} +/** Checks if modifiers has _pi_. */ +function modifiersHasPi(modifiers: Array): boolean { + return modifiers.some((modifier) => { + if ( + modifier.type === "default" || modifier.type === "proper words" || + modifier.type === "quotation" + ) { + return false; + } else if (modifier.type === "nanpa") { + return phraseHasPi(modifier.phrase); + } else if (modifier.type === "pi") { + return true; + } else { + throw new Error("unreachable error"); + } + }); +} +/** Checks if a single phrase has _pi_. */ +function phraseHasPi(phrase: Phrase): boolean { + if (phrase.type === "default") { + return modifiersHasPi(phrase.modifiers); + } else if (phrase.type === "preverb") { + return modifiersHasPi(phrase.modifiers) || phraseHasPi(phrase.phrase); + } else if (phrase.type === "preposition") { + const preposition = phrase.preposition; + return modifiersHasPi(preposition.modifiers) || + multiplePhrasesHasPi(preposition.phrases); + } else if (phrase.type === "quotation") { + return false; + } else { + throw new Error("unreachable error"); + } +} +/** Checks if multiple phrases has _pi_. */ +function multiplePhrasesHasPi(phrases: MultiplePhrases): boolean { + if (phrases.type === "single") { + return phraseHasPi(phrases.phrase); + } else if (phrases.type === "and conjunction" || phrases.type === "anu") { + return phrases.phrases.some(multiplePhrasesHasPi); + } else { + throw new Error("unreachable error"); + } +} diff --git a/src/parser.ts b/src/parser.ts index f1537db..940e72a 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -18,6 +18,7 @@ import { PREVERB, SPECIAL_SUBJECT, } from "./vocabulary.ts"; +import { filter, MODIFIER_RULES } from "./filter.ts"; /** A single parsing result. */ type ValueRest = { value: T; rest: string }; @@ -281,19 +282,23 @@ function modifiers(): Parser> { wordUnit(wordFrom(CONTENT_WORD, "modifier")).map((word) => ({ type: "default", word, - } as Modifier)), + } as Modifier)).filter(filter(MODIFIER_RULES)), properWords().map(( words, - ) => ({ type: "proper words", words } as Modifier)), + ) => ({ type: "proper words", words } as Modifier)).filter( + filter(MODIFIER_RULES), + ), number().map(( numbers, ) => ({ type: "default", word: { type: "numbers", numbers }, - } as Modifier)), + } as Modifier)).filter(filter(MODIFIER_RULES)), quotation().map(( quotation, - ) => ({ type: "quotation", quotation } as Modifier)), + ) => ({ type: "quotation", quotation } as Modifier)).filter( + filter(MODIFIER_RULES), + ), ), ), many( @@ -303,13 +308,13 @@ function modifiers(): Parser> { type: "nanpa", nanpa, phrase, - } as Modifier)), + } as Modifier)).filter(filter(MODIFIER_RULES)), ), many( specificWord("pi").with(phrase()).map((phrase) => ({ type: "pi", phrase, - } as Modifier)), + } as Modifier)).filter(filter(MODIFIER_RULES)), ), ).map(( [modifiers, nanpaModifiers, piModifiers], From a680e9b6d931b318b0ac7559d58edca408301e15 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 13:34:39 +0800 Subject: [PATCH 181/271] add filters for nanpa --- src/filter.ts | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) diff --git a/src/filter.ts b/src/filter.ts index 3faf7bb..8915f29 100644 --- a/src/filter.ts +++ b/src/filter.ts @@ -10,6 +10,49 @@ export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [ } return true; }, + // nanpa construction cannot contain preposition + (modifier) => { + if (modifier.type === "nanpa" && modifier.phrase.type === "preposition") { + throw new UnrecognizedError("preposition inside nanpa"); + } + return true; + }, + // nanpa construction cannot contain preverb + (modifier) => { + if (modifier.type === "nanpa" && modifier.phrase.type === "preverb") { + throw new UnrecognizedError("preverb inside nanpa"); + } + return true; + }, + // nanpa construction cannot contain quotation + (modifier) => { + if (modifier.type === "nanpa" && modifier.phrase.type === "quotation") { + throw new UnrecognizedError("quotation inside nanpa"); + } + return true; + }, + // nanpa construction cannot contain pi + (modifier) => { + if (modifier.type === "nanpa" && modifier.phrase.type === "default") { + if ( + modifier.phrase.modifiers.some((modifier) => modifier.type === "pi") + ) { + throw new UnrecognizedError("pi inside nanpa"); + } + } + return true; + }, + // nanpa construction cannot contain nanpa + (modifier) => { + if (modifier.type === "nanpa" && modifier.phrase.type === "default") { + if ( + modifier.phrase.modifiers.some((modifier) => modifier.type === "nanpa") + ) { + throw new UnrecognizedError("nanpa inside nanpa"); + } + } + return true; + }, // pi cannot contain preposition (modifier) => { if (modifier.type === "pi" && modifier.phrase.type === "preposition") { From f14e51d3ebbd1d4574002aeaa20eae1748eb732b Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 13:46:26 +0800 Subject: [PATCH 182/271] improve error message --- src/filter.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/filter.ts b/src/filter.ts index 8915f29..448f8d2 100644 --- a/src/filter.ts +++ b/src/filter.ts @@ -74,7 +74,7 @@ export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [ (modifier) => { if (modifier.type === "pi") { if (phraseHasPi(modifier.phrase)) { - throw new UnrecognizedError("nested pi"); + throw new UnrecognizedError("pi inside pi"); } } return true; From e02f4d65f9a4536402945330b65ff250cfae5007 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 13:54:36 +0800 Subject: [PATCH 183/271] apply filter for word unit --- src/filter.ts | 12 +++++++++++- src/parser.ts | 7 ++++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/src/filter.ts b/src/filter.ts index 448f8d2..35a4d35 100644 --- a/src/filter.ts +++ b/src/filter.ts @@ -1,6 +1,16 @@ -import { Modifier, MultiplePhrases, Phrase } from "./ast.ts"; +import { Modifier, MultiplePhrases, Phrase, WordUnit } from "./ast.ts"; import { UnrecognizedError } from "./error.ts"; +/** Array of filter rules for a word unit. */ +export const WORD_UNIT_RULES: Array<(wordUnit: WordUnit) => boolean> = [ + // avoid "seme ala seme" + (wordUnit) => { + if (wordUnit.type === "x ala x" && wordUnit.word === "seme") { + throw new UnrecognizedError('"seme ala seme"'); + } + return true; + }, +]; /** Array of filter rules for a single modifier. */ export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [ // quotation modifier cannot exist diff --git a/src/parser.ts b/src/parser.ts index 940e72a..075267d 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -19,6 +19,7 @@ import { SPECIAL_SUBJECT, } from "./vocabulary.ts"; import { filter, MODIFIER_RULES } from "./filter.ts"; +import { WORD_UNIT_RULES } from "./filter.ts"; /** A single parsing result. */ type ValueRest = { value: T; rest: string }; @@ -252,13 +253,13 @@ function wordUnit(word: Parser): Parser { type: "reduplication", word, count: words.length + 1, - })) + } as WordUnit)) ), word.then((word) => specificWord("ala").with(specificWord(word))).map(( word, - ) => ({ type: "x ala x", word })), + ) => ({ type: "x ala x", word } as WordUnit)), word.map((word) => ({ type: "default", word } as WordUnit)), - ); + ).filter(filter(WORD_UNIT_RULES)); } /** Parses number words in order. */ function number(): Parser> { From 8d85ba176bb9df245ffded334297b3180ac452a0 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 13:57:14 +0800 Subject: [PATCH 184/271] add more rules for word unit --- src/filter.ts | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/filter.ts b/src/filter.ts index 35a4d35..890baa1 100644 --- a/src/filter.ts +++ b/src/filter.ts @@ -10,6 +10,16 @@ export const WORD_UNIT_RULES: Array<(wordUnit: WordUnit) => boolean> = [ } return true; }, + // avoid reduplication of "wan" and "tu" + (wordUnit) => { + if ( + wordUnit.type === "x ala x" && + (wordUnit.word === "wan" || wordUnit.word === "tu") + ) { + throw new UnrecognizedError(`reduplication of ${wordUnit.word}`); + } + return true; + }, ]; /** Array of filter rules for a single modifier. */ export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [ From 4f8fb3c316201c3851ffdfac01f2d87274fa2a18 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 14:13:01 +0800 Subject: [PATCH 185/271] implement filter for array of modifiers --- src/filter.ts | 42 ++++++++++++++++++++++++++++++++++++++++++ src/parser.ts | 12 +++++++++--- 2 files changed, 51 insertions(+), 3 deletions(-) diff --git a/src/filter.ts b/src/filter.ts index 890baa1..8fd2d2a 100644 --- a/src/filter.ts +++ b/src/filter.ts @@ -100,6 +100,48 @@ export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [ return true; }, ]; +export const MODIFIERS_RULES: Array<(modifier: Array) => boolean> = [ + // no multiple pi + (modifiers) => { + if (modifiers.filter((modifier) => modifier.type === "pi").length > 1) { + throw new UnrecognizedError("multiple pi"); + } + return true; + }, + // no multiple nanpa + (modifiers) => { + if (modifiers.filter((modifier) => modifier.type === "nanpa").length > 1) { + throw new UnrecognizedError("multiple nanpa"); + } + return true; + }, + // no multiple proper words + (modifiers) => { + if ( + modifiers.filter((modifier) => modifier.type === "proper words").length > + 1 + ) { + throw new UnrecognizedError("multiple proper words"); + } + return true; + }, + // no multiple number words + (modifiers) => { + function filter(modifier: Modifier): boolean { + if (modifier.type === "default") { + const word = modifier.word; + return word.type === "numbers" || + (word.type === "default" && + (word.word === "wan" || word.word === "tu")); + } + return false; + } + if (modifiers.filter(filter).length > 1) { + throw new UnrecognizedError("multiple number words"); + } + return true; + }, +]; /** Helper function for generating filter function. */ export function filter( rules: Array<(value: T) => boolean>, diff --git a/src/parser.ts b/src/parser.ts index 075267d..aac6d92 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -18,8 +18,12 @@ import { PREVERB, SPECIAL_SUBJECT, } from "./vocabulary.ts"; -import { filter, MODIFIER_RULES } from "./filter.ts"; -import { WORD_UNIT_RULES } from "./filter.ts"; +import { + filter, + MODIFIER_RULES, + MODIFIERS_RULES, + WORD_UNIT_RULES, +} from "./filter.ts"; /** A single parsing result. */ type ValueRest = { value: T; rest: string }; @@ -319,7 +323,9 @@ function modifiers(): Parser> { ), ).map(( [modifiers, nanpaModifiers, piModifiers], - ) => [...modifiers, ...nanpaModifiers, ...piModifiers]); + ) => [...modifiers, ...nanpaModifiers, ...piModifiers]).filter( + filter(MODIFIERS_RULES), + ); } /** Parses phrases including preverbial phrases. */ function phrase(): Parser { From 4253f6f8f4f5e81d4a476899d6c58f07ec108b33 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 14:15:55 +0800 Subject: [PATCH 186/271] add comment --- src/filter.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/filter.ts b/src/filter.ts index 8fd2d2a..cfaf10e 100644 --- a/src/filter.ts +++ b/src/filter.ts @@ -100,6 +100,7 @@ export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [ return true; }, ]; +/** Array of filter rules for multiple modifiers. */ export const MODIFIERS_RULES: Array<(modifier: Array) => boolean> = [ // no multiple pi (modifiers) => { From 17e3bb4b584eae3561c4d00dff42cdac4b39b160 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 14:57:05 +0800 Subject: [PATCH 187/271] apply filter for phrase and preposition --- src/filter.ts | 75 ++++++++++++++++++++++++++++++++++++++++++++------- src/parser.ts | 14 ++++++---- 2 files changed, 74 insertions(+), 15 deletions(-) diff --git a/src/filter.ts b/src/filter.ts index cfaf10e..183a6fc 100644 --- a/src/filter.ts +++ b/src/filter.ts @@ -1,4 +1,10 @@ -import { Modifier, MultiplePhrases, Phrase, WordUnit } from "./ast.ts"; +import { + Modifier, + MultiplePhrases, + Phrase, + Preposition, + WordUnit, +} from "./ast.ts"; import { UnrecognizedError } from "./error.ts"; /** Array of filter rules for a word unit. */ @@ -128,17 +134,45 @@ export const MODIFIERS_RULES: Array<(modifier: Array) => boolean> = [ }, // no multiple number words (modifiers) => { - function filter(modifier: Modifier): boolean { - if (modifier.type === "default") { - const word = modifier.word; - return word.type === "numbers" || - (word.type === "default" && - (word.word === "wan" || word.word === "tu")); + if (modifiers.filter(modifierIsNumeric).length > 1) { + throw new UnrecognizedError("multiple number words"); + } + return true; + }, +]; +/** Array of filter rules for a single phrase. */ +export const PHRASE_RULE: Array<(phrase: Phrase) => boolean> = [ + // Disallow preverb modifiers other than _ala_ + (phrase) => { + if (phrase.type === "preverb") { + if (!modifiersIsAla(phrase.modifiers)) { + throw new UnrecognizedError('preverb with modifiers other than "ala"'); } - return false; } - if (modifiers.filter(filter).length > 1) { - throw new UnrecognizedError("multiple number words"); + return true; + }, + // No multiple number words + (phrase) => { + if (phrase.type === "default") { + if ( + phrase.headWord.type === "numbers" || + (phrase.headWord.type === "default" && + (phrase.headWord.word === "wan" || phrase.headWord.word === "tu")) + ) { + if (phrase.modifiers.some(modifierIsNumeric)) { + throw new UnrecognizedError("Multiple number words"); + } + } + } + return true; + }, +]; +/** Array of filter rules for a single phrase. */ +export const PREPOSITION_RULE: Array<(phrase: Preposition) => boolean> = [ + // Disallow preverb modifiers other than _ala_ + (preposition) => { + if (!modifiersIsAla(preposition.modifiers)) { + throw new UnrecognizedError('preverb with modifiers other than "ala"'); } return true; }, @@ -149,6 +183,27 @@ export function filter( ): (value: T) => boolean { return (value) => rules.every((rule) => rule(value)); } +/** Helper function for checking whether a modifier is numeric. */ +function modifierIsNumeric(modifier: Modifier): boolean { + if (modifier.type === "default") { + const word = modifier.word; + return word.type === "numbers" || + (word.type === "default" && + (word.word === "wan" || word.word === "tu")); + } + return false; +} +/** Helper function for checking if the modifiers is exactly just _ala_. */ +function modifiersIsAla(modifiers: Array): boolean { + if (modifiers.length > 1) { + return false; + } else if (modifiers.length === 1) { + const [modifier] = modifiers; + return modifier.type === "default" && modifier.word.type === "default" && + modifier.word.word === "ala"; + } + return false; +} /** Checks if modifiers has _pi_. */ function modifiersHasPi(modifiers: Array): boolean { return modifiers.some((modifier) => { diff --git a/src/parser.ts b/src/parser.ts index aac6d92..d83c0d2 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -22,6 +22,8 @@ import { filter, MODIFIER_RULES, MODIFIERS_RULES, + PHRASE_RULE, + PREPOSITION_RULE, WORD_UNIT_RULES, } from "./filter.ts"; @@ -352,7 +354,7 @@ function phrase(): Parser { lazy(preposition).map((preposition) => ({ type: "preposition", preposition, - })), + } as Phrase)), sequence( wordUnit(wordFrom(CONTENT_WORD, "headword")), lazy(modifiers), @@ -360,9 +362,11 @@ function phrase(): Parser { type: "default", headWord, modifiers, - })), - quotation().map((quotation) => ({ type: "quotation", quotation })), - ); + } as Phrase)), + quotation().map(( + quotation, + ) => ({ type: "quotation", quotation } as Phrase)), + ).filter(filter(PHRASE_RULE)); } /** * Parses nested phrases with given nesting rule, only accepting the top level @@ -428,7 +432,7 @@ function preposition(): Parser { preposition, modifiers, phrases, - })); + })).filter(filter(PREPOSITION_RULE)); } /** * Parses associated predicates whose predicates only uses top level operator. From 43e48bdb692115ee699127937382dda6db501d2b Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 15:07:07 +0800 Subject: [PATCH 188/271] fix error regarding modifiers of preverb and preposition --- src/filter.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/filter.ts b/src/filter.ts index 183a6fc..f39544d 100644 --- a/src/filter.ts +++ b/src/filter.ts @@ -145,7 +145,7 @@ export const PHRASE_RULE: Array<(phrase: Phrase) => boolean> = [ // Disallow preverb modifiers other than _ala_ (phrase) => { if (phrase.type === "preverb") { - if (!modifiersIsAla(phrase.modifiers)) { + if (!modifiersIsAlaOrNone(phrase.modifiers)) { throw new UnrecognizedError('preverb with modifiers other than "ala"'); } } @@ -171,7 +171,7 @@ export const PHRASE_RULE: Array<(phrase: Phrase) => boolean> = [ export const PREPOSITION_RULE: Array<(phrase: Preposition) => boolean> = [ // Disallow preverb modifiers other than _ala_ (preposition) => { - if (!modifiersIsAla(preposition.modifiers)) { + if (!modifiersIsAlaOrNone(preposition.modifiers)) { throw new UnrecognizedError('preverb with modifiers other than "ala"'); } return true; @@ -193,8 +193,8 @@ function modifierIsNumeric(modifier: Modifier): boolean { } return false; } -/** Helper function for checking if the modifiers is exactly just _ala_. */ -function modifiersIsAla(modifiers: Array): boolean { +/** Helper function for checking if the modifiers is exactly just _ala_ or nothing. */ +function modifiersIsAlaOrNone(modifiers: Array): boolean { if (modifiers.length > 1) { return false; } else if (modifiers.length === 1) { @@ -202,7 +202,7 @@ function modifiersIsAla(modifiers: Array): boolean { return modifier.type === "default" && modifier.word.type === "default" && modifier.word.word === "ala"; } - return false; + return true; } /** Checks if modifiers has _pi_. */ function modifiersHasPi(modifiers: Array): boolean { From f6b7fe24e1ebc7e044285ab8d94fafdf9a00c570 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 15:12:41 +0800 Subject: [PATCH 189/271] disallow multiple sentences --- src/filter.ts | 11 +++++++++++ src/parser.ts | 7 +++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/filter.ts b/src/filter.ts index f39544d..d9dd02e 100644 --- a/src/filter.ts +++ b/src/filter.ts @@ -3,6 +3,7 @@ import { MultiplePhrases, Phrase, Preposition, + Sentence, WordUnit, } from "./ast.ts"; import { UnrecognizedError } from "./error.ts"; @@ -177,6 +178,16 @@ export const PREPOSITION_RULE: Array<(phrase: Preposition) => boolean> = [ return true; }, ]; +/** Array of filter rules for multiple sentences. */ +export const SENTENCES_RULE: Array<(sentences: Array) => boolean> = [ + // Only allow at most 2 sentences + (sentences) => { + if (sentences.length > 2) { + throw new UnrecognizedError("Multiple sentences"); + } + return true; + }, +]; /** Helper function for generating filter function. */ export function filter( rules: Array<(value: T) => boolean>, diff --git a/src/parser.ts b/src/parser.ts index d83c0d2..b188994 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -24,6 +24,7 @@ import { MODIFIERS_RULES, PHRASE_RULE, PREPOSITION_RULE, + SENTENCES_RULE, WORD_UNIT_RULES, } from "./filter.ts"; @@ -624,7 +625,7 @@ function closeQuotationMark(): Parser { function quotation(): Parser { return sequence( openQuotationMark(), - many(lazy(sentence)), + many(lazy(sentence)).filter(filter(SENTENCES_RULE)), closeQuotationMark(), ).map(([leftMark, sentences, rightMark]) => { if (leftMark === '"' || leftMark === "“") { @@ -645,6 +646,8 @@ function quotation(): Parser { } /** A multiple Toki Pona sentence parser. */ export function parser(src: string): Output> { - return match(/\s*/).with(allAtLeastOnce(sentence())).skip(eol()).parser(src) + return match(/\s*/).with(allAtLeastOnce(sentence())).skip(eol()).filter( + filter(SENTENCES_RULE), + ).parser(src) .map(({ value }) => value); } From 24620199e2831319c285a3c0daddbb4c798c6d90 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 15:19:00 +0800 Subject: [PATCH 190/271] disallow quotation as phrase --- src/filter.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/filter.ts b/src/filter.ts index d9dd02e..ed2872b 100644 --- a/src/filter.ts +++ b/src/filter.ts @@ -143,6 +143,13 @@ export const MODIFIERS_RULES: Array<(modifier: Array) => boolean> = [ ]; /** Array of filter rules for a single phrase. */ export const PHRASE_RULE: Array<(phrase: Phrase) => boolean> = [ + // Disallow quotation + (phrase) => { + if (phrase.type === "quotation") { + throw new UnrecognizedError("quotation as phrase"); + } + return true; + }, // Disallow preverb modifiers other than _ala_ (phrase) => { if (phrase.type === "preverb") { From 4b73a6375deb7e4dda098f6c045f9fa4ab76d462 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 15:31:08 +0800 Subject: [PATCH 191/271] improve optional comma parsing for prepositions --- src/parser.ts | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index b188994..b001072 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -448,7 +448,7 @@ function associatedPredicates( nestedPhrases(["e", "anu"]), ), ), - many(preposition()), + many(optionalComma().with(preposition())), ).map(([predicates, objects, prepositions]) => { if (!objects && prepositions.length === 0) { throw new UnreachableError(); @@ -523,9 +523,13 @@ function clause(): Parser { }, predicates, } as Clause)), - manyAtLeastOnce(optionalComma().with(preposition())).map(( - prepositions, - ) => ({ type: "prepositions", prepositions })), + sequence( + preposition(), + many(optionalComma().with(preposition())), + ).map(([preposition, morePreposition]) => ({ + type: "prepositions", + prepositions: [preposition, ...morePreposition], + })), subjectPhrases().map((phrases) => { if ( phrases.type === "single" && From 811b085c7c1390fcc4bdc131304497be993d3d3c Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 16:03:25 +0800 Subject: [PATCH 192/271] fix filter --- src/filter.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/filter.ts b/src/filter.ts index ed2872b..b9a2863 100644 --- a/src/filter.ts +++ b/src/filter.ts @@ -20,7 +20,7 @@ export const WORD_UNIT_RULES: Array<(wordUnit: WordUnit) => boolean> = [ // avoid reduplication of "wan" and "tu" (wordUnit) => { if ( - wordUnit.type === "x ala x" && + wordUnit.type === "reduplication" && (wordUnit.word === "wan" || wordUnit.word === "tu") ) { throw new UnrecognizedError(`reduplication of ${wordUnit.word}`); From c714f08991632bfe0d3ee62d3eb2c5b53d53665d Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 16:24:02 +0800 Subject: [PATCH 193/271] Improve translation list --- src/translation.ts | 898 +++++++++++++++++++++++++++------------------ 1 file changed, 549 insertions(+), 349 deletions(-) diff --git a/src/translation.ts b/src/translation.ts index 8e3fafa..25caab8 100644 --- a/src/translation.ts +++ b/src/translation.ts @@ -1,351 +1,551 @@ -/** Noun translations. */ -export const NOUN = { - akesi: ["reptile", "reptiles", "amphibian", "amphibians"], - ala: ["nothing", "no"], - alasa: ["searching"], - ale: ["everything"], - ali: ["everything"], - anpa: ["bottom", "bottoms", "under"], - ante: ["changing"], - awen: ["staying"], - esun: ["shop", "shops"], - ijo: ["thing", "things"], - ike: ["badness"], - ilo: ["tool", "tools"], - insa: ["inside", "insides"], - jaki: ["obscenity", "obscenities"], - jan: ["person", "people", "human", "humans", "humanity"], - jelo: ["yellowness"], - jo: ["possession", "possessions"], - kala: ["fish", "fishes"], - kalama: ["sound", "sounds"], - kama: ["arriving"], - kasi: ["plant", "plants"], - ken: ["ability", "abilities", "possibility", "possibilities"], - kepeken: [], - kili: ["fruit", "fruits", "vegetable", "vegetables"], - kiwen: ["hard thing", "hard things"], - ko: ["soft thing", "soft things", "powder"], - kon: ["air", "essence"], - kule: ["color", "colors"], - kulupu: ["group", "groups"], - kute: ["ear", "ears", "listening"], - lape: ["sleep", "rest"], - laso: ["blueness", "greenness"], - lawa: ["head", "heads", "control", "controls"], - len: ["cloth", "clothes", "hiding"], - lete: ["coldness"], - lili: ["smallness"], - linja: ["long flexible thing", "long flexible things"], - lipu: ["book", "books", "paper", "paper-like thing", "paper-like things"], - loje: ["redness"], - lon: ["truth", "true"], - luka: ["hand", "hands", "arm", "arms"], - lukin: ["eye", "eyes", "sight"], - lupa: ["hole", "holes"], - ma: ["place", "places", "earth"], - mama: ["parent", "parents", "creator", "creators"], - mani: ["money", "large domestic animal", "large domestic animals"], - meli: ["woman", "women", "feminity"], - mi: ["I", "me", "we", "us"], - mije: ["man", "men", "masculinity"], - moku: ["food", "foods", "drink", "drinks"], - moli: ["death"], - monsi: ["back"], - mu: ["moo"], - mun: ["celestial object", "celestial objects", "glowing thing"], - musi: ["entertainment", "entertainments"], - mute: ["many"], - nanpa: ["number", "numbers"], - nasa: ["silliness", "strangeness"], - nasin: ["way"], - nena: ["bump"], - ni: ["this", "that"], - nimi: ["name", "names", "word", "words"], - noka: ["foot", "feet", "leg", "legs"], - olin: ["love"], - ona: ["they", "them", "it"], - open: ["beginning", "beginnings"], - pakala: ["mistake", "mistakes"], - pan: ["grain", "grains"], - pana: ["giving"], - pali: ["work"], - palisa: ["long hard thing", "long hard things"], - pilin: ["emotion", "emotions"], - pimeja: ["blackness", "brownness", "grayness"], - pini: ["end", "ends"], - pipi: ["insect", "insects", "bug", "bugs"], - poka: ["side", "sides", "hips"], - poki: ["container"], - pona: ["goodness", "simplicity"], - sama: ["similarity"], - seli: ["fire", "heat", "chemical reaction", "chemical reactions"], - selo: ["outer form", "skin", "boundary", "boundaries"], - seme: ["what", "which"], - sewi: ["above", "divinity"], - sijelo: ["body", "bodies"], - sike: ["round thing", "round things", "cycle"], - sin: ["new thing", "new things"], - sina: ["you", "you all"], - sinpin: ["face", "faces", "wall", "walls"], - sitelen: ["writing", "writings", "image", "images"], - sona: ["knowledge"], - soweli: ["animal", "animals"], - suli: ["hugeness", "importance"], - suno: ["light source", "light sources", "sun"], - supa: ["horizontal surface", "horizontal surfaces"], - suwi: ["sweetness", "cuteness", "innocence"], - tan: ["reason", "origin"], - taso: [], - tawa: ["movement"], - telo: ["liquid"], - tenpo: ["time"], - toki: ["communication", "communications", "language", "languages", "hello"], - tomo: ["house", "houses"], - tonsi: [ - "transgender person", - "transgender people", - "non-binary person", - "non-binary people", - ], - tu: ["pair"], - unpa: ["sex"], - uta: ["mouth"], - utala: ["conflict", "difficulty"], - walo: ["whiteness", "paleness"], - wan: ["one"], - waso: ["bird", "birds"], - wawa: ["power", "powers"], - weka: ["leaving"], - wile: ["want", "wants", "need", "needs"], +/** Represents possible translations of words. */ +export type Translation = { + noun: Array; + adjective: Array; + adverb: Array; }; -/** Adjective translations. */ -export const ADJECTIVE = { - akesi: ["reptilian", "amphibian"], - ala: ["not", "no"], - alasa: [], - ale: ["all"], - ali: ["all"], - anpa: ["bottom"], - ante: ["different", "other"], - awen: ["staying"], - esun: [], - ijo: [], - ike: ["bad"], - ilo: [], - insa: [], - jaki: ["gross"], - jan: ["person-like"], - jelo: ["yellow"], - jo: [], - kala: ["fish-like"], - kalama: ["sounding"], - kama: ["arriving"], - kasi: ["plant-like"], - ken: [], - kepeken: [], - kili: [], - kiwen: ["hard"], - ko: ["soft"], - kon: [], - kule: ["colorful"], - kulupu: [], - kute: [], - lape: ["sleeping"], - laso: ["blue", "green"], - lawa: ["controlling"], - len: ["hidden"], - lete: ["cold", "uncooked"], - lili: ["small"], - linja: ["long flexible"], - lipu: ["paper-like"], - loje: ["red"], - lon: ["truthful"], - luka: [], - lukin: [], - lupa: [], - ma: ["earthy"], - mama: [], - mani: [], - meli: ["woman", "feminine"], - mi: ["my", "our"], - mije: ["man", "masculine"], - moku: [], - moli: ["dead", "deadly"], - monsi: [], - mu: ["mooing"], - mun: ["glowing"], - musi: ["entertaining"], - mute: ["many"], - nanpa: ["numeric"], - nasa: ["silly", "strange"], - nasin: [], - nena: [], - ni: ["this", "that"], - nimi: [], - noka: [], - olin: [], - ona: ["their", "its"], - open: [], - pakala: ["broken"], - pan: [], - pana: [], - pali: ["working"], - palisa: ["long hard"], - pilin: [], - pimeja: ["black", "brown", "gray"], - pini: ["ended"], - pipi: ["bug-like", "insect-like"], - poka: [], - poki: [], - pona: ["good", "simple"], - sama: [], - seli: ["hot"], - selo: [], - seme: ["what", "which"], - sewi: ["divine"], - sijelo: [], - sike: ["round"], - sin: ["new"], - sina: ["your"], - sinpin: [], - sitelen: [], - sona: ["knowledgeable"], - soweli: ["animal-like"], - suli: ["huge", "important"], - suno: ["shining"], - supa: [], - suwi: ["sweet", "cute", "innocent"], - tan: [], - tawa: ["moving"], - telo: ["liquid"], - tenpo: [], - toki: ["communicating"], - tomo: [], - tonsi: ["transgender", "non-binary"], - tu: ["two"], - unpa: ["sexual"], - uta: [], - utala: ["conflicting", "difficult"], - walo: ["white", "pale"], - wan: ["one"], - waso: ["bird-like"], - wawa: ["powerful"], - weka: ["leaving"], - wile: [], -}; -/** Adverb translations. */ -export const ADVERB = { - akesi: [], - ala: ["not"], - alasa: [], - ale: ["completely"], - ali: ["completely"], - anpa: [], - ante: ["differently"], - awen: [], - esun: [], - ijo: [], - ike: ["badly"], - ilo: [], - insa: [], - jaki: ["disgustingly"], - jan: [], - jelo: [], - jo: [], - kala: [], - kalama: [], - kama: [], - kasi: [], - ken: [], - kepeken: [], - kili: [], - kiwen: [], - ko: [], - kon: [], - kule: ["colorfully"], - kulupu: [], - kute: [], - lape: [], - laso: [], - lawa: [], - len: [], - lete: [], - lili: ["slightly"], - linja: [], - lipu: [], - loje: [], - lon: ["truthfully"], - luka: [], - lukin: [], - lupa: [], - ma: [], - mama: [], - mani: [], - meli: [], - mi: [], - mije: [], - moku: [], - moli: [], - monsi: [], - mu: [], - mun: [], - musi: ["entertainingly"], - mute: ["very"], - nanpa: ["numerically"], - nasa: ["strangely"], - nasin: [], - nena: [], - ni: [], - nimi: [], - noka: [], - olin: [], - ona: [], - open: [], - pakala: [], - pan: [], - pana: [], - pali: [], - palisa: [], - pilin: [], - pimeja: [], - pini: [], - pipi: [], - poka: [], - poki: [], - pona: ["nicely"], - sama: ["equally"], - seli: [], - selo: [], - seme: [], - sewi: ["divinely"], - sijelo: [], - sike: ["repeatedly"], - sin: ["newly"], - sina: [], - sinpin: [], - sitelen: [], - sona: [], - soweli: [], - suli: ["hugely", "importantly"], - suno: [], - supa: [], - suwi: ["sweetly"], - tan: [], - tawa: [], - telo: [], - tenpo: [], - toki: [], - tomo: [], - tonsi: [], - tu: [], - unpa: ["sexually"], - uta: [], - utala: ["conflictingly", "difficultly"], - walo: [], - wan: [], - waso: [], - wawa: ["powerfully"], - weka: [], - wile: [], +/** Record of word translations. */ +export const TRANSLATION: { [key: string]: Translation } = { + akesi: { + noun: ["reptile", "reptiles", "amphibian", "amphibians"], + adjective: ["reptile", "reptiles", "amphibian", "amphibians"], + adverb: ["reptile", "reptiles", "amphibian", "amphibians"], + }, + ala: { + noun: ["nothing", "no"], + adjective: ["nothing", "no"], + adverb: ["nothing", "no"], + }, + alasa: { + noun: ["searching"], + adjective: ["searching"], + adverb: ["searching"], + }, + ale: { + noun: ["everything"], + adjective: ["everything"], + adverb: ["everything"], + }, + ali: { + noun: ["everything"], + adjective: ["everything"], + adverb: ["everything"], + }, + anpa: { + noun: ["bottom", "bottoms", "under"], + adjective: ["bottom", "bottoms", "under"], + adverb: ["bottom", "bottoms", "under"], + }, + ante: { + noun: ["changing"], + adjective: ["changing"], + adverb: ["changing"], + }, + awen: { + noun: ["staying"], + adjective: ["staying"], + adverb: ["staying"], + }, + esun: { + noun: ["shop", "shops"], + adjective: ["shop", "shops"], + adverb: ["shop", "shops"], + }, + ijo: { + noun: ["thing", "things"], + adjective: ["thing", "things"], + adverb: ["thing", "things"], + }, + ike: { + noun: ["badness"], + adjective: ["badness"], + adverb: ["badness"], + }, + ilo: { + noun: ["tool", "tools"], + adjective: ["tool", "tools"], + adverb: ["tool", "tools"], + }, + insa: { + noun: ["inside", "insides"], + adjective: ["inside", "insides"], + adverb: ["inside", "insides"], + }, + jaki: { + noun: ["obscenity", "obscenities"], + adjective: ["obscenity", "obscenities"], + adverb: ["obscenity", "obscenities"], + }, + jan: { + noun: ["person", "people", "human", "humans", "humanity"], + adjective: ["person", "people", "human", "humans", "humanity"], + adverb: ["person", "people", "human", "humans", "humanity"], + }, + jelo: { + noun: ["yellowness"], + adjective: ["yellowness"], + adverb: ["yellowness"], + }, + jo: { + noun: ["possession", "possessions"], + adjective: ["possession", "possessions"], + adverb: ["possession", "possessions"], + }, + kala: { + noun: ["fish", "fishes"], + adjective: ["fish", "fishes"], + adverb: ["fish", "fishes"], + }, + kalama: { + noun: ["sound", "sounds"], + adjective: ["sound", "sounds"], + adverb: ["sound", "sounds"], + }, + kama: { + noun: ["arriving"], + adjective: ["arriving"], + adverb: ["arriving"], + }, + kasi: { + noun: ["plant", "plants"], + adjective: ["plant", "plants"], + adverb: ["plant", "plants"], + }, + ken: { + noun: ["ability", "abilities", "possibility", "possibilities"], + adjective: ["ability", "abilities", "possibility", "possibilities"], + adverb: ["ability", "abilities", "possibility", "possibilities"], + }, + kepeken: { noun: [], adjective: [], adverb: [] }, + kili: { + noun: ["fruit", "fruits", "vegetable", "vegetables"], + adjective: ["fruit", "fruits", "vegetable", "vegetables"], + adverb: ["fruit", "fruits", "vegetable", "vegetables"], + }, + kiwen: { + noun: ["hard thing", "hard things"], + adjective: ["hard thing", "hard things"], + adverb: ["hard thing", "hard things"], + }, + ko: { + noun: ["soft thing", "soft things", "powder"], + adjective: ["soft thing", "soft things", "powder"], + adverb: ["soft thing", "soft things", "powder"], + }, + kon: { + noun: ["air", "essence"], + adjective: ["air", "essence"], + adverb: ["air", "essence"], + }, + kule: { + noun: ["color", "colors"], + adjective: ["color", "colors"], + adverb: ["color", "colors"], + }, + kulupu: { + noun: ["group", "groups"], + adjective: ["group", "groups"], + adverb: ["group", "groups"], + }, + kute: { + noun: ["ear", "ears", "listening"], + adjective: ["ear", "ears", "listening"], + adverb: ["ear", "ears", "listening"], + }, + lape: { + noun: ["sleep", "rest"], + adjective: ["sleep", "rest"], + adverb: ["sleep", "rest"], + }, + laso: { + noun: ["blueness", "greenness"], + adjective: ["blueness", "greenness"], + adverb: ["blueness", "greenness"], + }, + lawa: { + noun: ["head", "heads", "control", "controls"], + adjective: ["head", "heads", "control", "controls"], + adverb: ["head", "heads", "control", "controls"], + }, + len: { + noun: ["cloth", "clothes", "hiding"], + adjective: ["cloth", "clothes", "hiding"], + adverb: ["cloth", "clothes", "hiding"], + }, + lete: { + noun: ["coldness"], + adjective: ["coldness"], + adverb: ["coldness"], + }, + lili: { + noun: ["smallness"], + adjective: ["smallness"], + adverb: ["smallness"], + }, + linja: { + noun: ["long flexible thing", "long flexible things"], + adjective: ["long flexible thing", "long flexible things"], + adverb: ["long flexible thing", "long flexible things"], + }, + lipu: { + noun: ["book", "books", "paper", "paper-like thing", "paper-like things"], + adjective: [ + "book", + "books", + "paper", + "paper-like thing", + "paper-like things", + ], + adverb: [ + "book", + "books", + "paper", + "paper-like thing", + "paper-like things", + ], + }, + loje: { + noun: ["redness"], + adjective: ["redness"], + adverb: ["redness"], + }, + lon: { + noun: ["truth", "true"], + adjective: ["truth", "true"], + adverb: ["truth", "true"], + }, + luka: { + noun: ["hand", "hands", "arm", "arms"], + adjective: ["hand", "hands", "arm", "arms"], + adverb: ["hand", "hands", "arm", "arms"], + }, + lukin: { + noun: ["eye", "eyes", "sight"], + adjective: ["eye", "eyes", "sight"], + adverb: ["eye", "eyes", "sight"], + }, + lupa: { + noun: ["hole", "holes"], + adjective: ["hole", "holes"], + adverb: ["hole", "holes"], + }, + ma: { + noun: ["place", "places", "earth"], + adjective: ["place", "places", "earth"], + adverb: ["place", "places", "earth"], + }, + mama: { + noun: ["parent", "parents", "creator", "creators"], + adjective: ["parent", "parents", "creator", "creators"], + adverb: ["parent", "parents", "creator", "creators"], + }, + mani: { + noun: ["money", "large domestic animal", "large domestic animals"], + adjective: ["money", "large domestic animal", "large domestic animals"], + adverb: ["money", "large domestic animal", "large domestic animals"], + }, + meli: { + noun: ["woman", "women", "feminity"], + adjective: ["woman", "women", "feminity"], + adverb: ["woman", "women", "feminity"], + }, + mi: { + noun: ["I", "me", "we", "us"], + adjective: ["I", "me", "we", "us"], + adverb: ["I", "me", "we", "us"], + }, + mije: { + noun: ["man", "men", "masculinity"], + adjective: ["man", "men", "masculinity"], + adverb: ["man", "men", "masculinity"], + }, + moku: { + noun: ["food", "foods", "drink", "drinks"], + adjective: ["food", "foods", "drink", "drinks"], + adverb: ["food", "foods", "drink", "drinks"], + }, + moli: { noun: ["death"], adjective: ["death"], adverb: ["death"] }, + monsi: { noun: ["back"], adjective: ["back"], adverb: ["back"] }, + mu: { noun: ["moo"], adjective: ["moo"], adverb: ["moo"] }, + mun: { + noun: ["celestial object", "celestial objects", "glowing thing"], + adjective: ["celestial object", "celestial objects", "glowing thing"], + adverb: ["celestial object", "celestial objects", "glowing thing"], + }, + musi: { + noun: ["entertainment", "entertainments"], + adjective: ["entertainment", "entertainments"], + adverb: ["entertainment", "entertainments"], + }, + mute: { noun: ["many"], adjective: ["many"], adverb: ["many"] }, + nanpa: { + noun: ["number", "numbers"], + adjective: ["number", "numbers"], + adverb: ["number", "numbers"], + }, + nasa: { + noun: ["silliness", "strangeness"], + adjective: ["silliness", "strangeness"], + adverb: ["silliness", "strangeness"], + }, + nasin: { noun: ["way"], adjective: ["way"], adverb: ["way"] }, + nena: { noun: ["bump"], adjective: ["bump"], adverb: ["bump"] }, + ni: { + noun: ["this", "that"], + adjective: ["this", "that"], + adverb: ["this", "that"], + }, + nimi: { + noun: ["name", "names", "word", "words"], + adjective: ["name", "names", "word", "words"], + adverb: ["name", "names", "word", "words"], + }, + noka: { + noun: ["foot", "feet", "leg", "legs"], + adjective: ["foot", "feet", "leg", "legs"], + adverb: ["foot", "feet", "leg", "legs"], + }, + olin: { noun: ["love"], adjective: ["love"], adverb: ["love"] }, + ona: { + noun: ["they", "them", "it"], + adjective: ["they", "them", "it"], + adverb: ["they", "them", "it"], + }, + open: { + noun: ["beginning", "beginnings"], + adjective: ["beginning", "beginnings"], + adverb: ["beginning", "beginnings"], + }, + pakala: { + noun: ["mistake", "mistakes"], + adjective: ["mistake", "mistakes"], + adverb: ["mistake", "mistakes"], + }, + pan: { + noun: ["grain", "grains"], + adjective: ["grain", "grains"], + adverb: ["grain", "grains"], + }, + pana: { noun: ["giving"], adjective: ["giving"], adverb: ["giving"] }, + pali: { noun: ["work"], adjective: ["work"], adverb: ["work"] }, + palisa: { + noun: ["long hard thing", "long hard things"], + adjective: ["long hard thing", "long hard things"], + adverb: ["long hard thing", "long hard things"], + }, + pilin: { + noun: ["emotion", "emotions"], + adjective: ["emotion", "emotions"], + adverb: ["emotion", "emotions"], + }, + pimeja: { + noun: ["blackness", "brownness", "grayness"], + adjective: ["blackness", "brownness", "grayness"], + adverb: ["blackness", "brownness", "grayness"], + }, + pini: { + noun: ["end", "ends"], + adjective: ["end", "ends"], + adverb: ["end", "ends"], + }, + pipi: { + noun: ["insect", "insects", "bug", "bugs"], + adjective: ["insect", "insects", "bug", "bugs"], + adverb: ["insect", "insects", "bug", "bugs"], + }, + poka: { + noun: ["side", "sides", "hips"], + adjective: ["side", "sides", "hips"], + adverb: ["side", "sides", "hips"], + }, + poki: { + noun: ["container"], + adjective: ["container"], + adverb: ["container"], + }, + pona: { + noun: ["goodness", "simplicity"], + adjective: ["goodness", "simplicity"], + adverb: ["goodness", "simplicity"], + }, + sama: { + noun: ["similarity"], + adjective: ["similarity"], + adverb: ["similarity"], + }, + seli: { + noun: ["fire", "heat", "chemical reaction", "chemical reactions"], + adjective: ["fire", "heat", "chemical reaction", "chemical reactions"], + adverb: ["fire", "heat", "chemical reaction", "chemical reactions"], + }, + selo: { + noun: ["outer form", "skin", "boundary", "boundaries"], + adjective: ["outer form", "skin", "boundary", "boundaries"], + adverb: ["outer form", "skin", "boundary", "boundaries"], + }, + seme: { + noun: ["what", "which"], + adjective: ["what", "which"], + adverb: ["what", "which"], + }, + sewi: { + noun: ["above", "divinity"], + adjective: ["above", "divinity"], + adverb: ["above", "divinity"], + }, + sijelo: { + noun: ["body", "bodies"], + adjective: ["body", "bodies"], + adverb: ["body", "bodies"], + }, + sike: { + noun: ["round thing", "round things", "cycle"], + adjective: ["round thing", "round things", "cycle"], + adverb: ["round thing", "round things", "cycle"], + }, + sin: { + noun: ["new thing", "new things"], + adjective: ["new thing", "new things"], + adverb: ["new thing", "new things"], + }, + sina: { + noun: ["you", "you all"], + adjective: ["you", "you all"], + adverb: ["you", "you all"], + }, + sinpin: { + noun: ["face", "faces", "wall", "walls"], + adjective: ["face", "faces", "wall", "walls"], + adverb: ["face", "faces", "wall", "walls"], + }, + sitelen: { + noun: ["writing", "writings", "image", "images"], + adjective: ["writing", "writings", "image", "images"], + adverb: ["writing", "writings", "image", "images"], + }, + sona: { + noun: ["knowledge"], + adjective: ["knowledge"], + adverb: ["knowledge"], + }, + soweli: { + noun: ["animal", "animals"], + adjective: ["animal", "animals"], + adverb: ["animal", "animals"], + }, + suli: { + noun: ["hugeness", "importance"], + adjective: ["hugeness", "importance"], + adverb: ["hugeness", "importance"], + }, + suno: { + noun: ["light source", "light sources", "sun"], + adjective: ["light source", "light sources", "sun"], + adverb: ["light source", "light sources", "sun"], + }, + supa: { + noun: ["horizontal surface", "horizontal surfaces"], + adjective: ["horizontal surface", "horizontal surfaces"], + adverb: ["horizontal surface", "horizontal surfaces"], + }, + suwi: { + noun: ["sweetness", "cuteness", "innocence"], + adjective: ["sweetness", "cuteness", "innocence"], + adverb: ["sweetness", "cuteness", "innocence"], + }, + tan: { + noun: ["reason", "origin"], + adjective: ["reason", "origin"], + adverb: ["reason", "origin"], + }, + taso: { noun: [], adjective: [], adverb: [] }, + tawa: { + noun: ["movement"], + adjective: ["movement"], + adverb: ["movement"], + }, + telo: { noun: ["liquid"], adjective: ["liquid"], adverb: ["liquid"] }, + tenpo: { noun: ["time"], adjective: ["time"], adverb: ["time"] }, + toki: { + noun: [ + "communication", + "communications", + "language", + "languages", + "hello", + ], + adjective: [ + "communication", + "communications", + "language", + "languages", + "hello", + ], + adverb: [ + "communication", + "communications", + "language", + "languages", + "hello", + ], + }, + tomo: { + noun: ["house", "houses"], + adjective: ["house", "houses"], + adverb: ["house", "houses"], + }, + tonsi: { + noun: [ + "transgender person", + "transgender people", + "non-binary person", + "non-binary people", + ], + adjective: [ + "transgender person", + "transgender people", + "non-binary person", + "non-binary people", + ], + adverb: [ + "transgender person", + "transgender people", + "non-binary person", + "non-binary people", + ], + }, + tu: { noun: ["pair"], adjective: ["pair"], adverb: ["pair"] }, + unpa: { noun: ["sex"], adjective: ["sex"], adverb: ["sex"] }, + uta: { noun: ["mouth"], adjective: ["mouth"], adverb: ["mouth"] }, + utala: { + noun: ["conflict", "difficulty"], + adjective: ["conflict", "difficulty"], + adverb: ["conflict", "difficulty"], + }, + walo: { + noun: ["whiteness", "paleness"], + adjective: ["whiteness", "paleness"], + adverb: ["whiteness", "paleness"], + }, + wan: { noun: ["one"], adjective: ["one"], adverb: ["one"] }, + waso: { + noun: ["bird", "birds"], + adjective: ["bird", "birds"], + adverb: ["bird", "birds"], + }, + wawa: { + noun: ["power", "powers"], + adjective: ["power", "powers"], + adverb: ["power", "powers"], + }, + weka: { + noun: ["leaving"], + adjective: ["leaving"], + adverb: ["leaving"], + }, + wile: { + noun: ["want", "wants", "need", "needs"], + adjective: ["want", "wants", "need", "needs"], + adverb: ["want", "wants", "need", "needs"], + }, }; From 805fae62ead7193a366f890bd1d16f26e9944c79 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 17:36:55 +0800 Subject: [PATCH 194/271] implement filter for full clause --- src/filter.ts | 10 ++++++++++ src/parser.ts | 3 ++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/filter.ts b/src/filter.ts index b9a2863..eeba9a0 100644 --- a/src/filter.ts +++ b/src/filter.ts @@ -1,4 +1,5 @@ import { + FullClause, Modifier, MultiplePhrases, Phrase, @@ -185,6 +186,15 @@ export const PREPOSITION_RULE: Array<(phrase: Preposition) => boolean> = [ return true; }, ]; +export const FULL_CLAUSE_RULE: Array<(fullClase: FullClause) => boolean> = [ + // Prevent "taso ala taso" + (fullClause) => { + if (fullClause.taso && fullClause.taso.type === "x ala x") { + throw new UnrecognizedError('"taso ala taso"'); + } + return true; + }, +]; /** Array of filter rules for multiple sentences. */ export const SENTENCES_RULE: Array<(sentences: Array) => boolean> = [ // Only allow at most 2 sentences diff --git a/src/parser.ts b/src/parser.ts index b001072..c1ea679 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -20,6 +20,7 @@ import { } from "./vocabulary.ts"; import { filter, + FULL_CLAUSE_RULE, MODIFIER_RULES, MODIFIERS_RULES, PHRASE_RULE, @@ -592,7 +593,7 @@ function fullClause(): Parser { taso, anuSeme, clause, - })); + })).filter(filter(FULL_CLAUSE_RULE)); } /** parses _la_ with optional comma around. */ function la(): Parser { From 0a84cf4ac0e070d7acea069ab78303ad7dbe99a3 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 17:39:23 +0800 Subject: [PATCH 195/271] implement translator for multiple sentences (sentence translator itself is TODO) --- src/translator.ts | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/translator.ts b/src/translator.ts index 72c886b..edfaaaf 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -1,12 +1,26 @@ import { Sentence } from "./ast.ts"; import { Output } from "./output.ts"; import { parser } from "./parser.ts"; + type TranslationOutput = Output; -function translateSentence(output: Sentence): TranslationOutput { +function translateSentence(sentence: Sentence): TranslationOutput { throw new Error("todo"); } -function translate(src: string): TranslationOutput { - return parser(src).flatMap((sentences) => - new Output(sentences).flatMap(translateSentence) +function translateSentences(sentences: Array): TranslationOutput { + return sentences.reduce( + (output, sentence) => + output.flatMap((left) => + translateSentence(sentence).map((right) => { + if (left === "") { + return right; + } else { + return [left, right].join(" "); + } + }) + ), + new Output([""]), ); } +function translate(src: string): TranslationOutput { + return parser(src).flatMap(translateSentences); +} From 551bdaa5d6902b49001b24f1ef809ab8b33c706c Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 17:48:11 +0800 Subject: [PATCH 196/271] comments and exports --- src/translator.ts | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/translator.ts b/src/translator.ts index edfaaaf..b2bb4b6 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -2,10 +2,14 @@ import { Sentence } from "./ast.ts"; import { Output } from "./output.ts"; import { parser } from "./parser.ts"; -type TranslationOutput = Output; +/** A special kind of Output that translators returns. */ +export type TranslationOutput = Output; + +/** Translates a single sentence. */ function translateSentence(sentence: Sentence): TranslationOutput { throw new Error("todo"); } +/** Translates multiple sentences. */ function translateSentences(sentences: Array): TranslationOutput { return sentences.reduce( (output, sentence) => @@ -21,6 +25,7 @@ function translateSentences(sentences: Array): TranslationOutput { new Output([""]), ); } -function translate(src: string): TranslationOutput { +/** Full Toki Pona translator. */ +export function translate(src: string): TranslationOutput { return parser(src).flatMap(translateSentences); } From c117044b7f74f5b730b404b69cd49b7c3f9df1d9 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 18:16:56 +0800 Subject: [PATCH 197/271] fix translation list --- src/translation.ts | 509 ++++++++++++++++----------------------------- 1 file changed, 182 insertions(+), 327 deletions(-) diff --git a/src/translation.ts b/src/translation.ts index 25caab8..45ea282 100644 --- a/src/translation.ts +++ b/src/translation.ts @@ -8,464 +8,354 @@ export type Translation = { export const TRANSLATION: { [key: string]: Translation } = { akesi: { noun: ["reptile", "reptiles", "amphibian", "amphibians"], - adjective: ["reptile", "reptiles", "amphibian", "amphibians"], - adverb: ["reptile", "reptiles", "amphibian", "amphibians"], + adjective: ["reptilian", "amphibian"], + adverb: [], }, ala: { noun: ["nothing", "no"], - adjective: ["nothing", "no"], - adverb: ["nothing", "no"], - }, - alasa: { - noun: ["searching"], - adjective: ["searching"], - adverb: ["searching"], + adjective: ["not", "no"], + adverb: ["not"], }, + alasa: { noun: ["searching"], adjective: [], adverb: [] }, ale: { noun: ["everything"], - adjective: ["everything"], - adverb: ["everything"], + adjective: ["all"], + adverb: ["completely"], }, ali: { noun: ["everything"], - adjective: ["everything"], - adverb: ["everything"], + adjective: ["all"], + adverb: ["completely"], }, anpa: { noun: ["bottom", "bottoms", "under"], - adjective: ["bottom", "bottoms", "under"], - adverb: ["bottom", "bottoms", "under"], + adjective: ["bottom"], + adverb: [], }, ante: { noun: ["changing"], - adjective: ["changing"], - adverb: ["changing"], - }, - awen: { - noun: ["staying"], - adjective: ["staying"], - adverb: ["staying"], - }, - esun: { - noun: ["shop", "shops"], - adjective: ["shop", "shops"], - adverb: ["shop", "shops"], - }, - ijo: { - noun: ["thing", "things"], - adjective: ["thing", "things"], - adverb: ["thing", "things"], - }, - ike: { - noun: ["badness"], - adjective: ["badness"], - adverb: ["badness"], - }, - ilo: { - noun: ["tool", "tools"], - adjective: ["tool", "tools"], - adverb: ["tool", "tools"], - }, - insa: { - noun: ["inside", "insides"], - adjective: ["inside", "insides"], - adverb: ["inside", "insides"], - }, + adjective: ["different", "other"], + adverb: ["differently"], + }, + awen: { noun: ["staying"], adjective: ["staying"], adverb: [] }, + esun: { noun: ["shop", "shops"], adjective: [], adverb: [] }, + ijo: { noun: ["thing", "things"], adjective: [], adverb: [] }, + ike: { noun: ["badness"], adjective: ["bad"], adverb: ["badly"] }, + ilo: { noun: ["tool", "tools"], adjective: [], adverb: [] }, + insa: { noun: ["inside", "insides"], adjective: [], adverb: [] }, jaki: { noun: ["obscenity", "obscenities"], - adjective: ["obscenity", "obscenities"], - adverb: ["obscenity", "obscenities"], + adjective: ["gross"], + adverb: ["disgustingly"], }, jan: { noun: ["person", "people", "human", "humans", "humanity"], - adjective: ["person", "people", "human", "humans", "humanity"], - adverb: ["person", "people", "human", "humans", "humanity"], - }, - jelo: { - noun: ["yellowness"], - adjective: ["yellowness"], - adverb: ["yellowness"], + adjective: ["person-like"], + adverb: [], }, + jelo: { noun: ["yellowness"], adjective: ["yellow"], adverb: [] }, jo: { noun: ["possession", "possessions"], - adjective: ["possession", "possessions"], - adverb: ["possession", "possessions"], + adjective: [], + adverb: [], }, kala: { noun: ["fish", "fishes"], - adjective: ["fish", "fishes"], - adverb: ["fish", "fishes"], + adjective: ["fish-like"], + adverb: [], }, kalama: { noun: ["sound", "sounds"], - adjective: ["sound", "sounds"], - adverb: ["sound", "sounds"], - }, - kama: { - noun: ["arriving"], - adjective: ["arriving"], - adverb: ["arriving"], + adjective: ["sounding"], + adverb: [], }, + kama: { noun: ["arriving"], adjective: ["arriving"], adverb: [] }, kasi: { noun: ["plant", "plants"], - adjective: ["plant", "plants"], - adverb: ["plant", "plants"], + adjective: ["plant-like"], + adverb: [], }, ken: { noun: ["ability", "abilities", "possibility", "possibilities"], - adjective: ["ability", "abilities", "possibility", "possibilities"], - adverb: ["ability", "abilities", "possibility", "possibilities"], + adjective: [], + adverb: [], }, - kepeken: { noun: [], adjective: [], adverb: [] }, kili: { noun: ["fruit", "fruits", "vegetable", "vegetables"], - adjective: ["fruit", "fruits", "vegetable", "vegetables"], - adverb: ["fruit", "fruits", "vegetable", "vegetables"], + adjective: [], + adverb: [], }, kiwen: { noun: ["hard thing", "hard things"], - adjective: ["hard thing", "hard things"], - adverb: ["hard thing", "hard things"], + adjective: ["hard"], + adverb: [], }, ko: { noun: ["soft thing", "soft things", "powder"], - adjective: ["soft thing", "soft things", "powder"], - adverb: ["soft thing", "soft things", "powder"], - }, - kon: { - noun: ["air", "essence"], - adjective: ["air", "essence"], - adverb: ["air", "essence"], + adjective: ["soft"], + adverb: [], }, + kon: { noun: ["air", "essence"], adjective: [], adverb: [] }, kule: { noun: ["color", "colors"], - adjective: ["color", "colors"], - adverb: ["color", "colors"], - }, - kulupu: { - noun: ["group", "groups"], - adjective: ["group", "groups"], - adverb: ["group", "groups"], + adjective: ["colorful"], + adverb: ["colorfully"], }, + kulupu: { noun: ["group", "groups"], adjective: [], adverb: [] }, kute: { noun: ["ear", "ears", "listening"], - adjective: ["ear", "ears", "listening"], - adverb: ["ear", "ears", "listening"], + adjective: [], + adverb: [], }, lape: { noun: ["sleep", "rest"], - adjective: ["sleep", "rest"], - adverb: ["sleep", "rest"], + adjective: ["sleeping"], + adverb: [], }, laso: { noun: ["blueness", "greenness"], - adjective: ["blueness", "greenness"], - adverb: ["blueness", "greenness"], + adjective: ["blue", "green"], + adverb: [], }, lawa: { noun: ["head", "heads", "control", "controls"], - adjective: ["head", "heads", "control", "controls"], - adverb: ["head", "heads", "control", "controls"], + adjective: ["controlling"], + adverb: [], }, len: { noun: ["cloth", "clothes", "hiding"], - adjective: ["cloth", "clothes", "hiding"], - adverb: ["cloth", "clothes", "hiding"], + adjective: ["hidden"], + adverb: [], }, lete: { noun: ["coldness"], - adjective: ["coldness"], - adverb: ["coldness"], + adjective: ["cold", "uncooked"], + adverb: [], }, lili: { noun: ["smallness"], - adjective: ["smallness"], - adverb: ["smallness"], + adjective: ["small"], + adverb: ["slightly"], }, linja: { noun: ["long flexible thing", "long flexible things"], - adjective: ["long flexible thing", "long flexible things"], - adverb: ["long flexible thing", "long flexible things"], + adjective: ["long flexible"], + adverb: [], }, lipu: { noun: ["book", "books", "paper", "paper-like thing", "paper-like things"], - adjective: [ - "book", - "books", - "paper", - "paper-like thing", - "paper-like things", - ], - adverb: [ - "book", - "books", - "paper", - "paper-like thing", - "paper-like things", - ], - }, - loje: { - noun: ["redness"], - adjective: ["redness"], - adverb: ["redness"], + adjective: ["paper-like"], + adverb: [], }, + loje: { noun: ["redness"], adjective: ["red"], adverb: [] }, lon: { noun: ["truth", "true"], - adjective: ["truth", "true"], - adverb: ["truth", "true"], + adjective: ["truthful"], + adverb: ["truthfully"], }, luka: { noun: ["hand", "hands", "arm", "arms"], - adjective: ["hand", "hands", "arm", "arms"], - adverb: ["hand", "hands", "arm", "arms"], - }, - lukin: { - noun: ["eye", "eyes", "sight"], - adjective: ["eye", "eyes", "sight"], - adverb: ["eye", "eyes", "sight"], - }, - lupa: { - noun: ["hole", "holes"], - adjective: ["hole", "holes"], - adverb: ["hole", "holes"], + adjective: [], + adverb: [], }, + lukin: { noun: ["eye", "eyes", "sight"], adjective: [], adverb: [] }, + lupa: { noun: ["hole", "holes"], adjective: [], adverb: [] }, ma: { noun: ["place", "places", "earth"], - adjective: ["place", "places", "earth"], - adverb: ["place", "places", "earth"], + adjective: ["earthy"], + adverb: [], }, mama: { noun: ["parent", "parents", "creator", "creators"], - adjective: ["parent", "parents", "creator", "creators"], - adverb: ["parent", "parents", "creator", "creators"], + adjective: [], + adverb: [], }, mani: { noun: ["money", "large domestic animal", "large domestic animals"], - adjective: ["money", "large domestic animal", "large domestic animals"], - adverb: ["money", "large domestic animal", "large domestic animals"], + adjective: [], + adverb: [], }, meli: { noun: ["woman", "women", "feminity"], - adjective: ["woman", "women", "feminity"], - adverb: ["woman", "women", "feminity"], + adjective: ["woman", "feminine"], + adverb: [], }, mi: { noun: ["I", "me", "we", "us"], - adjective: ["I", "me", "we", "us"], - adverb: ["I", "me", "we", "us"], + adjective: ["my", "our"], + adverb: [], }, mije: { noun: ["man", "men", "masculinity"], - adjective: ["man", "men", "masculinity"], - adverb: ["man", "men", "masculinity"], + adjective: ["man", "masculine"], + adverb: [], }, moku: { noun: ["food", "foods", "drink", "drinks"], - adjective: ["food", "foods", "drink", "drinks"], - adverb: ["food", "foods", "drink", "drinks"], + adjective: [], + adverb: [], }, - moli: { noun: ["death"], adjective: ["death"], adverb: ["death"] }, - monsi: { noun: ["back"], adjective: ["back"], adverb: ["back"] }, - mu: { noun: ["moo"], adjective: ["moo"], adverb: ["moo"] }, + moli: { noun: ["death"], adjective: ["dead", "deadly"], adverb: [] }, + monsi: { noun: ["back"], adjective: [], adverb: [] }, + mu: { noun: ["moo"], adjective: ["mooing"], adverb: [] }, mun: { noun: ["celestial object", "celestial objects", "glowing thing"], - adjective: ["celestial object", "celestial objects", "glowing thing"], - adverb: ["celestial object", "celestial objects", "glowing thing"], + adjective: ["glowing"], + adverb: [], }, musi: { noun: ["entertainment", "entertainments"], - adjective: ["entertainment", "entertainments"], - adverb: ["entertainment", "entertainments"], + adjective: ["entertaining"], + adverb: ["entertainingly"], }, - mute: { noun: ["many"], adjective: ["many"], adverb: ["many"] }, + mute: { noun: ["many"], adjective: ["many"], adverb: ["very"] }, nanpa: { noun: ["number", "numbers"], - adjective: ["number", "numbers"], - adverb: ["number", "numbers"], + adjective: ["numeric"], + adverb: ["numerically"], }, nasa: { noun: ["silliness", "strangeness"], - adjective: ["silliness", "strangeness"], - adverb: ["silliness", "strangeness"], + adjective: ["silly", "strange"], + adverb: ["strangely"], }, - nasin: { noun: ["way"], adjective: ["way"], adverb: ["way"] }, - nena: { noun: ["bump"], adjective: ["bump"], adverb: ["bump"] }, + nasin: { noun: ["way"], adjective: [], adverb: [] }, + nena: { noun: ["bump"], adjective: [], adverb: [] }, ni: { noun: ["this", "that"], adjective: ["this", "that"], - adverb: ["this", "that"], + adverb: [], }, nimi: { noun: ["name", "names", "word", "words"], - adjective: ["name", "names", "word", "words"], - adverb: ["name", "names", "word", "words"], + adjective: [], + adverb: [], }, noka: { noun: ["foot", "feet", "leg", "legs"], - adjective: ["foot", "feet", "leg", "legs"], - adverb: ["foot", "feet", "leg", "legs"], + adjective: [], + adverb: [], }, - olin: { noun: ["love"], adjective: ["love"], adverb: ["love"] }, + olin: { noun: ["love"], adjective: [], adverb: [] }, ona: { noun: ["they", "them", "it"], - adjective: ["they", "them", "it"], - adverb: ["they", "them", "it"], + adjective: ["their", "its"], + adverb: [], }, open: { noun: ["beginning", "beginnings"], - adjective: ["beginning", "beginnings"], - adverb: ["beginning", "beginnings"], + adjective: [], + adverb: [], }, pakala: { noun: ["mistake", "mistakes"], - adjective: ["mistake", "mistakes"], - adverb: ["mistake", "mistakes"], - }, - pan: { - noun: ["grain", "grains"], - adjective: ["grain", "grains"], - adverb: ["grain", "grains"], + adjective: ["broken"], + adverb: [], }, - pana: { noun: ["giving"], adjective: ["giving"], adverb: ["giving"] }, - pali: { noun: ["work"], adjective: ["work"], adverb: ["work"] }, + pan: { noun: ["grain", "grains"], adjective: [], adverb: [] }, + pana: { noun: ["giving"], adjective: [], adverb: [] }, + pali: { noun: ["work"], adjective: ["working"], adverb: [] }, palisa: { noun: ["long hard thing", "long hard things"], - adjective: ["long hard thing", "long hard things"], - adverb: ["long hard thing", "long hard things"], - }, - pilin: { - noun: ["emotion", "emotions"], - adjective: ["emotion", "emotions"], - adverb: ["emotion", "emotions"], + adjective: ["long hard"], + adverb: [], }, + pilin: { noun: ["emotion", "emotions"], adjective: [], adverb: [] }, pimeja: { noun: ["blackness", "brownness", "grayness"], - adjective: ["blackness", "brownness", "grayness"], - adverb: ["blackness", "brownness", "grayness"], - }, - pini: { - noun: ["end", "ends"], - adjective: ["end", "ends"], - adverb: ["end", "ends"], + adjective: ["black", "brown", "gray"], + adverb: [], }, + pini: { noun: ["end", "ends"], adjective: ["ended"], adverb: [] }, pipi: { noun: ["insect", "insects", "bug", "bugs"], - adjective: ["insect", "insects", "bug", "bugs"], - adverb: ["insect", "insects", "bug", "bugs"], - }, - poka: { - noun: ["side", "sides", "hips"], - adjective: ["side", "sides", "hips"], - adverb: ["side", "sides", "hips"], - }, - poki: { - noun: ["container"], - adjective: ["container"], - adverb: ["container"], + adjective: ["bug-like", "insect-like"], + adverb: [], }, + poka: { noun: ["side", "sides", "hips"], adjective: [], adverb: [] }, + poki: { noun: ["container"], adjective: [], adverb: [] }, pona: { noun: ["goodness", "simplicity"], - adjective: ["goodness", "simplicity"], - adverb: ["goodness", "simplicity"], - }, - sama: { - noun: ["similarity"], - adjective: ["similarity"], - adverb: ["similarity"], + adjective: ["good", "simple"], + adverb: ["nicely"], }, + sama: { noun: ["similarity"], adjective: [], adverb: ["equally"] }, seli: { noun: ["fire", "heat", "chemical reaction", "chemical reactions"], - adjective: ["fire", "heat", "chemical reaction", "chemical reactions"], - adverb: ["fire", "heat", "chemical reaction", "chemical reactions"], + adjective: ["hot"], + adverb: [], }, selo: { noun: ["outer form", "skin", "boundary", "boundaries"], - adjective: ["outer form", "skin", "boundary", "boundaries"], - adverb: ["outer form", "skin", "boundary", "boundaries"], + adjective: [], + adverb: [], }, seme: { noun: ["what", "which"], adjective: ["what", "which"], - adverb: ["what", "which"], + adverb: [], }, sewi: { noun: ["above", "divinity"], - adjective: ["above", "divinity"], - adverb: ["above", "divinity"], - }, - sijelo: { - noun: ["body", "bodies"], - adjective: ["body", "bodies"], - adverb: ["body", "bodies"], + adjective: ["divine"], + adverb: ["divinely"], }, + sijelo: { noun: ["body", "bodies"], adjective: [], adverb: [] }, sike: { noun: ["round thing", "round things", "cycle"], - adjective: ["round thing", "round things", "cycle"], - adverb: ["round thing", "round things", "cycle"], + adjective: ["round"], + adverb: ["repeatedly"], }, sin: { noun: ["new thing", "new things"], - adjective: ["new thing", "new things"], - adverb: ["new thing", "new things"], - }, - sina: { - noun: ["you", "you all"], - adjective: ["you", "you all"], - adverb: ["you", "you all"], + adjective: ["new"], + adverb: ["newly"], }, + sina: { noun: ["you", "you all"], adjective: ["your"], adverb: [] }, sinpin: { noun: ["face", "faces", "wall", "walls"], - adjective: ["face", "faces", "wall", "walls"], - adverb: ["face", "faces", "wall", "walls"], + adjective: [], + adverb: [], }, sitelen: { noun: ["writing", "writings", "image", "images"], - adjective: ["writing", "writings", "image", "images"], - adverb: ["writing", "writings", "image", "images"], + adjective: [], + adverb: [], }, sona: { noun: ["knowledge"], - adjective: ["knowledge"], - adverb: ["knowledge"], + adjective: ["knowledgeable"], + adverb: [], }, soweli: { noun: ["animal", "animals"], - adjective: ["animal", "animals"], - adverb: ["animal", "animals"], + adjective: ["animal-like"], + adverb: [], }, suli: { noun: ["hugeness", "importance"], - adjective: ["hugeness", "importance"], - adverb: ["hugeness", "importance"], + adjective: ["huge", "important"], + adverb: ["hugely", "importantly"], }, suno: { noun: ["light source", "light sources", "sun"], - adjective: ["light source", "light sources", "sun"], - adverb: ["light source", "light sources", "sun"], + adjective: ["shining"], + adverb: [], }, supa: { noun: ["horizontal surface", "horizontal surfaces"], - adjective: ["horizontal surface", "horizontal surfaces"], - adverb: ["horizontal surface", "horizontal surfaces"], + adjective: [], + adverb: [], }, suwi: { noun: ["sweetness", "cuteness", "innocence"], - adjective: ["sweetness", "cuteness", "innocence"], - adverb: ["sweetness", "cuteness", "innocence"], - }, - tan: { - noun: ["reason", "origin"], - adjective: ["reason", "origin"], - adverb: ["reason", "origin"], - }, - taso: { noun: [], adjective: [], adverb: [] }, - tawa: { - noun: ["movement"], - adjective: ["movement"], - adverb: ["movement"], - }, - telo: { noun: ["liquid"], adjective: ["liquid"], adverb: ["liquid"] }, - tenpo: { noun: ["time"], adjective: ["time"], adverb: ["time"] }, + adjective: ["sweet", "cute", "innocent"], + adverb: ["sweetly"], + }, + tan: { noun: ["reason", "origin"], adjective: [], adverb: [] }, + tawa: { noun: ["movement"], adjective: ["moving"], adverb: [] }, + telo: { noun: ["liquid"], adjective: ["liquid"], adverb: [] }, + tenpo: { noun: ["time"], adjective: [], adverb: [] }, toki: { noun: [ "communication", @@ -474,78 +364,43 @@ export const TRANSLATION: { [key: string]: Translation } = { "languages", "hello", ], - adjective: [ - "communication", - "communications", - "language", - "languages", - "hello", - ], - adverb: [ - "communication", - "communications", - "language", - "languages", - "hello", - ], - }, - tomo: { - noun: ["house", "houses"], - adjective: ["house", "houses"], - adverb: ["house", "houses"], + adjective: ["communicating"], + adverb: [], }, + tomo: { noun: ["house", "houses"], adjective: [], adverb: [] }, tonsi: { - noun: [ - "transgender person", - "transgender people", - "non-binary person", - "non-binary people", - ], - adjective: [ - "transgender person", - "transgender people", - "non-binary person", - "non-binary people", - ], - adverb: [ - "transgender person", - "transgender people", - "non-binary person", - "non-binary people", - ], + noun: ["transgender", "transgenders", "non-binary", "non-binaries"], + adjective: ["transgender", "non-binary"], + adverb: [], }, - tu: { noun: ["pair"], adjective: ["pair"], adverb: ["pair"] }, - unpa: { noun: ["sex"], adjective: ["sex"], adverb: ["sex"] }, - uta: { noun: ["mouth"], adjective: ["mouth"], adverb: ["mouth"] }, + tu: { noun: ["pair"], adjective: ["two"], adverb: [] }, + unpa: { noun: ["sex"], adjective: ["sexual"], adverb: ["sexually"] }, + uta: { noun: ["mouth"], adjective: [], adverb: [] }, utala: { noun: ["conflict", "difficulty"], - adjective: ["conflict", "difficulty"], - adverb: ["conflict", "difficulty"], + adjective: ["conflicting", "difficult"], + adverb: ["conflictingly", "difficultly"], }, walo: { noun: ["whiteness", "paleness"], - adjective: ["whiteness", "paleness"], - adverb: ["whiteness", "paleness"], + adjective: ["white", "pale"], + adverb: [], }, - wan: { noun: ["one"], adjective: ["one"], adverb: ["one"] }, + wan: { noun: ["one"], adjective: ["one"], adverb: [] }, waso: { noun: ["bird", "birds"], - adjective: ["bird", "birds"], - adverb: ["bird", "birds"], + adjective: ["bird-like"], + adverb: [], }, wawa: { noun: ["power", "powers"], - adjective: ["power", "powers"], - adverb: ["power", "powers"], - }, - weka: { - noun: ["leaving"], - adjective: ["leaving"], - adverb: ["leaving"], + adjective: ["powerful"], + adverb: ["powerfully"], }, + weka: { noun: ["leaving"], adjective: ["leaving"], adverb: [] }, wile: { noun: ["want", "wants", "need", "needs"], - adjective: ["want", "wants", "need", "needs"], - adverb: ["want", "wants", "need", "needs"], + adjective: [], + adverb: [], }, }; From 084ddc76ca8b88d15d2ee11781f410a625a0c291 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 20:19:51 +0800 Subject: [PATCH 198/271] apply translation changes --- src/translation.ts | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/translation.ts b/src/translation.ts index 45ea282..509c2ac 100644 --- a/src/translation.ts +++ b/src/translation.ts @@ -369,7 +369,12 @@ export const TRANSLATION: { [key: string]: Translation } = { }, tomo: { noun: ["house", "houses"], adjective: [], adverb: [] }, tonsi: { - noun: ["transgender", "transgenders", "non-binary", "non-binaries"], + noun: [ + "transgender person", + "transgender people", + "non-binary person", + "non-binary people", + ], adjective: ["transgender", "non-binary"], adverb: [], }, From e4eba164d4fe4086b4756b03d53c1ee213afac86 Mon Sep 17 00:00:00 2001 From: neverRare Date: Sun, 21 Jan 2024 20:21:57 +0800 Subject: [PATCH 199/271] add "pu" without translations --- src/translation.ts | 5 +++++ src/vocabulary.ts | 1 + 2 files changed, 6 insertions(+) diff --git a/src/translation.ts b/src/translation.ts index 509c2ac..02950e7 100644 --- a/src/translation.ts +++ b/src/translation.ts @@ -279,6 +279,11 @@ export const TRANSLATION: { [key: string]: Translation } = { adjective: ["good", "simple"], adverb: ["nicely"], }, + pu: { + noun: [], + adjective: [], + adverb: [], + }, sama: { noun: ["similarity"], adjective: [], adverb: ["equally"] }, seli: { noun: ["fire", "heat", "chemical reaction", "chemical reactions"], diff --git a/src/vocabulary.ts b/src/vocabulary.ts index 890a5b5..360d39c 100644 --- a/src/vocabulary.ts +++ b/src/vocabulary.ts @@ -92,6 +92,7 @@ export const CONTENT_WORD = new Set([ "poka", "poki", "pona", + "pu", "sama", "seli", "selo", From c1181ab84920263c86c0bb351a3c3ad063b6cdf4 Mon Sep 17 00:00:00 2001 From: neverRare Date: Mon, 22 Jan 2024 09:58:00 +0800 Subject: [PATCH 200/271] filter out "nanpa ala nanpa" --- src/filter.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/filter.ts b/src/filter.ts index eeba9a0..eade6ff 100644 --- a/src/filter.ts +++ b/src/filter.ts @@ -38,6 +38,13 @@ export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [ } return true; }, + // disallow _nanpa ala nanpa_ + (modifier) => { + if (modifier.type === "nanpa" && modifier.nanpa.type === "x ala x") { + throw new UnrecognizedError('"nanpa ala nanpa"'); + } + return true; + }, // nanpa construction cannot contain preposition (modifier) => { if (modifier.type === "nanpa" && modifier.phrase.type === "preposition") { From 9b5fd29b6b3dc1da7943c398598f7b543e775380 Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 11:53:19 +0800 Subject: [PATCH 201/271] implement translator for a single sentence --- src/translator.ts | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/src/translator.ts b/src/translator.ts index b2bb4b6..6b8da36 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -1,13 +1,40 @@ -import { Sentence } from "./ast.ts"; +import { FullClause, Sentence } from "./ast.ts"; import { Output } from "./output.ts"; import { parser } from "./parser.ts"; /** A special kind of Output that translators returns. */ export type TranslationOutput = Output; +function translateFullClause(fullClause: FullClause): TranslationOutput { + throw new Error("todo"); +} /** Translates a single sentence. */ function translateSentence(sentence: Sentence): TranslationOutput { - throw new Error("todo"); + const laClauses = sentence.laClauses; + const contexts = laClauses.slice(0, laClauses.length - 1); + const final = laClauses[laClauses.length - 1]; + const contextTranslation = contexts.reduce( + (output, context) => + output.flatMap((left) => + translateFullClause(context).map((right) => { + if (left === "") { + return `given ${right}, `; + } else { + return `${left}given ${right}`; + } + }) + ), + new Output([""]), + ); + return contextTranslation.flatMap((contexts) => + translateFullClause(final).map((final) => { + if (contexts === "") { + return [final, sentence.punctuation].join(""); + } else { + return [contexts, final, sentence.punctuation].join(""); + } + }) + ); } /** Translates multiple sentences. */ function translateSentences(sentences: Array): TranslationOutput { From 28dde3e248ce8e033c0fdc4ff6ceb2a3da315e8e Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 12:44:29 +0800 Subject: [PATCH 202/271] simplify translator functions --- src/translator.ts | 65 ++++++++++++++++++++--------------------------- 1 file changed, 28 insertions(+), 37 deletions(-) diff --git a/src/translator.ts b/src/translator.ts index 6b8da36..3d7a5e2 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -5,51 +5,42 @@ import { parser } from "./parser.ts"; /** A special kind of Output that translators returns. */ export type TranslationOutput = Output; +/** + * Helper function for turning array or tuple of Output into Output of array or + * tuple. + */ +// TODO: maybe there's a better name +function rotate>( + array: { [I in keyof T]: Output } & { length: T["length"] }, +): Output { + // We resorted to using `any` types here, make sure it works properly + return array.reduce( + // deno-lint-ignore no-explicit-any + (result: Output, output) => + result.flatMap((left) => output.map((right) => [...left, right])), + // deno-lint-ignore no-explicit-any + new Output([[]]), + ) as Output; +} function translateFullClause(fullClause: FullClause): TranslationOutput { throw new Error("todo"); } /** Translates a single sentence. */ function translateSentence(sentence: Sentence): TranslationOutput { - const laClauses = sentence.laClauses; - const contexts = laClauses.slice(0, laClauses.length - 1); - const final = laClauses[laClauses.length - 1]; - const contextTranslation = contexts.reduce( - (output, context) => - output.flatMap((left) => - translateFullClause(context).map((right) => { - if (left === "") { - return `given ${right}, `; - } else { - return `${left}given ${right}`; - } - }) - ), - new Output([""]), - ); - return contextTranslation.flatMap((contexts) => - translateFullClause(final).map((final) => { - if (contexts === "") { - return [final, sentence.punctuation].join(""); - } else { - return [contexts, final, sentence.punctuation].join(""); - } - }) - ); + return rotate(sentence.laClauses.map(translateFullClause)).map((clauses) => { + const contexts = clauses.slice(0, clauses.length - 1); + const final = clauses[clauses.length - 1]; + return [ + ...contexts.map((context) => `given ${context}, `), + final, + sentence.punctuation, + ].join(""); + }); } /** Translates multiple sentences. */ function translateSentences(sentences: Array): TranslationOutput { - return sentences.reduce( - (output, sentence) => - output.flatMap((left) => - translateSentence(sentence).map((right) => { - if (left === "") { - return right; - } else { - return [left, right].join(" "); - } - }) - ), - new Output([""]), + return rotate(sentences.map(translateSentence)).map((sentences) => + sentences.join(" ") ); } /** Full Toki Pona translator. */ From 7429ebec449f9cac1002bd353297dd31566fdfee Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 12:59:42 +0800 Subject: [PATCH 203/271] implement translator for clause --- src/translator.ts | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/translator.ts b/src/translator.ts index 3d7a5e2..1dffabc 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -1,4 +1,6 @@ +import { Clause } from "./ast.ts"; import { FullClause, Sentence } from "./ast.ts"; +import { UnreachableError } from "./error.ts"; import { Output } from "./output.ts"; import { parser } from "./parser.ts"; @@ -22,9 +24,37 @@ function rotate>( new Output([[]]), ) as Output; } -function translateFullClause(fullClause: FullClause): TranslationOutput { +/** Translates a clause. */ +function translateClause(clause: Clause): TranslationOutput { throw new Error("todo"); } +/** Translates a full clause. */ +function translateFullClause(fullClause: FullClause): TranslationOutput { + return translateClause(fullClause.clause).map((clause) => { + let but = ""; + const taso = fullClause.taso; + if (taso) { + if (taso.type === "default") { + but = "but "; + } else if (taso.type === "reduplication") { + but = new Array(taso.count).fill("but ").join(); + } else { + throw new UnreachableError(); + } + } + let isntIt = ""; + const anuSeme = fullClause.anuSeme; + if (anuSeme) { + if (anuSeme.type === "default") { + isntIt = ", isn't it"; + } else if (anuSeme.type === "reduplication") { + // TODO: better translation + isntIt = new Array(anuSeme.count).fill(", isn't it").join(); + } + } + return [but, clause, isntIt].join(""); + }); +} /** Translates a single sentence. */ function translateSentence(sentence: Sentence): TranslationOutput { return rotate(sentence.laClauses.map(translateFullClause)).map((clauses) => { From 68ae263c46fca73ec6ac8b22d2f01e1b5e028c65 Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 13:02:31 +0800 Subject: [PATCH 204/271] this can be placed outside --- src/translator.ts | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/src/translator.ts b/src/translator.ts index 1dffabc..3f5a67d 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -30,28 +30,28 @@ function translateClause(clause: Clause): TranslationOutput { } /** Translates a full clause. */ function translateFullClause(fullClause: FullClause): TranslationOutput { - return translateClause(fullClause.clause).map((clause) => { - let but = ""; - const taso = fullClause.taso; - if (taso) { - if (taso.type === "default") { - but = "but "; - } else if (taso.type === "reduplication") { - but = new Array(taso.count).fill("but ").join(); - } else { - throw new UnreachableError(); - } + let but = ""; + const taso = fullClause.taso; + if (taso) { + if (taso.type === "default") { + but = "but "; + } else if (taso.type === "reduplication") { + but = new Array(taso.count).fill("but ").join(); + } else { + throw new UnreachableError(); } - let isntIt = ""; - const anuSeme = fullClause.anuSeme; - if (anuSeme) { - if (anuSeme.type === "default") { - isntIt = ", isn't it"; - } else if (anuSeme.type === "reduplication") { - // TODO: better translation - isntIt = new Array(anuSeme.count).fill(", isn't it").join(); - } + } + let isntIt = ""; + const anuSeme = fullClause.anuSeme; + if (anuSeme) { + if (anuSeme.type === "default") { + isntIt = ", isn't it"; + } else if (anuSeme.type === "reduplication") { + // TODO: better translation + isntIt = new Array(anuSeme.count).fill(", isn't it").join(); } + } + return translateClause(fullClause.clause).map((clause) => { return [but, clause, isntIt].join(""); }); } From 2fba33a0d45f8b706f7808b06a882508d27ed297 Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 13:05:04 +0800 Subject: [PATCH 205/271] no error idk --- src/translator.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/translator.ts b/src/translator.ts index 3f5a67d..bd2fe3d 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -37,8 +37,6 @@ function translateFullClause(fullClause: FullClause): TranslationOutput { but = "but "; } else if (taso.type === "reduplication") { but = new Array(taso.count).fill("but ").join(); - } else { - throw new UnreachableError(); } } let isntIt = ""; From c0761a371cfd56f7c02ff332a54e8faa17241b31 Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 13:08:14 +0800 Subject: [PATCH 206/271] formatting --- src/translator.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/translator.ts b/src/translator.ts index bd2fe3d..14fb4d6 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -49,9 +49,9 @@ function translateFullClause(fullClause: FullClause): TranslationOutput { isntIt = new Array(anuSeme.count).fill(", isn't it").join(); } } - return translateClause(fullClause.clause).map((clause) => { - return [but, clause, isntIt].join(""); - }); + return translateClause(fullClause.clause).map((clause) => + [but, clause, isntIt].join("") + ); } /** Translates a single sentence. */ function translateSentence(sentence: Sentence): TranslationOutput { From 8af3384817db95d715162e221affac25cabfbd23 Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 13:40:37 +0800 Subject: [PATCH 207/271] lazy is unnecessary here --- src/parser.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index c1ea679..1a10e73 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -412,8 +412,8 @@ function nestedPhrases( ); } else { return choice( - lazy(() => nestedPhrasesOnly(nestingRule)), - lazy(() => nestedPhrases(nestingRule.slice(1))), + nestedPhrasesOnly(nestingRule), + nestedPhrases(nestingRule.slice(1)), ); } } @@ -487,13 +487,13 @@ function multiplePredicates( sequence( choice( associatedPredicates(nestingRule), - lazy(() => multiplePredicates(rest)), + multiplePredicates(rest), ), manyAtLeastOnce( optionalComma().with(specificWord(first)).with( choice( associatedPredicates(nestingRule), - lazy(() => multiplePredicates(rest)), + multiplePredicates(rest), ), ), ), @@ -501,7 +501,7 @@ function multiplePredicates( type, predicates: [group, ...moreGroups], } as MultiplePredicates)), - lazy(() => multiplePredicates(rest)), + multiplePredicates(rest), ); } } From 329b07778bdfcd62e196b163dfa06e056ca1af15 Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 13:43:42 +0800 Subject: [PATCH 208/271] add todo note for avoiding duplicate --- src/parser.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/parser.ts b/src/parser.ts index 1a10e73..75ab9c2 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -418,6 +418,7 @@ function nestedPhrases( } } /** Parses phrases separated by _en_ or _anu_. */ +// TODO: avoid duplicates function subjectPhrases(): Parser { return choice( nestedPhrases(["en", "anu"]), From 057b0dee5460bd565381f9e8410539f1d37c6212 Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 13:47:36 +0800 Subject: [PATCH 209/271] add main script and add translate button --- index.html | 3 +++ src/main.ts | 26 ++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) create mode 100644 src/main.ts diff --git a/index.html b/index.html index 19eff16..d3b0b04 100644 --- a/index.html +++ b/index.html @@ -29,6 +29,9 @@

    Toki Pona Translator

    >.

    +
    + +

      diff --git a/src/main.ts b/src/main.ts new file mode 100644 index 0000000..2d18726 --- /dev/null +++ b/src/main.ts @@ -0,0 +1,26 @@ +import { translate } from "./translator.ts"; + +document.addEventListener("DOMContentLoaded", () => { + const input = document.getElementById("input") as HTMLTextAreaElement; + const output = document.getElementById("output") as HTMLUListElement; + const error = document.getElementById("error") as HTMLParagraphElement; + const button = document.getElementById( + "translate-button", + ) as HTMLButtonElement; + button.addEventListener("click", () => { + while (output.children.length > 0) { + output.removeChild(output.children[0]); + } + error.innerText = ""; + const translations = translate(input.value); + if (translations.isError()) { + error.innerText = translations.error?.message ?? "No error provided"; + } else { + for (const translation of translations.output) { + const list = document.createElement("li"); + list.innerText = translation; + output.appendChild(list); + } + } + }); +}); From 911dec951b0abb85427e811fb70343f9b367503b Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 13:49:36 +0800 Subject: [PATCH 210/271] restructure README.md --- README.md | 28 ++-------------------------- 1 file changed, 2 insertions(+), 26 deletions(-) diff --git a/README.md b/README.md index bebb8f6..313b644 100644 --- a/README.md +++ b/README.md @@ -4,33 +4,9 @@ An imperfect Toki Pona to English translator that translates into multiple sente [Try it](https://neverrare.github.io/toki-pona-translator/) -## Goals +## Building -The goals for this projects are: - -- Provide translation that covers most of semantics and meaning of a Toki Pona sentence, but it doesn't have to be complete. This gives translations for users to scan into to give them a feel of how broad a Toki Pona sentence can mean. -- As much as possible, provide translations that are grammatically sound: not just correct but also feels right. For example, "one red thing" sounds better than "red one thing". Due to the difference of English and Toki Pona and nuances of English, the translator may fall severely short for this goal, but we can try! - -## Non-goals - -- Provide every possible translations. -- Handle every edge cases of Toki Pona grammar. Some edge cases are listed in [limitations] along with others. -- Handle compounds such as translating "tomo tawa" into "vehicle" -- Translate Tokiponized proper word into Untokiponized word such as translating "Manka" into "Minecraft" - -Some of these may be lifted in the future. - -## Terminology - -These are the terminology used in [limitations]. **These are not official grammatical terms**. - -- Headword – A single part of speech that in English, can be a noun, a verb, or an adjective; what the phrase starts with. -- Modifier – A part of speech that modifies headword or another modifier. -- Phrase – Headword and its modifiers. -- Preclause – "taso" or "a" particle before clauses. -- Postclause – "a" particle or "anu seme" phrase after clauses. -- Clause – Phrase or sentence found before and after "la". -- Proper Word – Proper name; Capitalized in Toki Pona. +TODO ## Limitations From e628df2d43295b4b2ddfecfb4f7e9a4220d5fd61 Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 13:50:26 +0800 Subject: [PATCH 211/271] use link to wiki --- index.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/index.html b/index.html index d3b0b04..8778900 100644 --- a/index.html +++ b/index.html @@ -24,7 +24,8 @@

      Toki Pona Translator

      An imperfect Toki Pona to English translator that translates into multiple sentences. - Limitations.

      From 1222e6e887c1f2398bae2984d6f090988429c3a8 Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 13:54:37 +0800 Subject: [PATCH 212/271] remove useless link reference --- README.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/README.md b/README.md index 313b644..723f022 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,6 @@ TODO ## Limitations -[limitations]: #limitations - The following are currently unrecognized (non-definitive but pedantic). ✏️ means it is a limitation due to being work in progress and it will be lifted soon. Other limitation may also be lifted. - ✏️ Full sentences: It can only translate phrases for now. From c956729e27bbd857541d06e21a60c1df4dc651db Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 14:00:40 +0800 Subject: [PATCH 213/271] add CONTRIBUTING.md --- CONTRIBUTING.md | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 CONTRIBUTING.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..3a12db0 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,7 @@ +# Contributing + +You can do the usual github stuff: Open issue if there's an issue or you have a suggestion; Open pull request if you want to propose changes. If you want to propose a large change however, please open an issue first (or comment on an already existing issue page), and wait for my signal before beginning to work. + +## The wiki + +The wiki provides useful information for contributors, although it's not complete yet. Check it out: [Visit wiki](https://github.com/neverRare/toki-pona-translator/wiki). From aecf598788b4a352c499cd3eedcf84b73eb0b8e5 Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 14:02:59 +0800 Subject: [PATCH 214/271] update style for summary --- style.css | 3 +++ 1 file changed, 3 insertions(+) diff --git a/style.css b/style.css index bea3e1f..abc64a5 100644 --- a/style.css +++ b/style.css @@ -36,3 +36,6 @@ a:visited { color: #ff5e5e; } } +summary { + cursor: pointer; +} From c124b9052e83db032116cf7b41beba846fcebc01 Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 14:04:37 +0800 Subject: [PATCH 215/271] add comment to old code --- main.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/main.js b/main.js index 12710b4..a2f0ea6 100644 --- a/main.js +++ b/main.js @@ -1,5 +1,7 @@ "use strict"; +// This code is no longer needed and being replaced by `src/` + class UnrecognizedError extends Error {} class UntranslatableError extends Error {} From 435303d4285b71115e5276a89c7522a604eb5ad0 Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 14:42:06 +0800 Subject: [PATCH 216/271] add code for bundling codes --- bundle.ts | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 bundle.ts diff --git a/bundle.ts b/bundle.ts new file mode 100644 index 0000000..a2e0554 --- /dev/null +++ b/bundle.ts @@ -0,0 +1,10 @@ +import { bundle } from "https://deno.land/x/emit@0.34.0/mod.ts"; + +const SOURCE = "./src/main.ts"; +const DESTINATION = "./main.js"; + +const url = new URL(SOURCE, import.meta.url); +const result = await bundle(url); + +const { code } = result; +await Deno.writeTextFile(DESTINATION, code); From f547314d7686f97401d9073e8e9f433cc081fcf0 Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 15:42:13 +0800 Subject: [PATCH 217/271] add deno.js --- deno.json | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 deno.json diff --git a/deno.json b/deno.json new file mode 100644 index 0000000..7c43bbb --- /dev/null +++ b/deno.json @@ -0,0 +1,16 @@ +{ + "lock": false, + "compilerOptions": { + "target": "esnext", + "lib": ["dom", "dom.iterable", "dom.asynciterable", "deno.ns"] + }, + "tasks": { + "build": "deno run --allow-read --allow-write --allow-env ./bundle.ts" + }, + "fmt": { + "include": ["./src/**.*", "./bundle.ts", "./test-parser.ts"] + }, + "lint": { + "include": ["./src/**.*", "./bundle.ts", "./test-parser.ts"] + } +} From 911b68f4b217098e6da0d68879274ee4c5866edb Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 15:42:22 +0800 Subject: [PATCH 218/271] add instruction for building --- README.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 723f022..d16896a 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,17 @@ An imperfect Toki Pona to English translator that translates into multiple sente ## Building -TODO +You'll need [Deno](https://deno.com/). Run the following command. + +``` +git clone https://github.com/neverRare/toki-pona-translator.git +cd toki-pona-translator +deno task build +``` + +Then open `./index.html` using your favorite browser. + +Whenever you made changes to `./src/**.ts`, you'll need to run `deno task build` again and refresh the browser. Later I'll make a script to automate this. ## Limitations From e31832323e405179c8d1168a51ee17fac0a7aeb5 Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 15:43:43 +0800 Subject: [PATCH 219/271] lint --- src/translator.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/translator.ts b/src/translator.ts index 14fb4d6..a8262e3 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -1,6 +1,5 @@ import { Clause } from "./ast.ts"; import { FullClause, Sentence } from "./ast.ts"; -import { UnreachableError } from "./error.ts"; import { Output } from "./output.ts"; import { parser } from "./parser.ts"; From b7be7190f316246ae324d3da14d94c031de84b93 Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 15:51:44 +0800 Subject: [PATCH 220/271] add code for handling enter key --- src/main.ts | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/main.ts b/src/main.ts index 2d18726..80f8396 100644 --- a/src/main.ts +++ b/src/main.ts @@ -7,7 +7,7 @@ document.addEventListener("DOMContentLoaded", () => { const button = document.getElementById( "translate-button", ) as HTMLButtonElement; - button.addEventListener("click", () => { + const translate = () => { while (output.children.length > 0) { output.removeChild(output.children[0]); } @@ -22,5 +22,12 @@ document.addEventListener("DOMContentLoaded", () => { output.appendChild(list); } } + }; + button.addEventListener("click", translate); + input.addEventListener("keydown", (event) => { + if (event.code === "enter") { + translate(); + event.preventDefault(); + } }); }); From 5c00c854e03ce7c873ab75bbb3b08fe89e7bef0e Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 15:53:00 +0800 Subject: [PATCH 221/271] fix mistake --- src/main.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main.ts b/src/main.ts index 80f8396..50f6eec 100644 --- a/src/main.ts +++ b/src/main.ts @@ -25,7 +25,7 @@ document.addEventListener("DOMContentLoaded", () => { }; button.addEventListener("click", translate); input.addEventListener("keydown", (event) => { - if (event.code === "enter") { + if (event.code === "Enter") { translate(); event.preventDefault(); } From b5bf8594ac1cf3a4d399099960f88707080b8b7e Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 15:55:40 +0800 Subject: [PATCH 222/271] fix another mistake --- src/main.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main.ts b/src/main.ts index 50f6eec..e62f7d4 100644 --- a/src/main.ts +++ b/src/main.ts @@ -7,7 +7,7 @@ document.addEventListener("DOMContentLoaded", () => { const button = document.getElementById( "translate-button", ) as HTMLButtonElement; - const translate = () => { + const output = () => { while (output.children.length > 0) { output.removeChild(output.children[0]); } @@ -26,7 +26,7 @@ document.addEventListener("DOMContentLoaded", () => { button.addEventListener("click", translate); input.addEventListener("keydown", (event) => { if (event.code === "Enter") { - translate(); + output(); event.preventDefault(); } }); From ac175f8f1a26c0f4c0cd3374aa28bde5aaa22ba9 Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 15:57:26 +0800 Subject: [PATCH 223/271] I can't type check this for whatever reason --- src/main.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main.ts b/src/main.ts index e62f7d4..481e6af 100644 --- a/src/main.ts +++ b/src/main.ts @@ -7,7 +7,7 @@ document.addEventListener("DOMContentLoaded", () => { const button = document.getElementById( "translate-button", ) as HTMLButtonElement; - const output = () => { + const listener = () => { while (output.children.length > 0) { output.removeChild(output.children[0]); } @@ -23,10 +23,10 @@ document.addEventListener("DOMContentLoaded", () => { } } }; - button.addEventListener("click", translate); + button.addEventListener("click", listener); input.addEventListener("keydown", (event) => { if (event.code === "Enter") { - output(); + listener(); event.preventDefault(); } }); From 9363e192fc6ed18abfc44c6d12f98a983a1df71b Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 16:01:56 +0800 Subject: [PATCH 224/271] add comment to maybe use worker --- src/main.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main.ts b/src/main.ts index 481e6af..8ba9a04 100644 --- a/src/main.ts +++ b/src/main.ts @@ -1,5 +1,6 @@ import { translate } from "./translator.ts"; +// TODO: maybe use worker document.addEventListener("DOMContentLoaded", () => { const input = document.getElementById("input") as HTMLTextAreaElement; const output = document.getElementById("output") as HTMLUListElement; From d44fc3e6fea1423b54d16e2294539ccc32299eed Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 16:02:52 +0800 Subject: [PATCH 225/271] remove Andika from style.css --- style.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/style.css b/style.css index abc64a5..ccfc037 100644 --- a/style.css +++ b/style.css @@ -1,6 +1,6 @@ body { margin: 10px; - font-family: Andika, sans-serif; + font-family: sans-serif; } a { color: #0057af; From 347ba7f4839c9c53825cfc6b663f1e3970811c97 Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 16:05:38 +0800 Subject: [PATCH 226/271] update include --- deno.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deno.json b/deno.json index 7c43bbb..52fee3f 100644 --- a/deno.json +++ b/deno.json @@ -8,9 +8,9 @@ "build": "deno run --allow-read --allow-write --allow-env ./bundle.ts" }, "fmt": { - "include": ["./src/**.*", "./bundle.ts", "./test-parser.ts"] + "include": ["./src/**/*.ts", "./bundle.ts", "./test-parser.ts"] }, "lint": { - "include": ["./src/**.*", "./bundle.ts", "./test-parser.ts"] + "include": ["./src/**/*.ts", "./bundle.ts", "./test-parser.ts"] } } From 6c25ac4ef50ef5f4a12530ba6495f8a175ae16ae Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 16:10:31 +0800 Subject: [PATCH 227/271] prevented duplicates --- src/parser.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 75ab9c2..976bb4d 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -418,11 +418,11 @@ function nestedPhrases( } } /** Parses phrases separated by _en_ or _anu_. */ -// TODO: avoid duplicates function subjectPhrases(): Parser { return choice( - nestedPhrases(["en", "anu"]), - nestedPhrases(["anu", "en"]).filter((phrase) => phrase.type !== "single"), + nestedPhrasesOnly(["en", "anu"]), + nestedPhrasesOnly(["anu", "en"]), + phrase().map((phrase) => ({ type: "single", phrase })), ); } /** Parses prepositional phrase. */ From 0c1ffe68b72634d42da03a5252fc21e0858802d1 Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 16:40:08 +0800 Subject: [PATCH 228/271] update changelog --- CHANGELOG.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 889fd6e..17f885c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,8 +5,10 @@ For this version. The whole code has been rewritten. This makes the code a lot easier to modify. Due to this, there are inevitable changes to the translator. - New limitation list. -- Drop support for "a" particle. +- Add button for translating, replacing auto-translate when typing. +- (Downgrade) Drop support for "a" particle. - (Downgrade) Error messages are now very unreliable. +- (Downgrade) Translator is somewhat slower. - Remove Discord DM as contact option. - Update translation list: - _tonsi_ – change nouns "transgender", "transgenders", "non-binary", and "non-binaries" into "transgender person", "transgender people", "non-binary person", and "non-binary people" (I DIDN'T MEAN TO OBJECTIFY THEM OMFG I'M SO SORRY 😭😭😭) @@ -16,6 +18,8 @@ Inside update (intended for developers): - Rewritten whole code to use TypeScript, module, and functional programming. - Rewritten parser to use parser combinator. - Add language codes to html. +- New wiki for contributors and thinkerers. +- Overhaul `README.md`, only including build instruction. Information about the translator is now moved to wiki. ## 0.1.1 From 21421d515dac8c55fc1ef8ee8f5fe8bebecad03e Mon Sep 17 00:00:00 2001 From: neverRare Date: Tue, 23 Jan 2024 18:41:34 +0800 Subject: [PATCH 229/271] small change --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index d16896a..7581079 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ deno task build Then open `./index.html` using your favorite browser. -Whenever you made changes to `./src/**.ts`, you'll need to run `deno task build` again and refresh the browser. Later I'll make a script to automate this. +Whenever you made changes to `./src/*.ts`, you'll need to run `deno task build` again and refresh the browser. Later I'll make a script to automate this. ## Limitations From c8a4ec5c059df193c729226fe09fb4c94081519c Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 09:05:02 +0800 Subject: [PATCH 230/271] rename translation to definition --- src/{translation.ts => definition.ts} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename src/{translation.ts => definition.ts} (100%) diff --git a/src/translation.ts b/src/definition.ts similarity index 100% rename from src/translation.ts rename to src/definition.ts From 7bea2af1c8304e00b3bbd57f36fb6bd23062a2b7 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 09:46:14 +0800 Subject: [PATCH 231/271] add TODO note for adding AST walker --- src/filter.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/filter.ts b/src/filter.ts index eade6ff..7c4c834 100644 --- a/src/filter.ts +++ b/src/filter.ts @@ -9,6 +9,8 @@ import { } from "./ast.ts"; import { UnrecognizedError } from "./error.ts"; +// TODO: AST walker + /** Array of filter rules for a word unit. */ export const WORD_UNIT_RULES: Array<(wordUnit: WordUnit) => boolean> = [ // avoid "seme ala seme" From 8fca7b5ee0d103e6476618d3f78e2df87d7131bc Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 09:50:07 +0800 Subject: [PATCH 232/271] improve comments --- src/filter.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/filter.ts b/src/filter.ts index 7c4c834..08fdc12 100644 --- a/src/filter.ts +++ b/src/filter.ts @@ -185,7 +185,7 @@ export const PHRASE_RULE: Array<(phrase: Phrase) => boolean> = [ return true; }, ]; -/** Array of filter rules for a single phrase. */ +/** Array of filter rules for preposition. */ export const PREPOSITION_RULE: Array<(phrase: Preposition) => boolean> = [ // Disallow preverb modifiers other than _ala_ (preposition) => { From e5e6a07d08a59ad6930b7603c51ae182a9d978ea Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 09:57:07 +0800 Subject: [PATCH 233/271] delete unneeded codes --- src/duplicate-checker.ts | 23 ------------ src/fuzzer.ts | 80 ---------------------------------------- 2 files changed, 103 deletions(-) delete mode 100644 src/duplicate-checker.ts delete mode 100644 src/fuzzer.ts diff --git a/src/duplicate-checker.ts b/src/duplicate-checker.ts deleted file mode 100644 index d38c5a5..0000000 --- a/src/duplicate-checker.ts +++ /dev/null @@ -1,23 +0,0 @@ -import { randomPhrase } from "./fuzzer.ts"; -import { parser } from "./parser.ts"; - -const timeStart = +new Date(); -const duration = 10 * 1000; -let count = 0; - -while (+new Date() < timeStart + duration) { - const words = randomPhrase(); - if (words.length > 10) continue; - const src = words.join(" "); - const set = new Set(); - for (const ast of parser(src).output) { - const json = JSON.stringify(ast); - if (set.has(json)) { - throw new Error(`Duplicate found when parsing "${src}".`); - } else { - set.add(json); - } - } - count++; -} -console.log(`Tested ${count} random sentences.`); diff --git a/src/fuzzer.ts b/src/fuzzer.ts deleted file mode 100644 index d76c806..0000000 --- a/src/fuzzer.ts +++ /dev/null @@ -1,80 +0,0 @@ -import { PREVERB } from "./vocabulary.ts"; -import { CONTENT_WORD } from "./vocabulary.ts"; - -const CONSONANTS = "p t k s m n l j w".split(" "); -const VOWELS = "a e i o u".split(" "); - -function randomIn(...items: Array): T { - if (items.length === 0) throw new Error("passed empty arguments"); - return items[randomNumber(items.length - 1)]; -} -function randomNumber(max: number): number { - return Math.floor(Math.random() * (max + 1)); -} -function randomWord(set: Set): string { - return randomIn(...set); -} -function fill(number: number, mapper: () => T): Array { - return new Array(number).fill(undefined).map(mapper); -} -function randomName(): string { - const first = randomIn(...CONSONANTS).toUpperCase() + randomIn(...VOWELS); - const more = fill( - randomNumber(2), - () => randomIn(...CONSONANTS) + randomIn(...VOWELS), - ); - return first + more.join(""); -} -function asAlaQuestion(word: string): Array { - return [word, "ala", word]; -} -function randomModifier(): Array { - return randomIn( - () => [randomWord(CONTENT_WORD)], - () => asAlaQuestion(randomWord(CONTENT_WORD)), - () => [randomName()], - () => ["pi", ...randomPhrase()], - () => ["nanpa", ...randomPhrase()], - randomNumberWords, - )(); -} -function randomNumberWords(): Array { - const words = []; - let number = 1 + randomNumber(400); - while (number > 0) { - if (number >= 100) { - words.push(randomIn("ale", "ali")); - number -= 100; - } else if (number >= 20) { - words.push("mute"); - number -= 20; - } else if (number >= 5) { - words.push("luka"); - number -= 5; - } else if (number >= 2) { - words.push("tu"); - number -= 2; - } else { - words.push("wan"); - number--; - } - } - return words; -} -// TODO: nested preverbs and preposition -// TODO: remove export when randomSentence is defined -export function randomPhrase(): Array { - const modifiers = fill(randomNumber(2), randomModifier).flat(); - const phrase = randomIn(() => { - const headWord = randomIn( - () => [randomWord(CONTENT_WORD)], - () => asAlaQuestion(randomWord(CONTENT_WORD)), - )(); - return [...headWord, ...modifiers]; - }, () => [...randomNumberWords(), ...modifiers])(); - return randomIn( - () => phrase, - () => [...asAlaQuestion(randomWord(PREVERB)), ...phrase], - () => [randomWord(PREVERB), ...phrase], - )(); -} From 5a8ee73edb12a7d7554c84ef5b630b8f46c0cc63 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 10:06:03 +0800 Subject: [PATCH 234/271] add TODO in filter --- src/filter.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/filter.ts b/src/filter.ts index 08fdc12..59e9ffe 100644 --- a/src/filter.ts +++ b/src/filter.ts @@ -10,6 +10,8 @@ import { import { UnrecognizedError } from "./error.ts"; // TODO: AST walker +// TODO: filter nested prepositions +// TODO: filter preposition in subject and object /** Array of filter rules for a word unit. */ export const WORD_UNIT_RULES: Array<(wordUnit: WordUnit) => boolean> = [ From 23ca0b0525b29863929e2539f79e576a36b3ea31 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 10:29:51 +0800 Subject: [PATCH 235/271] implement walker for modifiers --- src/ast.ts | 39 +++++++++++++++++++++++++++++++++ src/filter.ts | 60 ++++++++++++++------------------------------------- 2 files changed, 55 insertions(+), 44 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index 1992d4b..8afdcca 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -100,3 +100,42 @@ export type Quotation = { leftMark: string; rightMark: string; }; +export function someModifierInPhrase( + phrase: Phrase, + whenQuotation: boolean, + checker: (modifier: Modifier) => boolean, +): boolean { + if (phrase.type === "default") { + return phrase.modifiers.some(checker); + } else if (phrase.type === "preverb") { + return phrase.modifiers.some(checker) || + someModifierInPhrase(phrase.phrase, whenQuotation, checker); + } else if (phrase.type === "preposition") { + const preposition = phrase.preposition; + return preposition.modifiers.some(checker) || + someModifierInMultiplePhrases( + preposition.phrases, + whenQuotation, + checker, + ); + } else if (phrase.type === "quotation") { + return whenQuotation; + } else { + throw new Error("unreachable"); + } +} +export function someModifierInMultiplePhrases( + phrases: MultiplePhrases, + whenQuotation: boolean, + checker: (modifier: Modifier) => boolean, +): boolean { + if (phrases.type === "single") { + return someModifierInPhrase(phrases.phrase, whenQuotation, checker); + } else if (phrases.type === "and conjunction" || phrases.type === "anu") { + return phrases.phrases.some((phrases) => + someModifierInMultiplePhrases(phrases, whenQuotation, checker) + ); + } else { + throw new Error("unreachable"); + } +} diff --git a/src/filter.ts b/src/filter.ts index 59e9ffe..196c216 100644 --- a/src/filter.ts +++ b/src/filter.ts @@ -5,6 +5,7 @@ import { Phrase, Preposition, Sentence, + someModifierInPhrase, WordUnit, } from "./ast.ts"; import { UnrecognizedError } from "./error.ts"; @@ -111,8 +112,22 @@ export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [ }, // pi cannot be nested (modifier) => { + const checker = (modifier: Modifier) => { + if ( + modifier.type === "default" || modifier.type === "proper words" || + modifier.type === "quotation" + ) { + return false; + } else if (modifier.type === "nanpa") { + return someModifierInPhrase(modifier.phrase, false, checker); + } else if (modifier.type === "pi") { + return true; + } else { + throw new Error("unreachable error"); + } + }; if (modifier.type === "pi") { - if (phraseHasPi(modifier.phrase)) { + if (someModifierInPhrase(modifier.phrase, false, checker)) { throw new UnrecognizedError("pi inside pi"); } } @@ -243,46 +258,3 @@ function modifiersIsAlaOrNone(modifiers: Array): boolean { } return true; } -/** Checks if modifiers has _pi_. */ -function modifiersHasPi(modifiers: Array): boolean { - return modifiers.some((modifier) => { - if ( - modifier.type === "default" || modifier.type === "proper words" || - modifier.type === "quotation" - ) { - return false; - } else if (modifier.type === "nanpa") { - return phraseHasPi(modifier.phrase); - } else if (modifier.type === "pi") { - return true; - } else { - throw new Error("unreachable error"); - } - }); -} -/** Checks if a single phrase has _pi_. */ -function phraseHasPi(phrase: Phrase): boolean { - if (phrase.type === "default") { - return modifiersHasPi(phrase.modifiers); - } else if (phrase.type === "preverb") { - return modifiersHasPi(phrase.modifiers) || phraseHasPi(phrase.phrase); - } else if (phrase.type === "preposition") { - const preposition = phrase.preposition; - return modifiersHasPi(preposition.modifiers) || - multiplePhrasesHasPi(preposition.phrases); - } else if (phrase.type === "quotation") { - return false; - } else { - throw new Error("unreachable error"); - } -} -/** Checks if multiple phrases has _pi_. */ -function multiplePhrasesHasPi(phrases: MultiplePhrases): boolean { - if (phrases.type === "single") { - return phraseHasPi(phrases.phrase); - } else if (phrases.type === "and conjunction" || phrases.type === "anu") { - return phrases.phrases.some(multiplePhrasesHasPi); - } else { - throw new Error("unreachable error"); - } -} From 3ae9536bb66d3da46ad544e97dfd1762d45b9f2c Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 10:30:16 +0800 Subject: [PATCH 236/271] remove done TODO --- src/filter.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/filter.ts b/src/filter.ts index 196c216..2deec74 100644 --- a/src/filter.ts +++ b/src/filter.ts @@ -10,7 +10,6 @@ import { } from "./ast.ts"; import { UnrecognizedError } from "./error.ts"; -// TODO: AST walker // TODO: filter nested prepositions // TODO: filter preposition in subject and object From c8eb61f7b9bbc7fee2c485ba61851cd9b1936053 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 10:39:57 +0800 Subject: [PATCH 237/271] disallow nested preposition --- src/ast.ts | 14 ++++++++++++++ src/filter.ts | 22 +++++++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/src/ast.ts b/src/ast.ts index 8afdcca..f7ae872 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -139,3 +139,17 @@ export function someModifierInMultiplePhrases( throw new Error("unreachable"); } } +export function somePhraseInMultiplePhrases( + phrases: MultiplePhrases, + checker: (modifier: Phrase) => boolean, +): boolean { + if (phrases.type === "single") { + return checker(phrases.phrase); + } else if (phrases.type === "and conjunction" || phrases.type === "anu") { + return phrases.phrases.some((phrases) => + somePhraseInMultiplePhrases(phrases, checker) + ); + } else { + throw new Error("unreachable"); + } +} diff --git a/src/filter.ts b/src/filter.ts index 2deec74..3381daa 100644 --- a/src/filter.ts +++ b/src/filter.ts @@ -1,3 +1,4 @@ +import { somePhraseInMultiplePhrases } from "./ast.ts"; import { FullClause, Modifier, @@ -10,7 +11,6 @@ import { } from "./ast.ts"; import { UnrecognizedError } from "./error.ts"; -// TODO: filter nested prepositions // TODO: filter preposition in subject and object /** Array of filter rules for a word unit. */ @@ -210,6 +210,26 @@ export const PREPOSITION_RULE: Array<(phrase: Preposition) => boolean> = [ } return true; }, + // Disallow nested preposition + (preposition) => { + const checker = (phrase: Phrase): boolean => { + if (phrase.type === "default") { + return false; + } else if (phrase.type === "preposition") { + return true; + } else if (phrase.type === "preverb") { + return checker(phrase.phrase); + } else if (phrase.type === "quotation") { + return false; + } else { + throw new Error("unreachable"); + } + }; + if (somePhraseInMultiplePhrases(preposition.phrases, checker)) { + throw new UnrecognizedError("Preposition inside preposition"); + } + return true; + }, ]; export const FULL_CLAUSE_RULE: Array<(fullClase: FullClause) => boolean> = [ // Prevent "taso ala taso" From 0af6940c0dcf4b1fc7614564cff51c34a3560b8a Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 10:40:42 +0800 Subject: [PATCH 238/271] remove unneeded import --- src/filter.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/src/filter.ts b/src/filter.ts index 3381daa..3afb24c 100644 --- a/src/filter.ts +++ b/src/filter.ts @@ -2,7 +2,6 @@ import { somePhraseInMultiplePhrases } from "./ast.ts"; import { FullClause, Modifier, - MultiplePhrases, Phrase, Preposition, Sentence, From 7c0fb6cfc8ea262a5fd377b799b6e9c6daaf4358 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 10:48:56 +0800 Subject: [PATCH 239/271] disallow preposition in subject --- src/filter.ts | 52 ++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 39 insertions(+), 13 deletions(-) diff --git a/src/filter.ts b/src/filter.ts index 3afb24c..7d6c0d1 100644 --- a/src/filter.ts +++ b/src/filter.ts @@ -1,5 +1,7 @@ import { somePhraseInMultiplePhrases } from "./ast.ts"; +import { MultiplePhrases } from "./ast.ts"; import { + Clause, FullClause, Modifier, Phrase, @@ -211,21 +213,32 @@ export const PREPOSITION_RULE: Array<(phrase: Preposition) => boolean> = [ }, // Disallow nested preposition (preposition) => { - const checker = (phrase: Phrase): boolean => { - if (phrase.type === "default") { - return false; - } else if (phrase.type === "preposition") { - return true; - } else if (phrase.type === "preverb") { - return checker(phrase.phrase); - } else if (phrase.type === "quotation") { - return false; + if ( + somePhraseInMultiplePhrases(preposition.phrases, hasPrepositionInPhrase) + ) { + throw new UnrecognizedError("Preposition inside preposition"); + } + return true; + }, +]; +/** Array of filter rules for clauses. */ +export const CLAUSE_RULE: Array<(clause: Clause) => boolean> = [ + // disallow preposition in subject + (clause) => { + let phrases: MultiplePhrases; + if (clause.type === "phrases" || clause.type === "o vocative") { + phrases = clause.phrases; + } else if (clause.type === "li clause" || clause.type === "o clause") { + if (clause.subjects) { + phrases = clause.subjects; } else { - throw new Error("unreachable"); + return true; } - }; - if (somePhraseInMultiplePhrases(preposition.phrases, checker)) { - throw new UnrecognizedError("Preposition inside preposition"); + } else { + return true; + } + if (somePhraseInMultiplePhrases(phrases, hasPrepositionInPhrase)) { + throw new UnrecognizedError("Preposition in subject"); } return true; }, @@ -276,3 +289,16 @@ function modifiersIsAlaOrNone(modifiers: Array): boolean { } return true; } +function hasPrepositionInPhrase(phrase: Phrase): boolean { + if (phrase.type === "default") { + return false; + } else if (phrase.type === "preposition") { + return true; + } else if (phrase.type === "preverb") { + return hasPrepositionInPhrase(phrase.phrase); + } else if (phrase.type === "quotation") { + return false; + } else { + throw new Error("unreachable"); + } +} From 66f6d531f83ffdf5dd0f12eb7b0e2375945d5d1f Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 10:50:25 +0800 Subject: [PATCH 240/271] actually implement clause filter --- src/parser.ts | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 976bb4d..48412a0 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -19,6 +19,7 @@ import { SPECIAL_SUBJECT, } from "./vocabulary.ts"; import { + CLAUSE_RULE, filter, FULL_CLAUSE_RULE, MODIFIER_RULES, @@ -531,7 +532,7 @@ function clause(): Parser { ).map(([preposition, morePreposition]) => ({ type: "prepositions", prepositions: [preposition, ...morePreposition], - })), + } as Clause)), subjectPhrases().map((phrases) => { if ( phrases.type === "single" && @@ -546,7 +547,7 @@ function clause(): Parser { subjectPhrases().skip(specificWord("o")).map((phrases) => ({ type: "o vocative", phrases, - })), + } as Clause)), sequence( subjectPhrases(), optionalComma().with(specificWord("li")).with( @@ -556,14 +557,14 @@ function clause(): Parser { type: "li clause", subjects, predicates, - })), + } as Clause)), sequence( specificWord("o").with(multiplePredicates(["o", "anu"])), ).map(([predicates]) => ({ type: "o clause", subjects: null, predicates, - })), + } as Clause)), sequence( subjectPhrases(), optionalComma().with(specificWord("o")).with( @@ -573,12 +574,12 @@ function clause(): Parser { type: "o clause", subjects: subjects, predicates, - })), + } as Clause)), quotation().map((quotation) => ({ type: "quotation", quotation, - })), - ); + } as Clause)), + ).filter(filter(CLAUSE_RULE)); } /** Parses a single clause including precaluse and postclause. */ function fullClause(): Parser { From e29dd2beb3eacf5157d557f87219f51507e649ef Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 10:57:03 +0800 Subject: [PATCH 241/271] disallow preposition in object --- src/ast.ts | 20 ++++++++++++++++++++ src/filter.ts | 14 ++++++++++++-- 2 files changed, 32 insertions(+), 2 deletions(-) diff --git a/src/ast.ts b/src/ast.ts index f7ae872..3c411f6 100644 --- a/src/ast.ts +++ b/src/ast.ts @@ -153,3 +153,23 @@ export function somePhraseInMultiplePhrases( throw new Error("unreachable"); } } +export function someObjectInMultiplePredicate( + predicate: MultiplePredicates, + checker: (object: Phrase) => boolean, +): boolean { + if (predicate.type === "single") { + return false; + } else if (predicate.type === "associated") { + if (predicate.objects) { + return somePhraseInMultiplePhrases(predicate.objects, checker); + } else { + return false; + } + } else if (predicate.type === "and conjunction" || predicate.type === "anu") { + return predicate.predicates.some((predicates) => + someObjectInMultiplePredicate(predicates, checker) + ); + } else { + throw new Error("unreachable"); + } +} diff --git a/src/filter.ts b/src/filter.ts index 7d6c0d1..856b678 100644 --- a/src/filter.ts +++ b/src/filter.ts @@ -8,12 +8,11 @@ import { Preposition, Sentence, someModifierInPhrase, + someObjectInMultiplePredicate, WordUnit, } from "./ast.ts"; import { UnrecognizedError } from "./error.ts"; -// TODO: filter preposition in subject and object - /** Array of filter rules for a word unit. */ export const WORD_UNIT_RULES: Array<(wordUnit: WordUnit) => boolean> = [ // avoid "seme ala seme" @@ -242,6 +241,17 @@ export const CLAUSE_RULE: Array<(clause: Clause) => boolean> = [ } return true; }, + // disallow preposition in object + (clause) => { + if (clause.type === "li clause" || clause.type === "o clause") { + if ( + someObjectInMultiplePredicate(clause.predicates, hasPrepositionInPhrase) + ) { + throw new UnrecognizedError("Preposition in object"); + } + } + return true; + }, ]; export const FULL_CLAUSE_RULE: Array<(fullClase: FullClause) => boolean> = [ // Prevent "taso ala taso" From 78ee7591b1b53d666f3c0ad146599ea18aae24b9 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 11:29:40 +0800 Subject: [PATCH 242/271] prepare for translating phrases --- src/error.ts | 6 ++++++ src/translator.ts | 34 ++++++++++++++++++++++++++++++++-- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/src/error.ts b/src/error.ts index d413bfe..d9d67f6 100644 --- a/src/error.ts +++ b/src/error.ts @@ -9,6 +9,12 @@ export class UnreachableError extends OutputError { super("This is an error you shouldn't see... Please report this error."); } } +/** Represents Error due to things not implemented yet. */ +export class TodoError extends OutputError { + constructor(token: string) { + super(`${token} is not yet implemented.`); + } +} /** Represents Error caused by unrecognized elements. */ export class UnrecognizedError extends OutputError { constructor(token: string) { diff --git a/src/translator.ts b/src/translator.ts index a8262e3..ea027cc 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -1,7 +1,8 @@ import { Clause } from "./ast.ts"; -import { FullClause, Sentence } from "./ast.ts"; +import { FullClause, MultiplePhrases, Phrase, Sentence } from "./ast.ts"; import { Output } from "./output.ts"; import { parser } from "./parser.ts"; +import { TodoError } from "./error.ts"; /** A special kind of Output that translators returns. */ export type TranslationOutput = Output; @@ -23,9 +24,38 @@ function rotate>( new Output([[]]), ) as Output; } +function phraseAsNoun( + phrase: Phrase, + named: boolean, + of: boolean, +): TranslationOutput { + throw new Error("todo"); +} +function translateMultiplePhrases( + phrases: MultiplePhrases, + level: 1 | 2, + translator: (phrase: Phrase) => TranslationOutput, +): TranslationOutput { + throw new Error("todo"); +} /** Translates a clause. */ function translateClause(clause: Clause): TranslationOutput { - throw new Error("todo"); + if (clause.type === "phrases") { + return translateMultiplePhrases( + clause.phrases, + 2, + (phrase) => phraseAsNoun(phrase, true, true), + ); + } else if (clause.type === "o vocative") { + return translateMultiplePhrases( + clause.phrases, + 2, + (phrase) => + phraseAsNoun(phrase, true, true).map((phrase) => `hey ${phrase}`), + ); + } else { + return new Output(new TodoError(`translation for ${clause.type}`)); + } } /** Translates a full clause. */ function translateFullClause(fullClause: FullClause): TranslationOutput { From 65d90bc5f3680b5afce0324da3f84389201533fe Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 11:31:20 +0800 Subject: [PATCH 243/271] small change --- src/translator.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/translator.ts b/src/translator.ts index ea027cc..d8a5de9 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -33,8 +33,8 @@ function phraseAsNoun( } function translateMultiplePhrases( phrases: MultiplePhrases, - level: 1 | 2, translator: (phrase: Phrase) => TranslationOutput, + level: 1 | 2 = 2, ): TranslationOutput { throw new Error("todo"); } @@ -43,13 +43,11 @@ function translateClause(clause: Clause): TranslationOutput { if (clause.type === "phrases") { return translateMultiplePhrases( clause.phrases, - 2, (phrase) => phraseAsNoun(phrase, true, true), ); } else if (clause.type === "o vocative") { return translateMultiplePhrases( clause.phrases, - 2, (phrase) => phraseAsNoun(phrase, true, true).map((phrase) => `hey ${phrase}`), ); From f5f323cdd208b90c582f815877891b0e5b0c01de Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 11:48:48 +0800 Subject: [PATCH 244/271] implement translation for compound phrases --- src/translator.ts | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/src/translator.ts b/src/translator.ts index d8a5de9..7ad8d53 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -34,9 +34,45 @@ function phraseAsNoun( function translateMultiplePhrases( phrases: MultiplePhrases, translator: (phrase: Phrase) => TranslationOutput, - level: 1 | 2 = 2, + level = 2, ): TranslationOutput { - throw new Error("todo"); + if (phrases.type === "single") { + return translator(phrases.phrase); + } else if (phrases.type === "and conjunction" || phrases.type === "anu") { + let conjunction: string; + if (phrases.type === "and conjunction") { + conjunction = "and"; + } else { + conjunction = "or"; + } + const translations = rotate( + phrases.phrases.map((phrases) => + translateMultiplePhrases(phrases, translator, level - 1) + ), + ); + if (level === 2) { + return translations.map((phrases) => { + if (phrases.length === 2) { + return [phrases[0], conjunction, phrases[1]].join(" "); + } else { + const comma = phrases.slice(0, phrases.length - 1); + const last = phrases[phrases.length - 1]; + return [ + comma.map((translation) => [translation, ", "].join()).join(), + conjunction, + " ", + last, + ].join(""); + } + }); + } else { + return translations.map((phrases) => + phrases.join([" ", conjunction, " "].join()) + ); + } + } else { + throw new Error("unreachable"); + } } /** Translates a clause. */ function translateClause(clause: Clause): TranslationOutput { From 347d8961ecaeed1a8591280aac79e95a43713bbb Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 11:49:26 +0800 Subject: [PATCH 245/271] implement fail safe --- src/translator.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/translator.ts b/src/translator.ts index 7ad8d53..847fdd1 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -65,10 +65,12 @@ function translateMultiplePhrases( ].join(""); } }); - } else { + } else if (level === 1) { return translations.map((phrases) => phrases.join([" ", conjunction, " "].join()) ); + } else { + throw new Error("unreachable"); } } else { throw new Error("unreachable"); From b3735b2ef0746dfb2892de571a4815c744a8c835 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 12:00:10 +0800 Subject: [PATCH 246/271] small change --- src/output.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/output.ts b/src/output.ts index 5f15691..2e6f803 100644 --- a/src/output.ts +++ b/src/output.ts @@ -18,7 +18,7 @@ export class Output { this.error = output; } else { this.output = []; - this.error = new OutputError("no error provided"); + this.error = new OutputError(); } } private setError(error: OutputError) { From 01b1eed1cb5d75119f7c09837bf4248f2ddc5769 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 12:04:40 +0800 Subject: [PATCH 247/271] add concat method --- src/output.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/output.ts b/src/output.ts index 2e6f803..f725ae0 100644 --- a/src/output.ts +++ b/src/output.ts @@ -73,4 +73,11 @@ export class Output { for (const value of this.output) wholeOutput.append(mapper(value)); return wholeOutput; } + static concat(...outputs: Array>): Output { + const wholeOutput = new Output(); + for (const output of outputs) { + wholeOutput.append(output); + } + return wholeOutput; + } } From 663d032a00de55800dc5b811e1aac87d61d489d9 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 12:05:01 +0800 Subject: [PATCH 248/271] simplify function parameter --- src/translator.ts | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/src/translator.ts b/src/translator.ts index 847fdd1..7c10714 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -26,8 +26,8 @@ function rotate>( } function phraseAsNoun( phrase: Phrase, - named: boolean, - of: boolean, + named = true, + of = true, ): TranslationOutput { throw new Error("todo"); } @@ -79,15 +79,11 @@ function translateMultiplePhrases( /** Translates a clause. */ function translateClause(clause: Clause): TranslationOutput { if (clause.type === "phrases") { - return translateMultiplePhrases( - clause.phrases, - (phrase) => phraseAsNoun(phrase, true, true), - ); + return translateMultiplePhrases(clause.phrases, phraseAsNoun); } else if (clause.type === "o vocative") { return translateMultiplePhrases( clause.phrases, - (phrase) => - phraseAsNoun(phrase, true, true).map((phrase) => `hey ${phrase}`), + (phrase) => phraseAsNoun(phrase).map((phrase) => `hey ${phrase}`), ); } else { return new Output(new TodoError(`translation for ${clause.type}`)); From fdf5afc6fdb89cb7b8b444b66aec11cb53580508 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 12:41:14 +0800 Subject: [PATCH 249/271] remove unneeded filter in parser --- src/parser.ts | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/src/parser.ts b/src/parser.ts index 48412a0..6c4115c 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -533,17 +533,7 @@ function clause(): Parser { type: "prepositions", prepositions: [preposition, ...morePreposition], } as Clause)), - subjectPhrases().map((phrases) => { - if ( - phrases.type === "single" && - (phrases.phrase.type === "preposition" || - phrases.phrase.type === "quotation") - ) { - throw new UnreachableError(); - } else { - return { type: "phrases", phrases } as Clause; - } - }), + subjectPhrases().map((phrases) => ({ type: "phrases", phrases } as Clause)), subjectPhrases().skip(specificWord("o")).map((phrases) => ({ type: "o vocative", phrases, From 8dac0458dfd95072cd039fb380b4a08fc112d668 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 12:42:22 +0800 Subject: [PATCH 250/271] revert, this is partly needed --- src/parser.ts | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/parser.ts b/src/parser.ts index 6c4115c..14df87a 100644 --- a/src/parser.ts +++ b/src/parser.ts @@ -533,7 +533,13 @@ function clause(): Parser { type: "prepositions", prepositions: [preposition, ...morePreposition], } as Clause)), - subjectPhrases().map((phrases) => ({ type: "phrases", phrases } as Clause)), + subjectPhrases().map((phrases) => { + if (phrases.type === "single" && phrases.phrase.type === "quotation") { + throw new UnreachableError(); + } else { + return { type: "phrases", phrases } as Clause; + } + }), subjectPhrases().skip(specificWord("o")).map((phrases) => ({ type: "o vocative", phrases, From 4e21c43f1d07433ab0ed5e142c48d9671fc55a9d Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 12:54:21 +0800 Subject: [PATCH 251/271] preparation --- src/definition.ts | 2 +- src/translator.ts | 43 ++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 41 insertions(+), 4 deletions(-) diff --git a/src/definition.ts b/src/definition.ts index 02950e7..f94eab8 100644 --- a/src/definition.ts +++ b/src/definition.ts @@ -5,7 +5,7 @@ export type Translation = { adverb: Array; }; /** Record of word translations. */ -export const TRANSLATION: { [key: string]: Translation } = { +export const DEFINITION: { [key: string]: Translation } = { akesi: { noun: ["reptile", "reptiles", "amphibian", "amphibians"], adjective: ["reptilian", "amphibian"], diff --git a/src/translator.ts b/src/translator.ts index 7c10714..f8b34cd 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -3,6 +3,8 @@ import { FullClause, MultiplePhrases, Phrase, Sentence } from "./ast.ts"; import { Output } from "./output.ts"; import { parser } from "./parser.ts"; import { TodoError } from "./error.ts"; +import { DEFINITION } from "./definition.ts"; +import { OutputError } from "./error.ts"; /** A special kind of Output that translators returns. */ export type TranslationOutput = Output; @@ -24,12 +26,47 @@ function rotate>( new Output([[]]), ) as Output; } +function definition( + kind: "noun" | "adjective" | "adverb", + word: string, +): TranslationOutput { + return Output.concat( + new Output(new OutputError(`No ${kind} translation found for ${word}.`)), + new Output(DEFINITION[word][kind]), + ); +} +function nounDefinition(word: string): TranslationOutput { + return definition("noun", word); +} +function adjectiveDefinition(word: string): TranslationOutput { + return definition("adjective", word); +} +function adverbDefinition(word: string): TranslationOutput { + return definition("adverb", word); +} +function defaultPhraseAsNoun( + phrase: Phrase & { type: "default" }, + options?: { + named?: boolean; + suffix?: boolean; + }, +): TranslationOutput { + const named = options?.named ?? true; + const suffix = options?.suffix ?? true; + throw new Error("todo"); +} function phraseAsNoun( phrase: Phrase, - named = true, - of = true, + options?: { + named?: boolean; + suffix?: boolean; + }, ): TranslationOutput { - throw new Error("todo"); + if (phrase.type === "default") { + return defaultPhraseAsNoun(phrase, options); + } else { + return new Output(new TodoError(`translation of ${phrase.type}`)); + } } function translateMultiplePhrases( phrases: MultiplePhrases, From 6431be0ef1040a3ca9a1d2e2cf7f661bd149b9a3 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 13:01:06 +0800 Subject: [PATCH 252/271] add translator to word unit and numbers --- src/translator.ts | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/src/translator.ts b/src/translator.ts index f8b34cd..43d91e6 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -1,14 +1,29 @@ import { Clause } from "./ast.ts"; -import { FullClause, MultiplePhrases, Phrase, Sentence } from "./ast.ts"; +import { + FullClause, + MultiplePhrases, + Phrase, + Sentence, + WordUnit, +} from "./ast.ts"; import { Output } from "./output.ts"; import { parser } from "./parser.ts"; import { TodoError } from "./error.ts"; import { DEFINITION } from "./definition.ts"; import { OutputError } from "./error.ts"; +import { UnreachableError } from "./error.ts"; /** A special kind of Output that translators returns. */ export type TranslationOutput = Output; +const WORD_TO_NUMBER: { [word: string]: number } = { + ale: 100, + ali: 100, + mute: 20, + luka: 5, + tu: 2, + wan: 1, +}; /** * Helper function for turning array or tuple of Output into Output of array or * tuple. @@ -44,6 +59,22 @@ function adjectiveDefinition(word: string): TranslationOutput { function adverbDefinition(word: string): TranslationOutput { return definition("adverb", word); } +function number(words: Array): number { + return words.reduce((number, word) => number + WORD_TO_NUMBER[word], 0); +} +function wordUnitAsNoun(word: WordUnit): TranslationOutput { + if (word.type === "default") { + return nounDefinition(word.word); + } else if (word.type === "numbers") { + return new Output([number(word.numbers).toString()]); + } else if (word.type === "reduplication") { + return nounDefinition(word.word).map((noun) => + new Array(word.count).fill(noun).join(" ") + ); + } else { + return new Output(new UnreachableError()); + } +} function defaultPhraseAsNoun( phrase: Phrase & { type: "default" }, options?: { From f36808df46d4443e69f9d97f29a7c0f3fd479a5c Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 13:04:57 +0800 Subject: [PATCH 253/271] implement word unit to adjective translation --- src/translator.ts | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/translator.ts b/src/translator.ts index 43d91e6..c0d2c4f 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -75,6 +75,19 @@ function wordUnitAsNoun(word: WordUnit): TranslationOutput { return new Output(new UnreachableError()); } } +function wordUnitAsAdjective(word: WordUnit): TranslationOutput { + if (word.type === "default") { + return adjectiveDefinition(word.word); + } else if (word.type === "numbers") { + return new Output([number(word.numbers).toString()]); + } else if (word.type === "reduplication") { + return adjectiveDefinition(word.word).map((noun) => + new Array(word.count).fill(noun).join(" ") + ); + } else { + return new Output(new UnreachableError()); + } +} function defaultPhraseAsNoun( phrase: Phrase & { type: "default" }, options?: { From f768edb076b08dce70076b89b5d5d76f6fabb250 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 13:05:56 +0800 Subject: [PATCH 254/271] avoid code duplication --- src/translator.ts | 31 ++++++------------------------- 1 file changed, 6 insertions(+), 25 deletions(-) diff --git a/src/translator.ts b/src/translator.ts index c0d2c4f..fe98c16 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -50,38 +50,19 @@ function definition( new Output(DEFINITION[word][kind]), ); } -function nounDefinition(word: string): TranslationOutput { - return definition("noun", word); -} -function adjectiveDefinition(word: string): TranslationOutput { - return definition("adjective", word); -} -function adverbDefinition(word: string): TranslationOutput { - return definition("adverb", word); -} function number(words: Array): number { return words.reduce((number, word) => number + WORD_TO_NUMBER[word], 0); } -function wordUnitAsNoun(word: WordUnit): TranslationOutput { - if (word.type === "default") { - return nounDefinition(word.word); - } else if (word.type === "numbers") { - return new Output([number(word.numbers).toString()]); - } else if (word.type === "reduplication") { - return nounDefinition(word.word).map((noun) => - new Array(word.count).fill(noun).join(" ") - ); - } else { - return new Output(new UnreachableError()); - } -} -function wordUnitAsAdjective(word: WordUnit): TranslationOutput { +function wordUnitAs( + kind: "noun" | "adjective" | "adverb", + word: WordUnit, +): TranslationOutput { if (word.type === "default") { - return adjectiveDefinition(word.word); + return definition(kind, word.word); } else if (word.type === "numbers") { return new Output([number(word.numbers).toString()]); } else if (word.type === "reduplication") { - return adjectiveDefinition(word.word).map((noun) => + return definition(kind, word.word).map((noun) => new Array(word.count).fill(noun).join(" ") ); } else { From 180e836a8ae276e8411dedfaef7e3017f9669c20 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 13:55:50 +0800 Subject: [PATCH 255/271] implement ugly noun translator --- src/translator.ts | 113 +++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 112 insertions(+), 1 deletion(-) diff --git a/src/translator.ts b/src/translator.ts index fe98c16..768cbb4 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -1,6 +1,7 @@ import { Clause } from "./ast.ts"; import { FullClause, + Modifier, MultiplePhrases, Phrase, Sentence, @@ -69,6 +70,28 @@ function wordUnitAs( return new Output(new UnreachableError()); } } +function defaultPhraseAsAdjective( + phrase: Phrase & { type: "default" }, + options?: { + suffix?: boolean; + }, +): TranslationOutput { + const suffix = options?.suffix ?? true; + throw new Error("todo"); +} +function modifierAsAdjective(modifier: Modifier): TranslationOutput { + if (modifier.type === "default") { + return wordUnitAs("adjective", modifier.word); + } else if (modifier.type === "nanpa" || modifier.type === "proper words") { + return new Output(); + } else if (modifier.type === "pi") { + return phraseAsAdjective(modifier.phrase, { suffix: false }); + } else { + return new Output( + new TodoError(`translating ${modifier.type} as adjective`), + ); + } +} function defaultPhraseAsNoun( phrase: Phrase & { type: "default" }, options?: { @@ -78,7 +101,95 @@ function defaultPhraseAsNoun( ): TranslationOutput { const named = options?.named ?? true; const suffix = options?.suffix ?? true; - throw new Error("todo"); + const name = ( + phrase.modifiers.filter( + (modifier) => modifier.type === "proper words", + )[0] as undefined | (Modifier & { type: "proper words" }) + )?.words; + if (name && !named) { + return new Output(); + } + const headWord = wordUnitAs("noun", phrase.headWord); + const modifierNoName = phrase.modifiers.filter(( + modifier, + ) => modifier.type !== "proper words"); + const modifiers: Array = modifierNoName.map( + modifierAsAdjective, + ); + const translations = rotate([headWord, rotate(modifiers)] as const).map( + ([headWord, modifiers]) => + [...modifiers.slice().reverse(), headWord].join(" "), + ).map( + (translation) => { + if (name) { + return `${translation} named ${name}`; + } else { + return translation; + } + }, + ); + if (suffix) { + const extraTranslations: Array = [ + ...modifierNoName.keys(), + ].map( + (i) => { + const suffix = modifierNoName[i]; + let suffixOutput: TranslationOutput; + if (suffix.type === "default") { + suffixOutput = wordUnitAs("noun", suffix.word).map((translation) => + `of ${translation}` + ); + } else if (suffix.type === "nanpa") { + suffixOutput = phraseAsNoun(suffix.phrase, { suffix: false }).map( + (translation) => `in position ${translation}`, + ); + } else if (suffix.type === "pi") { + suffixOutput = phraseAsNoun(suffix.phrase, { suffix: false }).map( + (translation) => `of ${translation}`, + ); + } else if (suffix.type === "proper words") { + throw new Error("unreachable"); + } else { + return new Output( + new TodoError(`translation of ${suffix.type} as noun`), + ); + } + const modifiers = [ + ...modifierNoName.slice(0, i), + ...modifierNoName.slice(i + 1), + ].map(modifierAsAdjective); + return rotate([headWord, rotate(modifiers)] as const).map( + ([headWord, modifiers]) => + [...modifiers.slice().reverse(), headWord].join(" "), + ).map( + (translation) => { + if (name) { + return `${translation} named ${name}`; + } else { + return translation; + } + }, + ).flatMap((left) => + suffixOutput.map((right) => [left, right].join(" ")) + ); + }, + ); + return Output.concat(translations, ...extraTranslations); + } else { + return translations; + } +} +function phraseAsAdjective( + phrase: Phrase, + options?: { + suffix?: boolean; + }, +): TranslationOutput { + if (phrase.type === "default") { + return defaultPhraseAsNoun(phrase, options); + } else { + return new Output(new TodoError(`translation of ${phrase.type}`)); + } } function phraseAsNoun( phrase: Phrase, From 3e872a2dc2d3a4956039d3dbf176af1818012105 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 13:56:30 +0800 Subject: [PATCH 256/271] change todo error --- src/translator.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/translator.ts b/src/translator.ts index 768cbb4..7211f78 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -77,7 +77,7 @@ function defaultPhraseAsAdjective( }, ): TranslationOutput { const suffix = options?.suffix ?? true; - throw new Error("todo"); + return new Output(new TodoError(`translation to adjective`)); } function modifierAsAdjective(modifier: Modifier): TranslationOutput { if (modifier.type === "default") { From 4b5679913e76854b514ba9f9260d4dc0e239c2a3 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 13:56:52 +0800 Subject: [PATCH 257/271] fix --- src/translator.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/translator.ts b/src/translator.ts index 7211f78..a6b2a74 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -186,7 +186,7 @@ function phraseAsAdjective( }, ): TranslationOutput { if (phrase.type === "default") { - return defaultPhraseAsNoun(phrase, options); + return defaultPhraseAsAdjective(phrase, options); } else { return new Output(new TodoError(`translation of ${phrase.type}`)); } From 0ea2c9cd9509e23f63411c87be461082cd240776 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 14:29:06 +0800 Subject: [PATCH 258/271] genericized translation functions --- src/translator.ts | 156 ++++++++++++++++++++++++---------------------- 1 file changed, 80 insertions(+), 76 deletions(-) diff --git a/src/translator.ts b/src/translator.ts index a6b2a74..61036f7 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -70,29 +70,58 @@ function wordUnitAs( return new Output(new UnreachableError()); } } -function defaultPhraseAsAdjective( - phrase: Phrase & { type: "default" }, - options?: { - suffix?: boolean; - }, +function modifierAs( + kind: "noun" | "adjective" | "adverb", + modifier: Modifier, ): TranslationOutput { - const suffix = options?.suffix ?? true; - return new Output(new TodoError(`translation to adjective`)); -} -function modifierAsAdjective(modifier: Modifier): TranslationOutput { if (modifier.type === "default") { - return wordUnitAs("adjective", modifier.word); + return wordUnitAs(kind, modifier.word); } else if (modifier.type === "nanpa" || modifier.type === "proper words") { - return new Output(); + return new Output(); } else if (modifier.type === "pi") { - return phraseAsAdjective(modifier.phrase, { suffix: false }); + if (kind === "adverb") { + return new Output(); + } + return phraseAs(kind, modifier.phrase, { named: false, suffix: false }); } else { - return new Output( + return new Output( new TodoError(`translating ${modifier.type} as adjective`), ); } } -function defaultPhraseAsNoun( +function modifierAsSuffix( + kind: "noun" | "adjective", + suffix: Modifier, +): TranslationOutput { + let construction: string; + if (kind === "noun") { + construction = "of X"; + } else { + construction = "in X way"; + } + if (suffix.type === "default") { + return wordUnitAs(kind, suffix.word).map((translation) => + construction.replace("X", translation) + ); + } else if (suffix.type === "nanpa") { + const named = kind === "noun"; + return phraseAs(kind, suffix.phrase, { named, suffix: false }).map( + (translation) => `in position ${translation}`, + ); + } else if (suffix.type === "pi") { + return phraseAs(kind, suffix.phrase, { named: false, suffix: false }).map(( + translation, + ) => construction.replace("X", translation)); + } else if (suffix.type === "proper words") { + return new Output([`named ${suffix.words}`]); + } else { + return new Output( + new TodoError(`translation of ${suffix.type} as noun`), + ); + } +} +function defaultPhraseAs( + kind: "noun" | "adjective", phrase: Phrase & { type: "default" }, options?: { named?: boolean; @@ -109,56 +138,43 @@ function defaultPhraseAsNoun( if (name && !named) { return new Output(); } - const headWord = wordUnitAs("noun", phrase.headWord); + let modifierKind: "adjective" | "adverb"; + if (kind === "noun") { + modifierKind = "adjective"; + } else if (kind === "adjective") { + modifierKind = "adverb"; + } + const headWord = wordUnitAs(kind, phrase.headWord); const modifierNoName = phrase.modifiers.filter(( modifier, ) => modifier.type !== "proper words"); - const modifiers: Array = modifierNoName.map( - modifierAsAdjective, - ); - const translations = rotate([headWord, rotate(modifiers)] as const).map( - ([headWord, modifiers]) => - [...modifiers.slice().reverse(), headWord].join(" "), - ).map( - (translation) => { - if (name) { - return `${translation} named ${name}`; - } else { - return translation; - } - }, + const modifierTranslation: Array = modifierNoName.map( + (modifier) => modifierAs(modifierKind, modifier), ); + const translations = rotate([headWord, rotate(modifierTranslation)] as const) + .map( + ([headWord, modifiers]) => + [...modifiers.slice().reverse(), headWord].join(" "), + ).map( + (translation) => { + if (name) { + return `${translation} named ${name}`; + } else { + return translation; + } + }, + ); if (suffix) { const extraTranslations: Array = [ ...modifierNoName.keys(), ].map( (i) => { - const suffix = modifierNoName[i]; - let suffixOutput: TranslationOutput; - if (suffix.type === "default") { - suffixOutput = wordUnitAs("noun", suffix.word).map((translation) => - `of ${translation}` - ); - } else if (suffix.type === "nanpa") { - suffixOutput = phraseAsNoun(suffix.phrase, { suffix: false }).map( - (translation) => `in position ${translation}`, - ); - } else if (suffix.type === "pi") { - suffixOutput = phraseAsNoun(suffix.phrase, { suffix: false }).map( - (translation) => `of ${translation}`, - ); - } else if (suffix.type === "proper words") { - throw new Error("unreachable"); - } else { - return new Output( - new TodoError(`translation of ${suffix.type} as noun`), - ); - } - const modifiers = [ + const suffixTranslation = modifierAsSuffix(kind, modifierNoName[i]); + const modifierTranslation = [ ...modifierNoName.slice(0, i), ...modifierNoName.slice(i + 1), - ].map(modifierAsAdjective); - return rotate([headWord, rotate(modifiers)] as const).map( + ].map((modifier) => modifierAs(modifierKind, modifier)); + return rotate([headWord, rotate(modifierTranslation)] as const).map( ([headWord, modifiers]) => [...modifiers.slice().reverse(), headWord].join(" "), ).map( @@ -170,7 +186,7 @@ function defaultPhraseAsNoun( } }, ).flatMap((left) => - suffixOutput.map((right) => [left, right].join(" ")) + suffixTranslation.map((right) => [left, right].join(" ")) ); }, ); @@ -179,27 +195,12 @@ function defaultPhraseAsNoun( return translations; } } -function phraseAsAdjective( - phrase: Phrase, - options?: { - suffix?: boolean; - }, -): TranslationOutput { +function phraseAs(kind: "noun" | "adjective", phrase: Phrase, options?: { + named?: boolean; + suffix?: boolean; +}): TranslationOutput { if (phrase.type === "default") { - return defaultPhraseAsAdjective(phrase, options); - } else { - return new Output(new TodoError(`translation of ${phrase.type}`)); - } -} -function phraseAsNoun( - phrase: Phrase, - options?: { - named?: boolean; - suffix?: boolean; - }, -): TranslationOutput { - if (phrase.type === "default") { - return defaultPhraseAsNoun(phrase, options); + return defaultPhraseAs(kind, phrase, options); } else { return new Output(new TodoError(`translation of ${phrase.type}`)); } @@ -252,11 +253,14 @@ function translateMultiplePhrases( /** Translates a clause. */ function translateClause(clause: Clause): TranslationOutput { if (clause.type === "phrases") { - return translateMultiplePhrases(clause.phrases, phraseAsNoun); + return translateMultiplePhrases( + clause.phrases, + (phrase) => phraseAs("noun", phrase), + ); } else if (clause.type === "o vocative") { return translateMultiplePhrases( clause.phrases, - (phrase) => phraseAsNoun(phrase).map((phrase) => `hey ${phrase}`), + (phrase) => phraseAs("noun", phrase).map((phrase) => `hey ${phrase}`), ); } else { return new Output(new TodoError(`translation for ${clause.type}`)); From 539a1af36b530e10a8c842ee23af9102d2492baa Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 14:33:08 +0800 Subject: [PATCH 259/271] allow sole phrases to be translated as adjectives --- src/translator.ts | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/src/translator.ts b/src/translator.ts index 61036f7..552403d 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -253,10 +253,33 @@ function translateMultiplePhrases( /** Translates a clause. */ function translateClause(clause: Clause): TranslationOutput { if (clause.type === "phrases") { - return translateMultiplePhrases( - clause.phrases, + const hasEn = (phrases: MultiplePhrases): boolean => { + if (phrases.type === "single") { + return false; + } else if (phrases.type === "and conjunction") { + return true; + } else if (phrases.type === "anu") { + return phrases.phrases.some(hasEn); + } else { + throw new Error("unreachable"); + } + }; + const phrases = clause.phrases; + const translations = translateMultiplePhrases( + phrases, (phrase) => phraseAs("noun", phrase), ); + if (hasEn(phrases)) { + return translations; + } else { + return Output.concat( + translateMultiplePhrases( + phrases, + (phrase) => phraseAs("adjective", phrase), + ), + translations, + ); + } } else if (clause.type === "o vocative") { return translateMultiplePhrases( clause.phrases, From bf86c4286a13d1cbd3b405a492e68671249676a9 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 14:36:26 +0800 Subject: [PATCH 260/271] remove main.js --- main.js | 1228 ------------------------------------------------------- 1 file changed, 1228 deletions(-) delete mode 100644 main.js diff --git a/main.js b/main.js deleted file mode 100644 index a2f0ea6..0000000 --- a/main.js +++ /dev/null @@ -1,1228 +0,0 @@ -"use strict"; - -// This code is no longer needed and being replaced by `src/` - -class UnrecognizedError extends Error {} -class UntranslatableError extends Error {} - -const PARTICLES = new Set([ - "a", - "ala", - "anu", - "e", - "en", - "la", - "li", - "nanpa", - "o", - "pi", - "taso", -]); -const HEADWORD = new Set([ - "akesi", - "ala", - "alasa", - "ale", - "ali", - "anpa", - "ante", - "awen", - "esun", - "ijo", - "ike", - "ilo", - "insa", - "jaki", - "jan", - "jelo", - "jo", - "kala", - "kalama", - "kama", - "kasi", - "ken", - "kili", - "kiwen", - "ko", - "kon", - "kule", - "kulupu", - "kute", - "lape", - "laso", - "lawa", - "len", - "lete", - "lili", - "linja", - "lipu", - "loje", - "lon", - "luka", - "lukin", - "lupa", - "ma", - "mama", - "mani", - "meli", - "mi", - "mije", - "moku", - "moli", - "monsi", - "mu", - "mun", - "musi", - "mute", - "nanpa", - "nasa", - "nasin", - "nena", - "ni", - "nimi", - "noka", - "olin", - "ona", - "open", - "pakala", - "pali", - "palisa", - "pan", - "pana", - "pilin", - "pimeja", - "pini", - "pipi", - "poka", - "poki", - "pona", - "sama", - "seli", - "selo", - "seme", - "sewi", - "sijelo", - "sike", - "sin", - "sina", - "sinpin", - "sitelen", - "sona", - "soweli", - "suli", - "suno", - "supa", - "suwi", - "tan", - "tawa", - "telo", - "tenpo", - "toki", - "tomo", - "tonsi", - "tu", - "unpa", - "uta", - "utala", - "walo", - "wan", - "waso", - "wawa", - "weka", - "wile", -]); -const MODIFIER = new Set([...HEADWORD, "taso"]); -const PREVERB = new Set([ - "alasa", - "awen", - "kama", - "ken", - "lukin", - "open", - "pini", - "sona", - "wile", -]); -const PREPOSITION = new Set(["kepeken", "lon", "sama", "tan", "tawa"]); -let VOCABULARY = new Set([ - ...PARTICLES, - ...HEADWORD, - ...PREVERB, - ...PREPOSITION, -]); -const NOUN = { - akesi: ["reptile", "reptiles", "amphibian", "amphibians"], - ala: ["nothing", "no"], - alasa: ["searching"], - ale: ["everything"], - ali: ["everything"], - anpa: ["bottom", "bottoms", "under"], - ante: ["changing"], - awen: ["staying"], - esun: ["shop", "shops"], - ijo: ["thing", "things"], - ike: ["badness"], - ilo: ["tool", "tools"], - insa: ["inside", "insides"], - jaki: ["obscenity", "obscenities"], - jan: ["person", "people", "human", "humans", "humanity"], - jelo: ["yellowness"], - jo: ["possession", "possessions"], - kala: ["fish", "fishes"], - kalama: ["sound", "sounds"], - kama: ["arriving"], - kasi: ["plant", "plants"], - ken: ["ability", "abilities", "possibility", "possibilities"], - kili: ["fruit", "fruits", "vegetable", "vegetables"], - kiwen: ["hard thing", "hard things"], - ko: ["soft thing", "soft things", "powder"], - kon: ["air", "essence"], - kule: ["color", "colors"], - kulupu: ["group", "groups"], - kute: ["ear", "ears", "listening"], - lape: ["sleep", "rest"], - laso: ["blueness", "greenness"], - lawa: ["head", "heads", "control", "controls"], - len: ["cloth", "clothes", "hiding"], - lete: ["coldness"], - lili: ["smallness"], - linja: ["long flexible thing", "long flexible things"], - lipu: ["book", "books", "paper", "paper-like thing", "paper-like things"], - loje: ["redness"], - lon: ["truth", "true"], - luka: ["hand", "hands", "arm", "arms"], - lukin: ["eye", "eyes", "sight"], - lupa: ["hole", "holes"], - ma: ["place", "places", "earth"], - mama: ["parent", "parents", "creator", "creators"], - mani: ["money", "large domestic animal", "large domestic animals"], - meli: ["woman", "women", "feminity"], - mi: ["I", "me", "we", "us"], - mije: ["man", "men", "masculinity"], - moku: ["food", "foods", "drink", "drinks"], - moli: ["death"], - monsi: ["back"], - mu: ["moo"], - mun: ["celestial object", "celestial objects", "glowing thing"], - musi: ["entertainment", "entertainments"], - mute: ["many"], - nanpa: ["number", "numbers"], - nasa: ["silliness", "strangeness"], - nasin: ["way"], - nena: ["bump"], - ni: ["this", "that"], - nimi: ["name", "names", "word", "words"], - noka: ["foot", "feet", "leg", "legs"], - olin: ["love"], - ona: ["they", "them", "it"], - open: ["beginning", "beginnings"], - pakala: ["mistake", "mistakes"], - pan: ["grain", "grains"], - pana: ["giving"], - pali: ["work"], - palisa: ["long hard thing", "long hard things"], - pilin: ["emotion", "emotions"], - pimeja: ["blackness", "brownness", "grayness"], - pini: ["end", "ends"], - pipi: ["insect", "insects", "bug", "bugs"], - poka: ["side", "sides", "hips"], - poki: ["container"], - pona: ["goodness", "simplicity"], - sama: ["similarity"], - seli: ["fire", "heat", "chemical reaction", "chemical reactions"], - selo: ["outer form", "skin", "boundary", "boundaries"], - seme: ["what", "which"], - sewi: ["above", "divinity"], - sijelo: ["body", "bodies"], - sike: ["round thing", "round things", "cycle"], - sin: ["new thing", "new things"], - sina: ["you", "you all"], - sinpin: ["face", "faces", "wall", "walls"], - sitelen: ["writing", "writings", "image", "images"], - sona: ["knowledge"], - soweli: ["animal", "animals"], - suli: ["hugeness", "importance"], - suno: ["light source", "light sources", "sun"], - supa: ["horizontal surface", "horizontal surfaces"], - suwi: ["sweetness", "cuteness", "innocence"], - tan: ["reason", "origin"], - tawa: ["movement"], - telo: ["liquid"], - tenpo: ["time"], - toki: ["communication", "communications", "language", "languages", "hello"], - tomo: ["house", "houses"], - tonsi: ["transgender", "transgenders", "non-binary", "non-binaries"], - tu: ["pair"], - unpa: ["sex"], - uta: ["mouth"], - utala: ["conflict", "difficulty"], - walo: ["whiteness", "paleness"], - wan: ["one"], - waso: ["bird", "birds"], - wawa: ["power", "powers"], - weka: ["leaving"], - wile: ["want", "wants", "need", "needs"], -}; -const ADJECTIVE = { - akesi: ["reptilian", "amphibian"], - ala: ["not", "no"], - alasa: [], - ale: ["all"], - ali: ["all"], - anpa: ["bottom"], - ante: ["different", "other"], - awen: ["staying"], - esun: [], - ijo: [], - ike: ["bad"], - ilo: [], - insa: [], - jaki: ["gross"], - jan: ["person-like"], - jelo: ["yellow"], - jo: [], - kala: ["fish-like"], - kalama: ["sounding"], - kama: ["arriving"], - kasi: ["plant-like"], - ken: [], - kili: [], - kiwen: ["hard"], - ko: ["soft"], - kon: [], - kule: ["colorful"], - kulupu: [], - kute: [], - lape: ["sleeping"], - laso: ["blue", "green"], - lawa: ["controlling"], - len: ["hidden"], - lete: ["cold", "uncooked"], - lili: ["small"], - linja: ["long flexible"], - lipu: ["paper-like"], - loje: ["red"], - lon: ["truthful"], - luka: [], - lukin: [], - lupa: [], - ma: ["earthy"], - mama: [], - mani: [], - meli: ["woman", "feminine"], - mi: ["my", "our"], - mije: ["man", "masculine"], - moku: [], - moli: ["dead", "deadly"], - monsi: [], - mu: ["mooing"], - mun: ["glowing"], - musi: ["entertaining"], - mute: ["many"], - nanpa: ["numeric"], - nasa: ["silly", "strange"], - nasin: [], - nena: [], - ni: ["this", "that"], - nimi: [], - noka: [], - olin: [], - ona: ["their", "its"], - open: [], - pakala: ["broken"], - pan: [], - pana: [], - pali: ["working"], - palisa: ["long hard"], - pilin: [], - pimeja: ["black", "brown", "gray"], - pini: ["ended"], - pipi: ["bug-like", "insect-like"], - poka: [], - poki: [], - pona: ["good", "simple"], - sama: [], - seli: ["hot"], - selo: [], - seme: ["what", "which"], - sewi: ["divine"], - sijelo: [], - sike: ["round"], - sin: ["new"], - sina: ["your"], - sinpin: [], - sitelen: [], - sona: ["knowledgeable"], - soweli: ["animal-like"], - suli: ["huge", "important"], - suno: ["shining"], - supa: [], - suwi: ["sweet", "cute", "innocent"], - tan: [], - tawa: ["moving"], - telo: ["liquid"], - tenpo: [], - toki: ["communicating"], - tomo: [], - tonsi: ["transgender", "non-binary"], - tu: ["two"], - unpa: ["sexual"], - uta: [], - utala: ["conflicting", "difficult"], - walo: ["white", "pale"], - wan: ["one"], - waso: ["bird-like"], - wawa: ["powerful"], - weka: ["leaving"], - wile: [], -}; -const ADVERB = { - akesi: [], - ala: ["not"], - alasa: [], - ale: ["completely"], - ali: ["completely"], - anpa: [], - ante: ["differently"], - awen: [], - esun: [], - ijo: [], - ike: ["badly"], - ilo: [], - insa: [], - jaki: ["disgustingly"], - jan: [], - jelo: [], - jo: [], - kala: [], - kalama: [], - kama: [], - kasi: [], - ken: [], - kili: [], - kiwen: [], - ko: [], - kon: [], - kule: ["colorfully"], - kulupu: [], - kute: [], - lape: [], - laso: [], - lawa: [], - len: [], - lete: [], - lili: ["slightly"], - linja: [], - lipu: [], - loje: [], - lon: ["truthfully"], - luka: [], - lukin: [], - lupa: [], - ma: [], - mama: [], - mani: [], - meli: [], - mi: [], - mije: [], - moku: [], - moli: [], - monsi: [], - mu: [], - mun: [], - musi: ["entertainingly"], - mute: ["very"], - nanpa: ["numerically"], - nasa: ["strangely"], - nasin: [], - nena: [], - ni: [], - nimi: [], - noka: [], - olin: [], - ona: [], - open: [], - pakala: [], - pan: [], - pana: [], - pali: [], - palisa: [], - pilin: [], - pimeja: [], - pini: [], - pipi: [], - poka: [], - poki: [], - pona: ["nicely"], - sama: ["equally"], - seli: [], - selo: [], - seme: [], - sewi: ["divinely"], - sijelo: [], - sike: ["repeatedly"], - sin: ["newly"], - sina: [], - sinpin: [], - sitelen: [], - sona: [], - soweli: [], - suli: ["hugely", "importantly"], - suno: [], - supa: [], - suwi: ["sweetly"], - tan: [], - tawa: [], - telo: [], - tenpo: [], - toki: [], - tomo: [], - tonsi: [], - tu: [], - unpa: ["sexually"], - uta: [], - utala: ["conflictingly", "difficultly"], - walo: [], - wan: [], - waso: [], - wawa: ["powerfully"], - weka: [], - wile: [], -}; - -function translatePhraseToAdverb(phrase) { - let translations = ADVERB[phrase.headword].slice(); - if (phrase.emphasis === "headword") { - translations = translations.flatMap((word) => [`so ${word}`, `(${word})`]); - } - for (const modifier of phrase.modifiers) { - switch (modifier.type) { - case "proper word": - return []; - case "word": - if (modifier.emphasized) { - translations = translations.flatMap((word) => - ADVERB[modifier.word].flatMap((adverb) => [ - `(${adverb}) ${word}`, - `so ${adverb} ${word}`, - ]) - ); - } else { - translations = translations.flatMap((word) => - ADVERB[modifier.word].map((adverb) => `${adverb} ${word}`) - ); - } - break; - case "pi": - throw new Error("todo"); - } - } - if (phrase.emphasis === "whole") { - translations = translations.map((translation) => `(${translation})`); - } - return translations; -} -/** - * translates phrase into adjective without "in X way" - * - * this doesn't handle whole phrase emphasis - */ -function translatePhraseToSimpleAdjective(phrase) { - let translations = ADJECTIVE[phrase.headword].slice(); - if (phrase.emphasis === "headword") { - translations = translations.flatMap((word) => [`so ${word}`, `(${word})`]); - } - for (const modifier of phrase.modifiers) { - switch (modifier.type) { - case "proper word": - return []; - case "word": - if (modifier.emphasized) { - translations = translations.flatMap((word) => - ADVERB[modifier.word].flatMap((adverb) => [ - `(${adverb}) ${word}`, - `so ${adverb} ${word}`, - ]) - ); - } else { - translations = translations.flatMap((word) => - ADVERB[modifier.word].map((adverb) => `${adverb} ${word}`) - ); - } - break; - case "pi": - translations = translations.flatMap((word) => - translatePhraseToSimpleAdjective(modifier).map( - (adverb) => `${adverb} ${word}` - ) - ); - break; - } - } - return translations; -} -/** - * translates phrase into noun phrase without "of"s - * - * this doesn't handle whole phrase emphasis - */ -function translatePhraseToSimpleNoun(phrase) { - let translations = NOUN[phrase.headword].slice(); - if (phrase.emphasis === "headword") { - translations = translations.map((word) => `(${word})`); - } - for (const modifier of phrase.modifiers) { - switch (modifier.type) { - case "proper word": - if (modifier.emphasized) { - translations = translations.map( - (word) => `${word} (named ${modifier.name})` - ); - } else { - translations = translations.map( - (word) => `${word} named ${modifier.name}` - ); - } - break; - case "word": - if (modifier.emphasized) { - translations = translations.flatMap((word) => - ADJECTIVE[modifier.word].flatMap((adjective) => [ - `(${adjective}) ${word}`, - `so ${adjective} ${word}`, - ]) - ); - } else { - translations = translations.flatMap((word) => - ADJECTIVE[modifier.word].map((adjective) => `${adjective} ${word}`) - ); - } - break; - case "pi": - translations = translations.flatMap((word) => - translatePhraseToSimpleAdjective(modifier).map( - (adjective) => `${adjective} ${word}` - ) - ); - break; - } - } - return translations; -} -/** - * translates phrase into adjective phrase with "in X way" - */ -function translatePhraseToAdjective(phrase) { - let translations = translatePhraseToSimpleAdjective(phrase); - for (const [i, item] of phrase.modifiers.entries()) { - const heads = translatePhraseToSimpleAdjective({ - ...phrase, - modifiers: [ - ...phrase.modifiers.slice(0, i), - ...phrase.modifiers.slice(i + 1), - ], - }); - switch (item.type) { - case "proper word": - continue; - case "word": - if (item.emphasized) { - for (const head of heads) { - for (const adjective of ADJECTIVE[item.word]) { - translations.push(`${head} in (${adjective}) way`); - } - } - } else { - for (const head of heads) { - for (const adjective of ADJECTIVE[item.word]) { - translations.push(`${head} in ${adjective} way`); - } - } - } - break; - case "pi": - const phrases = translatePhraseToSimpleAdjective(item); - for (const head of heads) { - for (const phrase of phrases) { - translations.push(`${head} in ${phrase} way`); - } - } - break; - } - } - if (phrase.emphasis === "whole") { - translations = translations.map((translation) => `(${translation})`); - } - return translations; -} -/** - * translates phrase into noun phrase with "of"s - */ -function translatePhraseToNoun(phrase) { - let translations = translatePhraseToSimpleNoun(phrase); - for (const [i, item] of phrase.modifiers.entries()) { - const heads = translatePhraseToSimpleNoun({ - ...phrase, - modifiers: [ - ...phrase.modifiers.slice(0, i), - ...phrase.modifiers.slice(i + 1), - ], - }); - switch (item.type) { - case "proper word": - continue; - case "word": - if (item.emphasized) { - for (const head of heads) { - for (const noun of NOUN[item.word]) { - translations.push(`${head} of (${noun})`); - } - } - } else { - for (const head of heads) { - for (const noun of NOUN[item.word]) { - translations.push(`${head} of ${noun}`); - } - } - } - break; - case "pi": - const phrases = translatePhraseToSimpleNoun(item); - for (const head of heads) { - for (const phrase of phrases) { - translations.push(`${head} of ${phrase}`); - } - } - break; - } - } - if (phrase.emphasis === "whole") { - translations = translations.map((translation) => `(${translation})`); - } - return translations; -} -// /** -// * translates clauses before la -// */ -// function translateLaClause(clause) { -// switch (clause.type) { -// case "phrase": -// const translations = [ -// ...translatePhraseToAdjective(clause), -// ...translatePhraseToNoun(clause), -// ]; -// if (translations.length === 0) { -// throw new UntranslatableError("complicated phrase"); -// } -// return translations; -// default: -// throw new Error("todo"); -// } -// } -/** - * translates clauses after la or without la - */ -function translateFinalClause(clause) { - switch (clause.type) { - case "phrase": - const translations = [ - ...translatePhraseToAdjective(clause), - ...translatePhraseToNoun(clause), - ]; - if (translations.length === 0) { - throw new UntranslatableError("complicated phrase"); - } - return translations; - default: - throw new Error("todo"); - } -} -/** - * translates sentence without a or taso - */ -function translatePureSentence(pureSentence) { - let translations = [""]; - for (const beforeLa of pureSentence.beforeLa) { - translations = translations.flatMap((sentence) => { - switch (beforeLa.type) { - case "phrase": - return [ - ...translatePhraseToAdjective(beforeLa).map( - (translation) => `${sentence}if ${translation}, then ` - ), - ...translatePhraseToNoun(beforeLa).map( - (translation) => `${sentence}given ${translation}, ` - ), - ]; - default: - throw new Error("todo"); - } - }); - } - translations = translations.flatMap((sentence) => - translateFinalClause(pureSentence.sentence).map( - (translation) => `${sentence}${translation}` - ) - ); - return translations; -} -function translateSentence(sentence) { - let start; - switch (sentence.start.type) { - case "none": - start = ""; - break; - case "a": - if (sentence.start.count === 1) { - start = "ah"; - } else { - start = Array(sentence.start.count).fill("ha").join(""); - } - break; - case "taso": - if (sentence.start.emphasized) { - start = "(however),"; - } else { - start = "however,"; - } - break; - } - let punctuation = "."; - let end; - switch (sentence.end.type) { - case "none": - end = ""; - break; - case "a": - if (sentence.end.count === 1) { - punctuation = "!"; - end = ""; - } else { - end = Array(sentence.end.count).fill("ha").join(""); - } - break; - } - if (sentence.type === "a or taso only") { - return [`${start} ${end}`.trim() + punctuation]; - } else { - return translatePureSentence(sentence).map( - (sentence) => `${start} ${sentence} ${end}`.trim() + punctuation - ); - } -} -/** - * parses string of modifiers - */ -function parseModifier(array) { - if (array.length === 0) { - return [[]]; - } - let modifiers = [[]]; - let haveName = false; - // TODO: handle multiple separate proper word as error - for (const [i, item] of array.entries()) { - if (item === "pi") { - const phrase = array.slice(i + 1); - if (phrase.includes("pi")) { - throw new UnrecognizedError('multiple "pi"'); - } - if (phrase.length === 0) { - throw new UnrecognizedError('no content after "pi"'); - } - if (phrase.length === 1) { - throw new UnrecognizedError('single modifier after "pi"'); - } - const phrases = parsePhrase(array.slice(i + 1)); - modifiers = modifiers.flatMap((arr) => - phrases.map((phrase) => - arr.concat([ - { - type: "pi", - ...phrase, - }, - ]) - ) - ); - break; - } - if (item === "a") { - for (const arr of modifiers) { - arr[arr.length - 1].emphasized = true; - } - } else if (/^[A-Z]/.test(item)) { - if (haveName && i > 0 && !/^[A-Z]/.test(array[i - 1])) { - throw new UnrecognizedError("multiple proper name"); - } - haveName = true; - for (const arr of modifiers) { - if (arr.length > 0 && arr[arr.length - 1].type === "proper word") { - const properWord = arr.pop(); - arr.push({ - type: "proper word", - name: properWord.name + " " + item, - emphasized: false, - }); - } else { - arr.push({ - type: "proper word", - name: item, - emphasized: false, - }); - } - } - } else if (!MODIFIER.has(item)) { - if (VOCABULARY.has(item)) { - throw new UnrecognizedError(`"${item}" as modifier`); - } else { - throw new UnrecognizedError(`"${item}"`); - } - } else { - for (const arr of modifiers) { - arr.push({ - type: "word", - word: item, - emphasized: false, - }); - } - } - } - return modifiers; -} -/** - * parses phrase - */ -function parsePhrase(array) { - if (/^[A-Z]/.test(array[0])) { - throw new UnrecognizedError("Proper name as headword"); - } - if (!HEADWORD.has(array[0])) { - if (VOCABULARY.has(array[0])) { - throw new UnrecognizedError(`"${array[0]}" as headword`); - } else { - throw new UnrecognizedError(`"${array[0]}"`); - } - } - if (array[1] === "a") { - return parseModifier(array.slice(2)).map((modifier) => ({ - headword: array[0], - emphasis: "headword", - modifiers: modifier, - })); - } - if (array[array.length - 1] === "a") { - return [ - ...parseModifier(array.slice(1, -1)).map((modifier) => ({ - headword: array[0], - emphasis: "whole", - modifiers: modifier, - })), - ...parseModifier(array.slice(1)).map((modifier) => ({ - headword: array[0], - emphasis: "none", - modifiers: modifier, - })), - ]; - } - return parseModifier(array.slice(1)).map((modifier) => ({ - headword: array[0], - emphasis: "none", - modifiers: modifier, - })); -} -/** - * parses subject which may have "en" in it - */ -function parseSubject(array) { - throw new Error("todo"); -} -/** - * parses predicate after "li" or "o", also handles multiple "li" - */ -function parsePredicate(array) { - throw new Error("todo"); -} -/** - * parses simple sentence without la - */ -function parseClause(array) { - if ( - array.length > 1 && - (array[0] === "mi" || array[0] === "sina") && - !array.includes("li") - ) { - if (array[1] === "a") { - if (array.length === 2) { - throw new UnrecognizedError(`"${array[0]} a (pred)" construction`); - } else { - throw new Error("todo"); - } - } - throw new Error("todo"); - } else if (array.includes("li")) { - if ((array[0] === "mi" || array[0] === "sina") && array[1] === "li") { - throw new UnrecognizedError(`"${array[0]} li (pred)" construction`); - } - if (array.includes("o")) { - throw new UnrecognizedError('Clause with both "li" and "o"'); - } - throw new Error("todo"); - } else if (array.includes("o")) { - if (array.slice(array.indexOf("o") + 1).includes("o")) { - throw new UnrecognizedError('Multiple "o"s'); - } - throw new Error("todo"); - } else { - return parsePhrase(array).map((phrase) => ({ - type: "phrase", - ...phrase, - })); - } -} -/** - * parses sentence without "a" and "taso" particles in the start and end of an - * array - * - * if empty array is passed, this will return type of "a or taso only", - * intended for sentences sentences that only contains a or taso - */ -function parsePureSentence(array) { - if (array.length === 0) { - return [ - { - type: "a or taso only", - }, - ]; - } - const beforeLa = []; - let sentence = []; - for (const [i, item] of array.entries()) { - if (item === "la") { - if (sentence.length === 0) { - throw new UnrecognizedError('Having no content before "la"'); - } - if (array[i + 1] === "a") { - throw new UnrecognizedError('"la a"'); - } - beforeLa.push(sentence); - sentence = []; - } else { - sentence.push(item); - } - } - if (sentence.length === 0) { - throw new UnrecognizedError('Having no content after "la"'); - } - let beforeLaClauses = [[]]; - for (const clause of beforeLa) { - beforeLaClauses = beforeLaClauses.flatMap((prev) => - parseClause(clause).map((parsedClause) => prev.concat([parsedClause])) - ); - } - return parseClause(sentence).flatMap((sentence) => - beforeLaClauses.map((clauses) => ({ - type: "la", - beforeLa: clauses, - sentence, - })) - ); -} -/** - * parses sentence - */ -function parseFromWords(array) { - if (array.length === 0) { - return []; - } - let start = { - type: "none", - }; - let start_slice = 0; - if (array[0] === "a") { - let broke = false; - for (const [i, item] of [...array.entries()]) { - if (item !== "a") { - start = { - type: "a", - count: i, - }; - start_slice = i; - broke = true; - break; - } - } - if (!broke) { - return [ - { - start: { - type: "a", - count: array.length, - }, - end: { - type: "none", - }, - type: "a or taso only", - }, - ]; - } - } else if (array[0] === "taso") { - switch (array.length) { - case 1: - return [ - { - start: { - type: "taso", - emphasized: false, - }, - end: { - type: "none", - }, - type: "a or taso only", - }, - ]; - case 2: - if (array[1] === "a") { - return [ - { - start: { - type: "taso", - emphasized: true, - }, - end: { - type: "none", - }, - type: "a or taso only", - }, - { - start: { - type: "taso", - emphasized: false, - }, - end: { - type: "a", - count: 1, - }, - type: "a or taso only", - }, - ]; - } - break; - } - if (array[1] === "a") { - start = { - type: "taso", - emphasized: true, - }; - start_slice = 2; - } else { - start = { - type: "taso", - emphasized: false, - }; - start_slice = 1; - } - } - if (array[array.length - 1] === "a") { - if (array[array.length - 2] === "a") { - for (let i = 2; i < array.length; i++) { - if (array[array.length - 1 - i] !== "a") { - return parsePureSentence(array.slice(start_slice, -i)).map( - (sentence) => ({ - start, - end: { - type: "a", - count: i, - }, - ...sentence, - }) - ); - } - } - } else { - return [ - ...parsePureSentence(array.slice(start_slice)).map((sentence) => ({ - start, - end: { - type: "none", - }, - ...sentence, - })), - ...parsePureSentence(array.slice(start_slice, -1)).map((sentence) => ({ - start, - end: { - type: "a", - count: 1, - }, - ...sentence, - })), - ]; - } - } else { - return parsePureSentence(array.slice(start_slice)).map((sentence) => ({ - start, - end: { - type: "none", - }, - ...sentence, - })); - } -} -/** - * parses toki pona sentence into multiple possible AST represented as array - */ -function parse(tokiPona) { - const cleanSentence = tokiPona - .trim() - .replace(/[.!?]*$/, "") - .replaceAll(",", " "); - if (/[:.!?]/.test(cleanSentence)) { - throw new UnrecognizedError("Multiple sentences"); - } - let words = cleanSentence.split(/\s+/); - if (words[0] === "") { - words = []; - } - if (words.includes("anu")) { - throw new UnrecognizedError('"anu"'); - } - // TODO: handle multiple consecutive "a"s inside sentence as error - return parseFromWords(words); -} -function translate(tokiPona) { - return parse(tokiPona).flatMap(translateSentence); -} -document.addEventListener("DOMContentLoaded", () => { - const input = document.getElementById("input"); - const output = document.getElementById("output"); - const error = document.getElementById("error"); - input.addEventListener("input", () => { - while (output.children.length > 0) { - output.removeChild(output.children[0]); - } - error.innerText = ""; - let translations; - try { - translations = translate(input.value); - } catch (e) { - if (e instanceof UnrecognizedError) { - error.innerText = `${e.message} is unrecognized`; - return; - } else if (e instanceof UntranslatableError) { - error.innerText = `${e.message} can't be translated, but it should be. This is a bug. Consider providing feedback.`; - return; - } else { - throw e; - } - } - if (input.value !== "" && translations.length === 0) { - error.innerText = `This sentence can't be translated, but it should be. This is a bug. Consider providing feedback.`; - } - for (const translation of translations) { - const emphasized = translation - .replaceAll("(", "") - .replaceAll(")", ""); - const list = document.createElement("li"); - list.innerHTML = emphasized; - output.appendChild(list); - } - }); -}); From cbe0c2c2c56143eed7738a6d68729ff4e0b5c33b Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 14:37:15 +0800 Subject: [PATCH 261/271] ignore main.js --- .gitignore | 1 + 1 file changed, 1 insertion(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a9b203a --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +main.js From 6e4ac638c4cc6f5b11aebc54f7d59b8fdd5e9e44 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 14:42:08 +0800 Subject: [PATCH 262/271] remove limitations in readme --- README.md | 33 --------------------------------- 1 file changed, 33 deletions(-) diff --git a/README.md b/README.md index 7581079..dcffb2a 100644 --- a/README.md +++ b/README.md @@ -17,36 +17,3 @@ deno task build Then open `./index.html` using your favorite browser. Whenever you made changes to `./src/*.ts`, you'll need to run `deno task build` again and refresh the browser. Later I'll make a script to automate this. - -## Limitations - -The following are currently unrecognized (non-definitive but pedantic). ✏️ means it is a limitation due to being work in progress and it will be lifted soon. Other limitation may also be lifted. - -- ✏️ Full sentences: It can only translate phrases for now. -- Non-pu vocabulary with exception to "pu" ("tonsi" is included in the vocabulary) -- Multiple sentences -- Comma as sentence separator (commas are treated as decoration and ignored) -- Proper word as headword -- Having multiple consecutive "a"s inside a sentence (in the beginning or end is fine) -- "taso" as headword ("taso" is currently recognized as modifier or particle at the beginning of a sentence) -- Having no clause before or after "la" particle -- "mi/sina li (pred)" constructions -- "mi/sina (pred) li (pred)" constructions (this would be recognized as "mi (modifier) li (pred)") -- "mi/sina a (pred)" constructions -- Clause with both "li" and "o" -- Clause with multiple "o"s -- Clause with "en" but without predicate ("li" or "o") -- "nanpa" as ordinal particle -- Extended numbering system -- "kepeken" as headword or modifier -- Multiple "pi" on a phrase -- "pi" followed by at most one modifier -- Multiple separate proper word on a single phrase, unless they're separated by "pi" (Proper words spanning multiple words like "musi Manka Sawa" is fine, this limitation refers to something like "musi Manka pona Sawa"; something like "musi Manka pi kule Sawa" is fine) -- proper word followed by "pi" -- "anu" particle -- "la a" -- "en a" -- "li a" -- "o a" -- "e a" -- "pi a" From 741df49ad8b67f48d470e781b281afd3a3873025 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 14:44:36 +0800 Subject: [PATCH 263/271] implement de-duplicator --- src/main.ts | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/main.ts b/src/main.ts index 8ba9a04..3fa2600 100644 --- a/src/main.ts +++ b/src/main.ts @@ -17,10 +17,14 @@ document.addEventListener("DOMContentLoaded", () => { if (translations.isError()) { error.innerText = translations.error?.message ?? "No error provided"; } else { + const set = new Set(); for (const translation of translations.output) { - const list = document.createElement("li"); - list.innerText = translation; - output.appendChild(list); + if (!set.has(translation)) { + const list = document.createElement("li"); + list.innerText = translation; + output.appendChild(list); + set.add(translation); + } } } }; From bd38ee9df4b7e0b17eead26b83f743ae6f0e62f5 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 14:51:18 +0800 Subject: [PATCH 264/271] update changelog --- CHANGELOG.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 17f885c..7e98d6d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,9 +2,13 @@ ## 0.2.0 (On development) -For this version. The whole code has been rewritten. This makes the code a lot easier to modify. Due to this, there are inevitable changes to the translator. +For this version. The whole code has been rewritten. This makes the code a lot easier to modify. Due to this, there are inevitable changes to the translator. The translator can now translate many things! Although it's still not capable of translating full sentences. -- New limitation list. +- Implement translator for: + - Extended numbering system + - Reduplication + - _nanpa_ particle + - _en_ and _anu_ - Add button for translating, replacing auto-translate when typing. - (Downgrade) Drop support for "a" particle. - (Downgrade) Error messages are now very unreliable. From 152b31d772e38efb7de95ddb76cd7d7aa625b15d Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 14:51:57 +0800 Subject: [PATCH 265/271] update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 7e98d6d..ad50a99 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ## 0.2.0 (On development) -For this version. The whole code has been rewritten. This makes the code a lot easier to modify. Due to this, there are inevitable changes to the translator. The translator can now translate many things! Although it's still not capable of translating full sentences. +For this version. The whole code has been rewritten. This makes the code a lot easier to modify. Due to this, there are inevitable changes to the translator. The translator can now translate few more things! Although it's still not capable of translating full sentences. - Implement translator for: - Extended numbering system From b8ceca319e8dd398d22594bd6cbb4277425869c4 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 14:53:24 +0800 Subject: [PATCH 266/271] update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ad50a99..baa2d8e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,7 +2,7 @@ ## 0.2.0 (On development) -For this version. The whole code has been rewritten. This makes the code a lot easier to modify. Due to this, there are inevitable changes to the translator. The translator can now translate few more things! Although it's still not capable of translating full sentences. +For this version. The whole code has been rewritten. The translator can now translate few more things! Although it's still not capable of translating full sentences. - Implement translator for: - Extended numbering system From 308073ea1fc915a2663531f11dbbab5643b3e8a5 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 14:57:55 +0800 Subject: [PATCH 267/271] fix --- src/translator.ts | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/translator.ts b/src/translator.ts index 552403d..aeb6701 100644 --- a/src/translator.ts +++ b/src/translator.ts @@ -104,12 +104,17 @@ function modifierAsSuffix( construction.replace("X", translation) ); } else if (suffix.type === "nanpa") { - const named = kind === "noun"; - return phraseAs(kind, suffix.phrase, { named, suffix: false }).map( + return phraseAs(kind, suffix.phrase, { + named: kind === "noun", + suffix: false, + }).map( (translation) => `in position ${translation}`, ); } else if (suffix.type === "pi") { - return phraseAs(kind, suffix.phrase, { named: false, suffix: false }).map(( + return phraseAs(kind, suffix.phrase, { + named: kind === "noun", + suffix: false, + }).map(( translation, ) => construction.replace("X", translation)); } else if (suffix.type === "proper words") { From b61f7c6c4a835a964e6477c324fcceabd9fc9169 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 15:06:07 +0800 Subject: [PATCH 268/271] remove unneeded test script --- test-parser.ts | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 test-parser.ts diff --git a/test-parser.ts b/test-parser.ts deleted file mode 100644 index a243936..0000000 --- a/test-parser.ts +++ /dev/null @@ -1,10 +0,0 @@ -import { OutputError } from "./src/error.ts"; -import { parser } from "./src/parser.ts"; - -const input = await Deno.readTextFile("./test.txt"); -const output = parser(input); -console.log(JSON.stringify(output, (key, value) => { - if (key === "error") return (value as null | OutputError)?.message; - else return value; -}, 2)); -console.log(`The output has ${output.output.length} AST's`); From 5de07cf894a5397f2f6a3df51e1c9b54a9c86ee4 Mon Sep 17 00:00:00 2001 From: Koko Date: Wed, 24 Jan 2024 15:17:01 +0800 Subject: [PATCH 269/271] Create deno.yml --- .github/workflows/deno.yml | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 .github/workflows/deno.yml diff --git a/.github/workflows/deno.yml b/.github/workflows/deno.yml new file mode 100644 index 0000000..168c933 --- /dev/null +++ b/.github/workflows/deno.yml @@ -0,0 +1,34 @@ +# This workflow uses actions that are not certified by GitHub. +# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +# This workflow will install Deno then run `deno lint` and `deno test`. +# For more information see: https://github.com/denoland/setup-deno + +name: Deno + +on: + push: + branches: ["release"] + pull_request: + branches: ["release"] + +permissions: + contents: read + +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Setup repo + uses: actions/checkout@v3 + + - name: Setup Deno + uses: denoland/setup-deno@v1 + with: + deno-version: v1.x + + - name: Build + run: deno task build From 573ff38713b8d8348a04b593c7b47c4ecf53f469 Mon Sep 17 00:00:00 2001 From: neverRare Date: Wed, 24 Jan 2024 15:18:34 +0800 Subject: [PATCH 270/271] release --- CHANGELOG.md | 2 +- index.html | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index baa2d8e..b9c5d86 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,6 @@ # Changelog -## 0.2.0 (On development) +## 0.2.0 For this version. The whole code has been rewritten. The translator can now translate few more things! Although it's still not capable of translating full sentences. diff --git a/index.html b/index.html index 8778900..b84283d 100644 --- a/index.html +++ b/index.html @@ -66,8 +66,8 @@

      Toki Pona Translator