diff --git a/.github/workflows/deno.yml b/.github/workflows/deno.yml
new file mode 100644
index 0000000..168c933
--- /dev/null
+++ b/.github/workflows/deno.yml
@@ -0,0 +1,34 @@
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
+# This workflow will install Deno then run `deno task build`.
+# For more information see: https://github.com/denoland/setup-deno
+
+name: Deno
+
+on:
+ push:
+ branches: ["release"]
+ pull_request:
+ branches: ["release"]
+
+permissions:
+ contents: read
+
+jobs:
+ test:
+ runs-on: ubuntu-latest
+
+ steps:
+ - name: Setup repo
+ uses: actions/checkout@v3
+
+ - name: Setup Deno
+ uses: denoland/setup-deno@v1
+ with:
+ deno-version: v1.x
+
+ - name: Build
+ run: deno task build
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..a9b203a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+main.js
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 4533aed..e7c316b 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -1,10 +1,11 @@
{
- "cSpell.customDictionaries": {
- "ku-suli": {
- "name": "ku-suli",
- "path": "${workspaceRoot}/nimi-ku-suli.txt",
- "description": "Ku Suli words of Toki Pona",
- "addWords": true
- }
+ "cSpell.customDictionaries": {
+ "ku-suli": {
+ "name": "ku-suli",
+ "path": "${workspaceRoot}/nimi-ku-suli.txt",
+ "description": "Ku Suli words of Toki Pona",
+ "addWords": true
}
-}
\ No newline at end of file
+ },
+ "deno.enable": true
+}
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 8778093..b9c5d86 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,10 +1,35 @@
# Changelog
+## 0.2.0
+
+For this version, the whole code has been rewritten. The translator can now translate a few more things, although it's still not capable of translating full sentences.
+
+- Implement translator for:
+ - Extended numbering system
+ - Reduplication
+ - _nanpa_ particle
+ - _en_ and _anu_
+- Add button for translating, replacing auto-translate when typing.
+- (Downgrade) Drop support for "a" particle.
+- (Downgrade) Error messages are now very unreliable.
+- (Downgrade) Translator is somewhat slower.
+- Remove Discord DM as contact option.
+- Update translation list:
+ - _tonsi_ – change nouns "transgender", "transgenders", "non-binary", and "non-binaries" into "transgender person", "transgender people", "non-binary person", and "non-binary people" (I DIDN'T MEAN TO OBJECTIFY THEM OMFG I'M SO SORRY 😭😭😭)
+
+Inside update (intended for developers):
+
+- Rewrite the whole code to use TypeScript, modules, and functional programming.
+- Rewrite the parser to use parser combinators.
+- Add language codes to HTML.
+- New wiki for contributors and tinkerers.
+- Overhaul `README.md` so that it only includes build instructions. Information about the translator has moved to the wiki.
+
## 0.1.1
-- Update copyright notice
-- Update version number on the page
-- Update contacts to discord (from `neverRare#1517` to `never_rare`)
+- Update copyright notice.
+- Update version number on the page.
+- Update contacts to Discord. (from `neverRare#1517` to `never_rare`)
## 0.1.0
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..3a12db0
--- /dev/null
+++ b/CONTRIBUTING.md
@@ -0,0 +1,7 @@
+# Contributing
+
+You can do the usual GitHub stuff: open an issue if you've found a problem or have a suggestion; open a pull request if you want to propose changes. If you want to propose a large change, however, please open an issue first (or comment on an already existing issue), and wait for my signal before beginning work.
+
+## The wiki
+
+The wiki provides useful information for contributors, although it's not complete yet. Check it out: [Visit wiki](https://github.com/neverRare/toki-pona-translator/wiki).
diff --git a/LICENSE b/LICENSE
index 92f17bc..0a5db1a 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
MIT License
-Copyright (c) 2022 neverRare
+Copyright (c) 2024 neverRare
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
diff --git a/README.md b/README.md
index 62754b8..dcffb2a 100644
--- a/README.md
+++ b/README.md
@@ -4,65 +4,16 @@ An imperfect Toki Pona to English translator that translates into multiple sente
[Try it](https://neverrare.github.io/toki-pona-translator/)
-## Goals
+## Building
-The goals for this projects are:
+You'll need [Deno](https://deno.com/). Run the following commands:
-- Provide translation that covers most of semantics and meaning of a Toki Pona sentence, but it doesn't have to be complete. This gives translations for users to scan into to give them a feel of how broad a Toki Pona sentence can mean.
-- As much as possible, provide translations that are grammatically sound: not just correct but also feels right. For example, "red one thing" sounds off than "one red thing". Due to the difference of English and Toki Pona and nuances of English, the translator may fall severely short for this goal, but we can try!
+```
+git clone https://github.com/neverRare/toki-pona-translator.git
+cd toki-pona-translator
+deno task build
+```
-## Non-goals
+Then open `./index.html` using your favorite browser.
-- Provide every possible translations.
-- Handle every edge cases of Toki Pona grammar. Some edge cases are listed in [limitations] along with others.
-- Handle compounds such as translating "tomo tawa" into "vehicle"
-- Translate Tokiponized proper word into what it was before such as translating "Manka" into "Minecraft"
-
-Some of these may be lifted in the future.
-
-## Terminology
-
-These are the terminology used in [limitations]
-
-- Headword – A single part of speech that in English, can be a noun, a verb, or an adjective; what the phrase starts with.
-- Modifier – A part of speech that modifies headword or another modifier.
-- Phrase – Headword and its modifiers.
-- Clause – A part of sentence without "la" particle, "taso" particle in the beginning, "a" particles in the beginning and the end; found before and after "la", or the sentence itself without particles around it if it doesn't have "la".
-- Proper Word – Proper name; Capitalized in Toki Pona.
-
-## Limitations
-
-[limitations]: #limitations
-
-The following are currently unrecognized (non-definitive but pedantic).
-
-- Full sentences: It can only translate phrases for now. The following limitations pretends the translator can translate full sentences, this is because these are planned limitations.
-- Non-pu vocabulary with exception to "pu" ("tonsi" is included in the vocabulary)
-- Multiple sentences
-- Comma as sentence separator (commas are treated as decoration and ignored)
-- Proper word as headword
-- Having multiple consecutive "a"s inside a sentence (in the beginning or end is fine)
-- "taso" as headword ("taso" is currently recognized as modifier or particle at the beginning of a sentence)
-- Having no clause before or after "la" particle
-- "mi/sina li (pred)" constructions
-- "mi/sina (pred) li (pred)" constructions (this would be recognized as "mi (modifier) li (pred)")
-- "mi/sina a (pred)" constructions
-- Clause with both "li" and "o"
-- Clause with multiple "o"s
-- Clause with "en" but without predicate ("li" or "o")
-- "nanpa" as ordinal particle
-- Extended numbering system
-- "kepeken" as headword or modifier
-- Multiple "pi" on a phrase
-- "pi" followed by at most one modifier
-- Multiple separate proper word on a single phrase, unless they're separated by "pi" (Proper words spanning multiple words like "musi Manka Sawa" is fine, this limitation refers to something like "musi Manka pona Sawa"; something like "musi Manka pi kule Sawa" is fine)
-- proper word followed by "pi"
-- "anu" particle
-- "la a"
-- "en a"
-- "li a"
-- "o a"
-- "e a"
-- "pi a"
-
-Some of these may be lifted in the future.
+Whenever you make changes to `./src/*.ts`, you'll need to run `deno task build` again and refresh the browser. Later I'll make a script to automate this.
diff --git a/bundle.ts b/bundle.ts
new file mode 100644
index 0000000..a2e0554
--- /dev/null
+++ b/bundle.ts
@@ -0,0 +1,10 @@
+import { bundle } from "https://deno.land/x/emit@0.34.0/mod.ts";
+
+const SOURCE = "./src/main.ts"; // Entry module to bundle.
+const DESTINATION = "./main.js"; // Output file for the bundled code.
+
+const url = new URL(SOURCE, import.meta.url); // Resolved relative to this script, not the working directory.
+const result = await bundle(url);
+
+const { code } = result;
+await Deno.writeTextFile(new URL(DESTINATION, import.meta.url), code); // Resolved the same way as SOURCE.
diff --git a/deno.json b/deno.json
new file mode 100644
index 0000000..52fee3f
--- /dev/null
+++ b/deno.json
@@ -0,0 +1,16 @@
+{
+ "lock": false,
+ "compilerOptions": {
+ "target": "esnext",
+ "lib": ["dom", "dom.iterable", "dom.asynciterable", "deno.ns"]
+ },
+ "tasks": {
+ "build": "deno run --allow-read --allow-write --allow-env ./bundle.ts"
+ },
+ "fmt": {
+ "include": ["./src/**/*.ts", "./bundle.ts", "./test-parser.ts"]
+ },
+ "lint": {
+ "include": ["./src/**/*.ts", "./bundle.ts", "./test-parser.ts"]
+ }
+}
diff --git a/index.html b/index.html
index e71bb0a..b84283d 100644
--- a/index.html
+++ b/index.html
@@ -1,5 +1,5 @@
-
+
@@ -24,11 +24,15 @@ Toki Pona Translator
An imperfect Toki Pona to English translator that translates into multiple
sentences.
- Limitations .
-
+
+
+ Translate
+
@@ -51,7 +55,6 @@ Toki Pona Translator
>Toki Pona to multiple English sentence translator.
- Dm me on Discord: never_rare
Email me:
Toki Pona Translator
- © 2024 neverRare -
[`so ${word}`, `(${word})`]);
- }
- for (const modifier of phrase.modifiers) {
- switch (modifier.type) {
- case "proper word":
- return [];
- case "word":
- if (modifier.emphasized) {
- translations = translations.flatMap((word) =>
- ADVERB[modifier.word].flatMap((adverb) => [
- `(${adverb}) ${word}`,
- `so ${adverb} ${word}`,
- ])
- );
- } else {
- translations = translations.flatMap((word) =>
- ADVERB[modifier.word].map((adverb) => `${adverb} ${word}`)
- );
- }
- break;
- case "pi":
- throw new Error("todo");
- }
- }
- if (phrase.emphasis === "whole") {
- translations = translations.map((translation) => `(${translation})`);
- }
- return translations;
-}
-/**
- * translates phrase into adjective without "in X way"
- *
- * this doesn't handle whole phrase emphasis
- */
-function translatePhraseToSimpleAdjective(phrase) {
- let translations = ADJECTIVE[phrase.headword].slice();
- if (phrase.emphasis === "headword") {
- translations = translations.flatMap((word) => [`so ${word}`, `(${word})`]);
- }
- for (const modifier of phrase.modifiers) {
- switch (modifier.type) {
- case "proper word":
- return [];
- case "word":
- if (modifier.emphasized) {
- translations = translations.flatMap((word) =>
- ADVERB[modifier.word].flatMap((adverb) => [
- `(${adverb}) ${word}`,
- `so ${adverb} ${word}`,
- ])
- );
- } else {
- translations = translations.flatMap((word) =>
- ADVERB[modifier.word].map((adverb) => `${adverb} ${word}`)
- );
- }
- break;
- case "pi":
- translations = translations.flatMap((word) =>
- translatePhraseToSimpleAdjective(modifier).map(
- (adverb) => `${adverb} ${word}`
- )
- );
- break;
- }
- }
- return translations;
-}
-/**
- * translates phrase into noun phrase without "of"s
- *
- * this doesn't handle whole phrase emphasis
- */
-function translatePhraseToSimpleNoun(phrase) {
- let translations = NOUN[phrase.headword].slice();
- if (phrase.emphasis === "headword") {
- translations = translations.map((word) => `(${word})`);
- }
- for (const modifier of phrase.modifiers) {
- switch (modifier.type) {
- case "proper word":
- if (modifier.emphasized) {
- translations = translations.map(
- (word) => `${word} (named ${modifier.name})`
- );
- } else {
- translations = translations.map(
- (word) => `${word} named ${modifier.name}`
- );
- }
- break;
- case "word":
- if (modifier.emphasized) {
- translations = translations.flatMap((word) =>
- ADJECTIVE[modifier.word].flatMap((adjective) => [
- `(${adjective}) ${word}`,
- `so ${adjective} ${word}`,
- ])
- );
- } else {
- translations = translations.flatMap((word) =>
- ADJECTIVE[modifier.word].map((adjective) => `${adjective} ${word}`)
- );
- }
- break;
- case "pi":
- translations = translations.flatMap((word) =>
- translatePhraseToSimpleAdjective(modifier).map(
- (adjective) => `${adjective} ${word}`
- )
- );
- break;
- }
- }
- return translations;
-}
-/**
- * translates phrase into adjective phrase with "in X way"
- */
-function translatePhraseToAdjective(phrase) {
- let translations = translatePhraseToSimpleAdjective(phrase);
- for (const [i, item] of phrase.modifiers.entries()) {
- const heads = translatePhraseToSimpleAdjective({
- ...phrase,
- modifiers: [
- ...phrase.modifiers.slice(0, i),
- ...phrase.modifiers.slice(i + 1),
- ],
- });
- switch (item.type) {
- case "proper word":
- continue;
- case "word":
- if (item.emphasized) {
- for (const head of heads) {
- for (const adjective of ADJECTIVE[item.word]) {
- translations.push(`${head} in (${adjective}) way`);
- }
- }
- } else {
- for (const head of heads) {
- for (const adjective of ADJECTIVE[item.word]) {
- translations.push(`${head} in ${adjective} way`);
- }
- }
- }
- break;
- case "pi":
- const phrases = translatePhraseToSimpleAdjective(item);
- for (const head of heads) {
- for (const phrase of phrases) {
- translations.push(`${head} in ${phrase} way`);
- }
- }
- break;
- }
- }
- if (phrase.emphasis === "whole") {
- translations = translations.map((translation) => `(${translation})`);
- }
- return translations;
-}
-/**
- * translates phrase into noun phrase with "of"s
- */
-function translatePhraseToNoun(phrase) {
- let translations = translatePhraseToSimpleNoun(phrase);
- for (const [i, item] of phrase.modifiers.entries()) {
- const heads = translatePhraseToSimpleNoun({
- ...phrase,
- modifiers: [
- ...phrase.modifiers.slice(0, i),
- ...phrase.modifiers.slice(i + 1),
- ],
- });
- switch (item.type) {
- case "proper word":
- continue;
- case "word":
- if (item.emphasized) {
- for (const head of heads) {
- for (const noun of NOUN[item.word]) {
- translations.push(`${head} of (${noun})`);
- }
- }
- } else {
- for (const head of heads) {
- for (const noun of NOUN[item.word]) {
- translations.push(`${head} of ${noun}`);
- }
- }
- }
- break;
- case "pi":
- const phrases = translatePhraseToSimpleNoun(item);
- for (const head of heads) {
- for (const phrase of phrases) {
- translations.push(`${head} of ${phrase}`);
- }
- }
- break;
- }
- }
- if (phrase.emphasis === "whole") {
- translations = translations.map((translation) => `(${translation})`);
- }
- return translations;
-}
-// /**
-// * translates clauses before la
-// */
-// function translateLaClause(clause) {
-// switch (clause.type) {
-// case "phrase":
-// const translations = [
-// ...translatePhraseToAdjective(clause),
-// ...translatePhraseToNoun(clause),
-// ];
-// if (translations.length === 0) {
-// throw new UntranslatableError("complicated phrase");
-// }
-// return translations;
-// default:
-// throw new Error("todo");
-// }
-// }
-/**
- * translates clauses after la or without la
- */
-function translateFinalClause(clause) {
- switch (clause.type) {
- case "phrase":
- const translations = [
- ...translatePhraseToAdjective(clause),
- ...translatePhraseToNoun(clause),
- ];
- if (translations.length === 0) {
- throw new UntranslatableError("complicated phrase");
- }
- return translations;
- default:
- throw new Error("todo");
- }
-}
-/**
- * translates sentence without a or taso
- */
-function translatePureSentence(pureSentence) {
- let translations = [""];
- for (const beforeLa of pureSentence.beforeLa) {
- translations = translations.flatMap((sentence) => {
- switch (beforeLa.type) {
- case "phrase":
- return [
- ...translatePhraseToAdjective(beforeLa).map(
- (translation) => `${sentence}if ${translation}, then `
- ),
- ...translatePhraseToNoun(beforeLa).map(
- (translation) => `${sentence}given ${translation}, `
- ),
- ];
- default:
- throw new Error("todo");
- }
- });
- }
- translations = translations.flatMap((sentence) =>
- translateFinalClause(pureSentence.sentence).map(
- (translation) => `${sentence}${translation}`
- )
- );
- return translations;
-}
-function translateSentence(sentence) {
- let start;
- switch (sentence.start.type) {
- case "none":
- start = "";
- break;
- case "a":
- if (sentence.start.count === 1) {
- start = "ah";
- } else {
- start = Array(sentence.start.count).fill("ha").join("");
- }
- break;
- case "taso":
- if (sentence.start.emphasized) {
- start = "(however),";
- } else {
- start = "however,";
- }
- break;
- }
- let punctuation = ".";
- let end;
- switch (sentence.end.type) {
- case "none":
- end = "";
- break;
- case "a":
- if (sentence.end.count === 1) {
- punctuation = "!";
- end = "";
- } else {
- end = Array(sentence.end.count).fill("ha").join("");
- }
- break;
- }
- if (sentence.type === "a or taso only") {
- return [`${start} ${end}`.trim() + punctuation];
- } else {
- return translatePureSentence(sentence).map(
- (sentence) => `${start} ${sentence} ${end}`.trim() + punctuation
- );
- }
-}
-/**
- * parses string of modifiers
- */
-function parseModifier(array) {
- if (array.length === 0) {
- return [[]];
- }
- let modifiers = [[]];
- let haveName = false;
- // TODO: handle multiple separate proper word as error
- for (const [i, item] of array.entries()) {
- if (item === "pi") {
- const phrase = array.slice(i + 1);
- if (phrase.includes("pi")) {
- throw new UnrecognizedError('multiple "pi"');
- }
- if (phrase.length === 0) {
- throw new UnrecognizedError('no content after "pi"');
- }
- if (phrase.length === 1) {
- throw new UnrecognizedError('single modifier after "pi"');
- }
- const phrases = parsePhrase(array.slice(i + 1));
- modifiers = modifiers.flatMap((arr) =>
- phrases.map((phrase) =>
- arr.concat([
- {
- type: "pi",
- ...phrase,
- },
- ])
- )
- );
- break;
- }
- if (item === "a") {
- for (const arr of modifiers) {
- arr[arr.length - 1].emphasized = true;
- }
- } else if (/^[A-Z]/.test(item)) {
- if (haveName && i > 0 && !/^[A-Z]/.test(array[i - 1])) {
- throw new UnrecognizedError("multiple proper name");
- }
- haveName = true;
- for (const arr of modifiers) {
- if (arr.length > 0 && arr[arr.length - 1].type === "proper word") {
- const properWord = arr.pop();
- arr.push({
- type: "proper word",
- name: properWord.name + " " + item,
- emphasized: false,
- });
- } else {
- arr.push({
- type: "proper word",
- name: item,
- emphasized: false,
- });
- }
- }
- } else if (!MODIFIER.has(item)) {
- if (VOCABULARY.has(item)) {
- throw new UnrecognizedError(`"${item}" as modifier`);
- } else {
- throw new UnrecognizedError(`"${item}"`);
- }
- } else {
- for (const arr of modifiers) {
- arr.push({
- type: "word",
- word: item,
- emphasized: false,
- });
- }
- }
- }
- return modifiers;
-}
-/**
- * parses phrase
- */
-function parsePhrase(array) {
- if (/^[A-Z]/.test(array[0])) {
- throw new UnrecognizedError("Proper name as headword");
- }
- if (!HEADWORD.has(array[0])) {
- if (VOCABULARY.has(array[0])) {
- throw new UnrecognizedError(`"${array[0]}" as headword`);
- } else {
- throw new UnrecognizedError(`"${array[0]}"`);
- }
- }
- if (array[1] === "a") {
- return parseModifier(array.slice(2)).map((modifier) => ({
- headword: array[0],
- emphasis: "headword",
- modifiers: modifier,
- }));
- }
- if (array[array.length - 1] === "a") {
- return [
- ...parseModifier(array.slice(1, -1)).map((modifier) => ({
- headword: array[0],
- emphasis: "whole",
- modifiers: modifier,
- })),
- ...parseModifier(array.slice(1)).map((modifier) => ({
- headword: array[0],
- emphasis: "none",
- modifiers: modifier,
- })),
- ];
- }
- return parseModifier(array.slice(1)).map((modifier) => ({
- headword: array[0],
- emphasis: "none",
- modifiers: modifier,
- }));
-}
-/**
- * parses subject which may have "en" in it
- */
-function parseSubject(array) {
- throw new Error("todo");
-}
-/**
- * parses predicate after "li" or "o", also handles multiple "li"
- */
-function parsePredicate(array) {
- throw new Error("todo");
-}
-/**
- * parses simple sentence without la
- */
-function parseClause(array) {
- if (
- array.length > 1 &&
- (array[0] === "mi" || array[0] === "sina") &&
- !array.includes("li")
- ) {
- if (array[1] === "a") {
- if (array.length === 2) {
- throw new UnrecognizedError(`"${array[0]} a (pred)" construction`);
- } else {
- throw new Error("todo");
- }
- }
- throw new Error("todo");
- } else if (array.includes("li")) {
- if ((array[0] === "mi" || array[0] === "sina") && array[1] === "li") {
- throw new UnrecognizedError(`"${array[0]} li (pred)" construction`);
- }
- if (array.includes("o")) {
- throw new UnrecognizedError('Clause with both "li" and "o"');
- }
- throw new Error("todo");
- } else if (array.includes("o")) {
- if (array.slice(array.indexOf("o") + 1).includes("o")) {
- throw new UnrecognizedError('Multiple "o"s');
- }
- throw new Error("todo");
- } else {
- return parsePhrase(array).map((phrase) => ({
- type: "phrase",
- ...phrase,
- }));
- }
-}
-/**
- * parses sentence without "a" and "taso" particles in the start and end of an
- * array
- *
- * if empty array is passed, this will return type of "a or taso only",
- * intended for sentences sentences that only contains a or taso
- */
-function parsePureSentence(array) {
- if (array.length === 0) {
- return [
- {
- type: "a or taso only",
- },
- ];
- }
- const beforeLa = [];
- let sentence = [];
- for (const [i, item] of array.entries()) {
- if (item === "la") {
- if (sentence.length === 0) {
- throw new UnrecognizedError('Having no content before "la"');
- }
- if (array[i + 1] === "a") {
- throw new UnrecognizedError('"la a"');
- }
- beforeLa.push(sentence);
- sentence = [];
- } else {
- sentence.push(item);
- }
- }
- if (sentence.length === 0) {
- throw new UnrecognizedError('Having no content after "la"');
- }
- let beforeLaClauses = [[]];
- for (const clause of beforeLa) {
- beforeLaClauses = beforeLaClauses.flatMap((prev) =>
- parseClause(clause).map((parsedClause) => prev.concat([parsedClause]))
- );
- }
- return parseClause(sentence).flatMap((sentence) =>
- beforeLaClauses.map((clauses) => ({
- type: "la",
- beforeLa: clauses,
- sentence,
- }))
- );
-}
-/**
- * parses sentence
- */
-function parseFromWords(array) {
- if (array.length === 0) {
- return [];
- }
- let start = {
- type: "none",
- };
- let start_slice = 0;
- if (array[0] === "a") {
- let broke = false;
- for (const [i, item] of [...array.entries()]) {
- if (item !== "a") {
- start = {
- type: "a",
- count: i,
- };
- start_slice = i;
- broke = true;
- break;
- }
- }
- if (!broke) {
- return [
- {
- start: {
- type: "a",
- count: array.length,
- },
- end: {
- type: "none",
- },
- type: "a or taso only",
- },
- ];
- }
- } else if (array[0] === "taso") {
- switch (array.length) {
- case 1:
- return [
- {
- start: {
- type: "taso",
- emphasized: false,
- },
- end: {
- type: "none",
- },
- type: "a or taso only",
- },
- ];
- case 2:
- if (array[1] === "a") {
- return [
- {
- start: {
- type: "taso",
- emphasized: true,
- },
- end: {
- type: "none",
- },
- type: "a or taso only",
- },
- {
- start: {
- type: "taso",
- emphasized: false,
- },
- end: {
- type: "a",
- count: 1,
- },
- type: "a or taso only",
- },
- ];
- }
- break;
- }
- if (array[1] === "a") {
- start = {
- type: "taso",
- emphasized: true,
- };
- start_slice = 2;
- } else {
- start = {
- type: "taso",
- emphasized: false,
- };
- start_slice = 1;
- }
- }
- if (array[array.length - 1] === "a") {
- if (array[array.length - 2] === "a") {
- for (let i = 2; i < array.length; i++) {
- if (array[array.length - 1 - i] !== "a") {
- return parsePureSentence(array.slice(start_slice, -i)).map(
- (sentence) => ({
- start,
- end: {
- type: "a",
- count: i,
- },
- ...sentence,
- })
- );
- }
- }
- } else {
- return [
- ...parsePureSentence(array.slice(start_slice)).map((sentence) => ({
- start,
- end: {
- type: "none",
- },
- ...sentence,
- })),
- ...parsePureSentence(array.slice(start_slice, -1)).map((sentence) => ({
- start,
- end: {
- type: "a",
- count: 1,
- },
- ...sentence,
- })),
- ];
- }
- } else {
- return parsePureSentence(array.slice(start_slice)).map((sentence) => ({
- start,
- end: {
- type: "none",
- },
- ...sentence,
- }));
- }
-}
-/**
- * parses toki pona sentence into multiple possible AST represented as array
- */
-function parse(tokiPona) {
- const cleanSentence = tokiPona
- .trim()
- .replace(/[.!?]*$/, "")
- .replaceAll(",", " ");
- if (/[:.!?]/.test(cleanSentence)) {
- throw new UnrecognizedError("Multiple sentences");
- }
- let words = cleanSentence.split(/\s+/);
- if (words[0] === "") {
- words = [];
- }
- if (words.includes("anu")) {
- throw new UnrecognizedError('"anu"');
- }
- // TODO: handle multiple consecutive "a"s inside sentence as error
- return parseFromWords(words);
-}
-function translate(tokiPona) {
- return parse(tokiPona).flatMap(translateSentence);
-}
-document.addEventListener("DOMContentLoaded", () => {
- const input = document.getElementById("input");
- const output = document.getElementById("output");
- const error = document.getElementById("error");
- input.addEventListener("input", () => {
- while (output.children.length > 0) {
- output.removeChild(output.children[0]);
- }
- error.innerText = "";
- let translations;
- try {
- translations = translate(input.value);
- } catch (e) {
- if (e instanceof UnrecognizedError) {
- error.innerText = `${e.message} is unrecognized`;
- return;
- } else if (e instanceof UntranslatableError) {
- error.innerText = `${e.message} can't be translated, but it should be. This is a bug. Consider providing feedback.`;
- return;
- } else {
- throw e;
- }
- }
- if (input.value !== "" && translations.length === 0) {
- error.innerText = `This sentence can't be translated, but it should be. This is a bug. Consider providing feedback.`;
- }
- for (const translation of translations) {
- const emphasized = translation
- .replaceAll("(", "")
- .replaceAll(")", " ");
- const list = document.createElement("li");
- list.innerHTML = emphasized;
- output.appendChild(list);
- }
- });
-});
diff --git a/src/ast.ts b/src/ast.ts
new file mode 100644
index 0000000..3c411f6
--- /dev/null
+++ b/src/ast.ts
@@ -0,0 +1,175 @@
+/** Represents a word unit. */
+export type WordUnit =
+ | { type: "default"; word: string }
+ | { type: "x ala x"; word: string }
+ | { type: "reduplication"; word: string; count: number }
+ | { type: "numbers"; numbers: Array<number> }; // NOTE(review): element type reconstructed as `number` — confirm.
+/** Represents a single modifier. */
+export type Modifier =
+ | { type: "default"; word: WordUnit }
+ | { type: "proper words"; words: string }
+ | { type: "pi"; phrase: Phrase }
+ | { type: "nanpa"; nanpa: WordUnit; phrase: Phrase }
+ | { type: "quotation"; quotation: Quotation };
+/**
+ * Represents a phrase including preverbial phrases, quotations, and
+ * prepositional phrases intended for predicate.
+ */
+export type Phrase =
+ | {
+ type: "default";
+ headWord: WordUnit;
+ modifiers: Array<Modifier>;
+ }
+ | {
+ type: "preverb";
+ preverb: WordUnit;
+ modifiers: Array<Modifier>;
+ phrase: Phrase;
+ }
+ | {
+ type: "preposition";
+ preposition: Preposition;
+ }
+ | {
+ type: "quotation";
+ quotation: Quotation;
+ };
+/** Represents multiple phrases separated by repeated particle or _anu_. */
+export type MultiplePhrases =
+ | { type: "single"; phrase: Phrase }
+ | {
+ type: "and conjunction";
+ phrases: Array<MultiplePhrases>;
+ }
+ | { type: "anu"; phrases: Array<MultiplePhrases> };
+/** Represents a single prepositional phrase. */
+export type Preposition = {
+ preposition: WordUnit;
+ modifiers: Array<Modifier>;
+ /** This cannot be an "and conjunction": only "anu" or "single". */
+ phrases: MultiplePhrases;
+};
+/** Represents multiple predicates. */
+export type MultiplePredicates =
+ | { type: "single"; predicate: Phrase }
+ | {
+ type: "associated";
+ predicates: MultiplePhrases;
+ objects: null | MultiplePhrases;
+ prepositions: Array<Preposition>;
+ }
+ | { type: "and conjunction"; predicates: Array<MultiplePredicates> }
+ | { type: "anu"; predicates: Array<MultiplePredicates> };
+/** Represents a simple clause. */
+export type Clause =
+ | { type: "phrases"; phrases: MultiplePhrases }
+ | {
+ type: "o vocative";
+ phrases: MultiplePhrases;
+ }
+ | {
+ type: "li clause";
+ subjects: MultiplePhrases;
+ predicates: MultiplePredicates;
+ }
+ | {
+ type: "o clause";
+ subjects: null | MultiplePhrases;
+ predicates: MultiplePredicates;
+ }
+ | {
+ type: "prepositions";
+ prepositions: Array<Preposition>;
+ }
+ | {
+ type: "quotation";
+ quotation: Quotation;
+ };
+/** Represents a clause including preclause and postclause. */
+export type FullClause = {
+ taso: null | WordUnit;
+ anuSeme: null | WordUnit;
+ clause: Clause;
+};
+/** Represents a single full sentence. */
+export type Sentence = { laClauses: Array<FullClause>; punctuation: string };
+/** Represents quotation. */
+export type Quotation = {
+ sentences: Array<Sentence>;
+ leftMark: string;
+ rightMark: string;
+};
+/** Returns whether `checker` accepts at least one modifier in `phrase`; a quotation phrase yields `whenQuotation` instead. */ export function someModifierInPhrase(
+ phrase: Phrase,
+ whenQuotation: boolean,
+ checker: (modifier: Modifier) => boolean,
+): boolean {
+ if (phrase.type === "default") {
+ return phrase.modifiers.some(checker); // Tests the phrase's own modifier list; this function does not itself descend into phrases nested inside a modifier.
+ } else if (phrase.type === "preverb") {
+ return phrase.modifiers.some(checker) ||
+ someModifierInPhrase(phrase.phrase, whenQuotation, checker); // Also search the phrase that follows the preverb.
+ } else if (phrase.type === "preposition") {
+ const preposition = phrase.preposition;
+ return preposition.modifiers.some(checker) ||
+ someModifierInMultiplePhrases(
+ preposition.phrases,
+ whenQuotation,
+ checker,
+ );
+ } else if (phrase.type === "quotation") {
+ return whenQuotation; // The quotation's contents are not inspected; the caller picks the answer.
+ } else {
+ throw new Error("unreachable"); // Every variant of the Phrase union is handled above.
+ }
+}
+/** Returns whether `someModifierInPhrase` holds for at least one phrase in `phrases`, recursing through nested "and conjunction" and "anu" groups. */ export function someModifierInMultiplePhrases(
+ phrases: MultiplePhrases,
+ whenQuotation: boolean,
+ checker: (modifier: Modifier) => boolean,
+): boolean {
+ if (phrases.type === "single") {
+ return someModifierInPhrase(phrases.phrase, whenQuotation, checker);
+ } else if (phrases.type === "and conjunction" || phrases.type === "anu") {
+ return phrases.phrases.some((phrases) => // Each element is itself a MultiplePhrases, hence the recursive call.
+ someModifierInMultiplePhrases(phrases, whenQuotation, checker)
+ );
+ } else {
+ throw new Error("unreachable"); // Every variant of the MultiplePhrases union is handled above.
+ }
+}
+/** Returns whether `checker` accepts at least one phrase in `phrases`, recursing through nested "and conjunction" and "anu" groups. */ export function somePhraseInMultiplePhrases(
+ phrases: MultiplePhrases,
+ checker: (modifier: Phrase) => boolean, // The callback receives a Phrase, despite its parameter being labelled `modifier`.
+): boolean {
+ if (phrases.type === "single") {
+ return checker(phrases.phrase);
+ } else if (phrases.type === "and conjunction" || phrases.type === "anu") {
+ return phrases.phrases.some((phrases) => // Each element is itself a MultiplePhrases, hence the recursive call.
+ somePhraseInMultiplePhrases(phrases, checker)
+ );
+ } else {
+ throw new Error("unreachable"); // Every variant of the MultiplePhrases union is handled above.
+ }
+}
+/** Returns whether `checker` accepts at least one object phrase attached to `predicate`, recursing through nested "and conjunction" and "anu" groups. */ export function someObjectInMultiplePredicate(
+ predicate: MultiplePredicates,
+ checker: (object: Phrase) => boolean,
+): boolean {
+ if (predicate.type === "single") {
+ return false; // A "single" predicate carries no objects.
+ } else if (predicate.type === "associated") {
+ if (predicate.objects) { // `objects` is null when the predicate has no object.
+ return somePhraseInMultiplePhrases(predicate.objects, checker);
+ } else {
+ return false;
+ }
+ } else if (predicate.type === "and conjunction" || predicate.type === "anu") {
+ return predicate.predicates.some((predicates) =>
+ someObjectInMultiplePredicate(predicates, checker)
+ );
+ } else {
+ throw new Error("unreachable"); // Every variant of the MultiplePredicates union is handled above.
+ }
+}
diff --git a/src/definition.ts b/src/definition.ts
new file mode 100644
index 0000000..f94eab8
--- /dev/null
+++ b/src/definition.ts
@@ -0,0 +1,416 @@
+/** English glosses of one word, grouped by the part of speech they fill. */
+export type Translation = {
+  noun: Array<string>;
+  adjective: Array<string>;
+  adverb: Array<string>;
+};
+/** Glosses for each Toki Pona word, keyed by the word itself in lowercase. */
+export const DEFINITION: { [key: string]: Translation } = {
+ akesi: {
+ noun: ["reptile", "reptiles", "amphibian", "amphibians"],
+ adjective: ["reptilian", "amphibian"],
+ adverb: [],
+ },
+ ala: {
+ noun: ["nothing", "no"],
+ adjective: ["not", "no"],
+ adverb: ["not"],
+ },
+ alasa: { noun: ["searching"], adjective: [], adverb: [] },
+ ale: {
+ noun: ["everything"],
+ adjective: ["all"],
+ adverb: ["completely"],
+ },
+ ali: { // same glosses as "ale" above
+ noun: ["everything"],
+ adjective: ["all"],
+ adverb: ["completely"],
+ },
+ anpa: {
+ noun: ["bottom", "bottoms", "under"],
+ adjective: ["bottom"],
+ adverb: [],
+ },
+ ante: {
+ noun: ["changing"],
+ adjective: ["different", "other"],
+ adverb: ["differently"],
+ },
+ awen: { noun: ["staying"], adjective: ["staying"], adverb: [] },
+ esun: { noun: ["shop", "shops"], adjective: [], adverb: [] },
+ ijo: { noun: ["thing", "things"], adjective: [], adverb: [] },
+ ike: { noun: ["badness"], adjective: ["bad"], adverb: ["badly"] },
+ ilo: { noun: ["tool", "tools"], adjective: [], adverb: [] },
+ insa: { noun: ["inside", "insides"], adjective: [], adverb: [] },
+ jaki: {
+ noun: ["obscenity", "obscenities"],
+ adjective: ["gross"],
+ adverb: ["disgustingly"],
+ },
+ jan: {
+ noun: ["person", "people", "human", "humans", "humanity"],
+ adjective: ["person-like"],
+ adverb: [],
+ },
+ jelo: { noun: ["yellowness"], adjective: ["yellow"], adverb: [] },
+ jo: {
+ noun: ["possession", "possessions"],
+ adjective: [],
+ adverb: [],
+ },
+ kala: {
+ noun: ["fish", "fishes"],
+ adjective: ["fish-like"],
+ adverb: [],
+ },
+ kalama: {
+ noun: ["sound", "sounds"],
+ adjective: ["sounding"],
+ adverb: [],
+ },
+ kama: { noun: ["arriving"], adjective: ["arriving"], adverb: [] },
+ kasi: {
+ noun: ["plant", "plants"],
+ adjective: ["plant-like"],
+ adverb: [],
+ },
+ ken: {
+ noun: ["ability", "abilities", "possibility", "possibilities"],
+ adjective: [],
+ adverb: [],
+ },
+ kili: {
+ noun: ["fruit", "fruits", "vegetable", "vegetables"],
+ adjective: [],
+ adverb: [],
+ },
+ kiwen: {
+ noun: ["hard thing", "hard things"],
+ adjective: ["hard"],
+ adverb: [],
+ },
+ ko: {
+ noun: ["soft thing", "soft things", "powder"],
+ adjective: ["soft"],
+ adverb: [],
+ },
+ kon: { noun: ["air", "essence"], adjective: [], adverb: [] },
+ kule: {
+ noun: ["color", "colors"],
+ adjective: ["colorful"],
+ adverb: ["colorfully"],
+ },
+ kulupu: { noun: ["group", "groups"], adjective: [], adverb: [] },
+ kute: {
+ noun: ["ear", "ears", "listening"],
+ adjective: [],
+ adverb: [],
+ },
+ lape: {
+ noun: ["sleep", "rest"],
+ adjective: ["sleeping"],
+ adverb: [],
+ },
+ laso: {
+ noun: ["blueness", "greenness"],
+ adjective: ["blue", "green"],
+ adverb: [],
+ },
+ lawa: {
+ noun: ["head", "heads", "control", "controls"],
+ adjective: ["controlling"],
+ adverb: [],
+ },
+ len: {
+ noun: ["cloth", "clothes", "hiding"],
+ adjective: ["hidden"],
+ adverb: [],
+ },
+ lete: {
+ noun: ["coldness"],
+ adjective: ["cold", "uncooked"],
+ adverb: [],
+ },
+ lili: {
+ noun: ["smallness"],
+ adjective: ["small"],
+ adverb: ["slightly"],
+ },
+ linja: {
+ noun: ["long flexible thing", "long flexible things"],
+ adjective: ["long flexible"],
+ adverb: [],
+ },
+ lipu: {
+ noun: ["book", "books", "paper", "paper-like thing", "paper-like things"],
+ adjective: ["paper-like"],
+ adverb: [],
+ },
+ loje: { noun: ["redness"], adjective: ["red"], adverb: [] },
+ lon: {
+ noun: ["truth", "true"],
+ adjective: ["truthful"],
+ adverb: ["truthfully"],
+ },
+ luka: {
+ noun: ["hand", "hands", "arm", "arms"],
+ adjective: [],
+ adverb: [],
+ },
+ lukin: { noun: ["eye", "eyes", "sight"], adjective: [], adverb: [] },
+ lupa: { noun: ["hole", "holes"], adjective: [], adverb: [] },
+ ma: {
+ noun: ["place", "places", "earth"],
+ adjective: ["earthy"],
+ adverb: [],
+ },
+ mama: {
+ noun: ["parent", "parents", "creator", "creators"],
+ adjective: [],
+ adverb: [],
+ },
+ mani: {
+ noun: ["money", "large domestic animal", "large domestic animals"],
+ adjective: [],
+ adverb: [],
+ },
+ meli: {
+ noun: ["woman", "women", "feminity"],
+ adjective: ["woman", "feminine"],
+ adverb: [],
+ },
+ mi: {
+ noun: ["I", "me", "we", "us"],
+ adjective: ["my", "our"],
+ adverb: [],
+ },
+ mije: {
+ noun: ["man", "men", "masculinity"],
+ adjective: ["man", "masculine"],
+ adverb: [],
+ },
+ moku: {
+ noun: ["food", "foods", "drink", "drinks"],
+ adjective: [],
+ adverb: [],
+ },
+ moli: { noun: ["death"], adjective: ["dead", "deadly"], adverb: [] },
+ monsi: { noun: ["back"], adjective: [], adverb: [] },
+ mu: { noun: ["moo"], adjective: ["mooing"], adverb: [] },
+ mun: {
+ noun: ["celestial object", "celestial objects", "glowing thing"],
+ adjective: ["glowing"],
+ adverb: [],
+ },
+ musi: {
+ noun: ["entertainment", "entertainments"],
+ adjective: ["entertaining"],
+ adverb: ["entertainingly"],
+ },
+ mute: { noun: ["many"], adjective: ["many"], adverb: ["very"] },
+ nanpa: {
+ noun: ["number", "numbers"],
+ adjective: ["numeric"],
+ adverb: ["numerically"],
+ },
+ nasa: {
+ noun: ["silliness", "strangeness"],
+ adjective: ["silly", "strange"],
+ adverb: ["strangely"],
+ },
+ nasin: { noun: ["way"], adjective: [], adverb: [] },
+ nena: { noun: ["bump"], adjective: [], adverb: [] },
+ ni: {
+ noun: ["this", "that"],
+ adjective: ["this", "that"],
+ adverb: [],
+ },
+ nimi: {
+ noun: ["name", "names", "word", "words"],
+ adjective: [],
+ adverb: [],
+ },
+ noka: {
+ noun: ["foot", "feet", "leg", "legs"],
+ adjective: [],
+ adverb: [],
+ },
+ olin: { noun: ["love"], adjective: [], adverb: [] },
+ ona: {
+ noun: ["they", "them", "it"],
+ adjective: ["their", "its"],
+ adverb: [],
+ },
+ open: {
+ noun: ["beginning", "beginnings"],
+ adjective: [],
+ adverb: [],
+ },
+ pakala: {
+ noun: ["mistake", "mistakes"],
+ adjective: ["broken"],
+ adverb: [],
+ },
+ pan: { noun: ["grain", "grains"], adjective: [], adverb: [] },
+ pana: { noun: ["giving"], adjective: [], adverb: [] },
+ pali: { noun: ["work"], adjective: ["working"], adverb: [] },
+ palisa: {
+ noun: ["long hard thing", "long hard things"],
+ adjective: ["long hard"],
+ adverb: [],
+ },
+ pilin: { noun: ["emotion", "emotions"], adjective: [], adverb: [] },
+ pimeja: {
+ noun: ["blackness", "brownness", "grayness"],
+ adjective: ["black", "brown", "gray"],
+ adverb: [],
+ },
+ pini: { noun: ["end", "ends"], adjective: ["ended"], adverb: [] },
+ pipi: {
+ noun: ["insect", "insects", "bug", "bugs"],
+ adjective: ["bug-like", "insect-like"],
+ adverb: [],
+ },
+ poka: { noun: ["side", "sides", "hips"], adjective: [], adverb: [] },
+ poki: { noun: ["container"], adjective: [], adverb: [] },
+ pona: {
+ noun: ["goodness", "simplicity"],
+ adjective: ["good", "simple"],
+ adverb: ["nicely"],
+ },
+ pu: { // no glosses are listed for this word
+ noun: [],
+ adjective: [],
+ adverb: [],
+ },
+ sama: { noun: ["similarity"], adjective: [], adverb: ["equally"] },
+ seli: {
+ noun: ["fire", "heat", "chemical reaction", "chemical reactions"],
+ adjective: ["hot"],
+ adverb: [],
+ },
+ selo: {
+ noun: ["outer form", "skin", "boundary", "boundaries"],
+ adjective: [],
+ adverb: [],
+ },
+ seme: {
+ noun: ["what", "which"],
+ adjective: ["what", "which"],
+ adverb: [],
+ },
+ sewi: {
+ noun: ["above", "divinity"],
+ adjective: ["divine"],
+ adverb: ["divinely"],
+ },
+ sijelo: { noun: ["body", "bodies"], adjective: [], adverb: [] },
+ sike: {
+ noun: ["round thing", "round things", "cycle"],
+ adjective: ["round"],
+ adverb: ["repeatedly"],
+ },
+ sin: {
+ noun: ["new thing", "new things"],
+ adjective: ["new"],
+ adverb: ["newly"],
+ },
+ sina: { noun: ["you", "you all"], adjective: ["your"], adverb: [] },
+ sinpin: {
+ noun: ["face", "faces", "wall", "walls"],
+ adjective: [],
+ adverb: [],
+ },
+ sitelen: {
+ noun: ["writing", "writings", "image", "images"],
+ adjective: [],
+ adverb: [],
+ },
+ sona: {
+ noun: ["knowledge"],
+ adjective: ["knowledgeable"],
+ adverb: [],
+ },
+ soweli: {
+ noun: ["animal", "animals"],
+ adjective: ["animal-like"],
+ adverb: [],
+ },
+ suli: {
+ noun: ["hugeness", "importance"],
+ adjective: ["huge", "important"],
+ adverb: ["hugely", "importantly"],
+ },
+ suno: {
+ noun: ["light source", "light sources", "sun"],
+ adjective: ["shining"],
+ adverb: [],
+ },
+ supa: {
+ noun: ["horizontal surface", "horizontal surfaces"],
+ adjective: [],
+ adverb: [],
+ },
+ suwi: {
+ noun: ["sweetness", "cuteness", "innocence"],
+ adjective: ["sweet", "cute", "innocent"],
+ adverb: ["sweetly"],
+ },
+ tan: { noun: ["reason", "origin"], adjective: [], adverb: [] },
+ tawa: { noun: ["movement"], adjective: ["moving"], adverb: [] },
+ telo: { noun: ["liquid"], adjective: ["liquid"], adverb: [] },
+ tenpo: { noun: ["time"], adjective: [], adverb: [] },
+ toki: {
+ noun: [
+ "communication",
+ "communications",
+ "language",
+ "languages",
+ "hello",
+ ],
+ adjective: ["communicating"],
+ adverb: [],
+ },
+ tomo: { noun: ["house", "houses"], adjective: [], adverb: [] },
+ tonsi: {
+ noun: [
+ "transgender person",
+ "transgender people",
+ "non-binary person",
+ "non-binary people",
+ ],
+ adjective: ["transgender", "non-binary"],
+ adverb: [],
+ },
+ tu: { noun: ["pair"], adjective: ["two"], adverb: [] },
+ unpa: { noun: ["sex"], adjective: ["sexual"], adverb: ["sexually"] },
+ uta: { noun: ["mouth"], adjective: [], adverb: [] },
+ utala: {
+ noun: ["conflict", "difficulty"],
+ adjective: ["conflicting", "difficult"],
+ adverb: ["conflictingly", "difficultly"],
+ },
+ walo: {
+ noun: ["whiteness", "paleness"],
+ adjective: ["white", "pale"],
+ adverb: [],
+ },
+ wan: { noun: ["one"], adjective: ["one"], adverb: [] },
+ waso: {
+ noun: ["bird", "birds"],
+ adjective: ["bird-like"],
+ adverb: [],
+ },
+ wawa: {
+ noun: ["power", "powers"],
+ adjective: ["powerful"],
+ adverb: ["powerfully"],
+ },
+ weka: { noun: ["leaving"], adjective: ["leaving"], adverb: [] },
+ wile: {
+ noun: ["want", "wants", "need", "needs"],
+ adjective: [],
+ adverb: [],
+ },
+};
diff --git a/src/error.ts b/src/error.ts
new file mode 100644
index 0000000..d9d67f6
--- /dev/null
+++ b/src/error.ts
@@ -0,0 +1,23 @@
+/** Base error type that an `Output` stores when it holds no value. */
+export class OutputError extends Error {}
+/**
+ * Error that users are never expected to see: it marks code paths thought to
+ * be unreached, and failures that other, non-error outputs should cover.
+ */
+export class UnreachableError extends OutputError {
+ constructor() {
+ super("This is an error you shouldn't see... Please report this error.");
+ }
+}
+/** Error for a construct, named by `token`, that is not implemented yet. */
+export class TodoError extends OutputError {
+ constructor(token: string) {
+ super(`${token} is not yet implemented.`);
+ }
+}
+/** Error for input, described by `token`, that is not recognized. */
+export class UnrecognizedError extends OutputError {
+ constructor(token: string) {
+ super(`${token} is unrecognized.`);
+ }
+}
diff --git a/src/filter.ts b/src/filter.ts
new file mode 100644
index 0000000..856b678
--- /dev/null
+++ b/src/filter.ts
@@ -0,0 +1,314 @@
+import { somePhraseInMultiplePhrases } from "./ast.ts";
+import { MultiplePhrases } from "./ast.ts";
+import {
+ Clause,
+ FullClause,
+ Modifier,
+ Phrase,
+ Preposition,
+ Sentence,
+ someModifierInPhrase,
+ someObjectInMultiplePredicate,
+ WordUnit,
+} from "./ast.ts";
+import { UnrecognizedError } from "./error.ts";
+
+/**
+ * Rules every parsed word unit must pass. A rule returns `true` to keep the
+ * unit and throws `UnrecognizedError` to reject it.
+ */
+export const WORD_UNIT_RULES: Array<(wordUnit: WordUnit) => boolean> = [
+  // reject "seme ala seme"
+  (unit) => {
+    const isSemeAlaSeme = unit.type === "x ala x" && unit.word === "seme";
+    if (isSemeAlaSeme) throw new UnrecognizedError('"seme ala seme"');
+    return true;
+  },
+  // reject reduplicated "wan" and "tu"
+  (unit) => {
+    if (unit.type !== "reduplication") return true;
+    if (unit.word === "wan" || unit.word === "tu") {
+      throw new UnrecognizedError(`reduplication of ${unit.word}`);
+    }
+    return true;
+  },
+];
+/** Filter rules for a single modifier; a rule returns true or throws. */
+export const MODIFIER_RULES: Array<(modifier: Modifier) => boolean> = [
+ // a quotation is not accepted as a modifier
+ (modifier) => {
+ if (modifier.type === "quotation") {
+ throw new UnrecognizedError("quotation as modifier");
+ }
+ return true;
+ },
+ // disallow _nanpa ala nanpa_
+ (modifier) => {
+ if (modifier.type === "nanpa" && modifier.nanpa.type === "x ala x") {
+ throw new UnrecognizedError('"nanpa ala nanpa"');
+ }
+ return true;
+ },
+ // the phrase of a nanpa construction cannot be a preposition
+ (modifier) => {
+ if (modifier.type === "nanpa" && modifier.phrase.type === "preposition") {
+ throw new UnrecognizedError("preposition inside nanpa");
+ }
+ return true;
+ },
+ // the phrase of a nanpa construction cannot be a preverb phrase
+ (modifier) => {
+ if (modifier.type === "nanpa" && modifier.phrase.type === "preverb") {
+ throw new UnrecognizedError("preverb inside nanpa");
+ }
+ return true;
+ },
+ // the phrase of a nanpa construction cannot be a quotation
+ (modifier) => {
+ if (modifier.type === "nanpa" && modifier.phrase.type === "quotation") {
+ throw new UnrecognizedError("quotation inside nanpa");
+ }
+ return true;
+ },
+ // the phrase of a nanpa construction cannot have a pi modifier
+ (modifier) => {
+ if (modifier.type === "nanpa" && modifier.phrase.type === "default") {
+ if (
+ modifier.phrase.modifiers.some((modifier) => modifier.type === "pi")
+ ) {
+ throw new UnrecognizedError("pi inside nanpa");
+ }
+ }
+ return true;
+ },
+ // the phrase of a nanpa construction cannot have a nanpa modifier
+ (modifier) => {
+ if (modifier.type === "nanpa" && modifier.phrase.type === "default") {
+ if (
+ modifier.phrase.modifiers.some((modifier) => modifier.type === "nanpa")
+ ) {
+ throw new UnrecognizedError("nanpa inside nanpa");
+ }
+ }
+ return true;
+ },
+ // the phrase after pi cannot be a preposition
+ (modifier) => {
+ if (modifier.type === "pi" && modifier.phrase.type === "preposition") {
+ throw new UnrecognizedError("preposition inside pi");
+ }
+ return true;
+ },
+ // the phrase after pi must have at least one modifier of its own
+ (modifier) => {
+ if (modifier.type === "pi") {
+ const phrase = modifier.phrase;
+ if (phrase.type === "default" && phrase.modifiers.length === 0) {
+ throw new UnrecognizedError("pi followed by one word");
+ }
+ }
+ return true;
+ },
+ // pi cannot be nested, not even through a nanpa phrase inside it
+ (modifier) => {
+ const checker = (modifier: Modifier) => {
+ if (
+ modifier.type === "default" || modifier.type === "proper words" ||
+ modifier.type === "quotation"
+ ) {
+ return false;
+ } else if (modifier.type === "nanpa") {
+ return someModifierInPhrase(modifier.phrase, false, checker);
+ } else if (modifier.type === "pi") {
+ return true;
+ } else {
+ throw new Error("unreachable error");
+ }
+ };
+ if (modifier.type === "pi") {
+ if (someModifierInPhrase(modifier.phrase, false, checker)) {
+ throw new UnrecognizedError("pi inside pi");
+ }
+ }
+ return true;
+ },
+];
+/** Filter rules for a whole list of modifiers; a rule throws to reject it. */
+export const MODIFIERS_RULES: Array<(modifier: Array<Modifier>) => boolean> = [
+  // no multiple pi
+  (modifiers) => {
+    if (modifiers.filter((modifier) => modifier.type === "pi").length > 1) {
+      throw new UnrecognizedError("multiple pi");
+    }
+    return true;
+  },
+  // no multiple nanpa
+  (modifiers) => {
+    if (modifiers.filter((modifier) => modifier.type === "nanpa").length > 1) {
+      throw new UnrecognizedError("multiple nanpa");
+    }
+    return true;
+  },
+  // no multiple proper words
+  (modifiers) => {
+    const properWords = modifiers.filter((modifier) =>
+      modifier.type === "proper words"
+    );
+    if (properWords.length > 1) {
+      throw new UnrecognizedError("multiple proper words");
+    }
+    return true;
+  },
+  // no multiple number words
+  (modifiers) => {
+    if (modifiers.filter(modifierIsNumeric).length > 1) {
+      throw new UnrecognizedError("multiple number words");
+    }
+    return true;
+  },
+];
+/**
+ * Rules every parsed phrase must pass. A rule returns `true` to keep the
+ * phrase and throws `UnrecognizedError` to reject it; rules run in array
+ * order (see `filter`).
+ */
+export const PHRASE_RULE: Array<(phrase: Phrase) => boolean> = [
+  // a quotation is not accepted as a phrase
+  (phrase) => {
+    if (phrase.type !== "quotation") return true;
+    throw new UnrecognizedError("quotation as phrase");
+  },
+  // a preverb may carry no modifier at all, or exactly the single
+  // modifier _ala_
+  (phrase) => {
+    if (phrase.type !== "preverb") return true;
+    const { modifiers } = phrase;
+    if (modifiersIsAlaOrNone(modifiers)) return true;
+    throw new UnrecognizedError('preverb with modifiers other than "ala"');
+  },
+  // a head word that is already a number (a number unit, or plain "wan" or
+  // "tu") cannot be followed by a numeric modifier
+  (phrase) => {
+    if (phrase.type !== "default") return true;
+    const head = phrase.headWord;
+    const isNumberUnit = head.type === "numbers";
+    const isWanOrTu = head.type === "default" &&
+      (head.word === "wan" || head.word === "tu");
+    if (!isNumberUnit && !isWanOrTu) return true;
+    if (phrase.modifiers.some(modifierIsNumeric)) {
+      throw new UnrecognizedError("Multiple number words");
+    }
+    return true;
+  },
+];
+/** Array of filter rules for preposition. */
+export const PREPOSITION_RULE: Array<(phrase: Preposition) => boolean> = [
+  // Disallow preposition modifiers other than _ala_
+  (preposition) => {
+    if (modifiersIsAlaOrNone(preposition.modifiers)) return true;
+    throw new UnrecognizedError(
+      'preposition with modifiers other than "ala"',
+    );
+  },
+  // Disallow nested preposition
+  (preposition) => {
+    if (
+      somePhraseInMultiplePhrases(preposition.phrases, hasPrepositionInPhrase)
+    ) {
+      throw new UnrecognizedError("Preposition inside preposition");
+    }
+    return true;
+  },
+];
+/** Filter rules for clauses; a rule returns true or throws to reject. */
+export const CLAUSE_RULE: Array<(clause: Clause) => boolean> = [
+  // no preposition among the subject phrases (or a clause's only phrases)
+  (clause) => {
+    let subject: MultiplePhrases;
+    switch (clause.type) {
+      case "phrases":
+      case "o vocative":
+        subject = clause.phrases;
+        break;
+      case "li clause":
+      case "o clause":
+        if (!clause.subjects) return true;
+        subject = clause.subjects;
+        break;
+      default:
+        return true;
+    }
+    if (somePhraseInMultiplePhrases(subject, hasPrepositionInPhrase)) {
+      throw new UnrecognizedError("Preposition in subject");
+    }
+    return true;
+  },
+  // no preposition among the objects of any predicate
+  (clause) => {
+    if (clause.type !== "li clause" && clause.type !== "o clause") return true;
+    const { predicates } = clause;
+    if (someObjectInMultiplePredicate(predicates, hasPrepositionInPhrase)) {
+      throw new UnrecognizedError("Preposition in object");
+    }
+    return true;
+  },
+];
+/** Array of filter rules for full clauses. */
+export const FULL_CLAUSE_RULE: Array<(fullClause: FullClause) => boolean> = [
+  // Prevent "taso ala taso"
+  (fullClause) => {
+    const { taso } = fullClause;
+    if (taso?.type !== "x ala x") return true;
+    throw new UnrecognizedError('"taso ala taso"');
+  },
+];
+/** Array of filter rules for multiple sentences. */
+export const SENTENCES_RULE: Array<(sentences: Array<Sentence>) => boolean> = [
+  // Only allow at most 2 sentences
+  (sentences) => {
+    if (sentences.length > 2) {
+      throw new UnrecognizedError("Multiple sentences");
+    }
+    return true;
+  },
+];
+/** Combines `rules` into one predicate that passes only if every rule does. */
+export function filter<T>(
+  rules: Array<(value: T) => boolean>,
+): (value: T) => boolean {
+  return (value) => rules.every((rule) => rule(value));
+}
+/** Tells whether `modifier` is a plain word modifier that denotes a number. */
+function modifierIsNumeric(modifier: Modifier): boolean {
+  if (modifier.type !== "default") return false;
+  const unit = modifier.word;
+  // a `numbers` unit always counts as numeric
+  if (unit.type === "numbers") return true;
+  // otherwise only the bare words "wan" and "tu" do
+  return unit.type === "default" &&
+    (unit.word === "wan" || unit.word === "tu");
+}
+/**
+ * Helper function for checking if the modifiers is exactly just _ala_ or
+ * nothing.
+ */
+function modifiersIsAlaOrNone(modifiers: Array<Modifier>): boolean {
+  if (modifiers.length === 0) return true;
+  if (modifiers.length > 1) return false;
+  const [modifier] = modifiers;
+  return modifier.type === "default" && modifier.word.type === "default" &&
+    modifier.word.word === "ala";
+}
+/**
+ * Tells whether `phrase` is a preposition, looking through any preverbs that
+ * wrap it. Default phrases and quotations never count.
+ */
+function hasPrepositionInPhrase(phrase: Phrase): boolean {
+  if (phrase.type === "preposition") return true;
+  if (phrase.type === "preverb") return hasPrepositionInPhrase(phrase.phrase);
+  if (phrase.type === "default" || phrase.type === "quotation") {
+    return false;
+  }
+  // every known phrase type is handled above
+  throw new Error("unreachable");
+}
diff --git a/src/main.ts b/src/main.ts
new file mode 100644
index 0000000..3fa2600
--- /dev/null
+++ b/src/main.ts
@@ -0,0 +1,38 @@
+import { translate } from "./translator.ts";
+
+// TODO: maybe use worker
+document.addEventListener("DOMContentLoaded", () => {
+  const byId = (id: string) => document.getElementById(id);
+  const input = byId("input") as HTMLTextAreaElement;
+  const output = byId("output") as HTMLUListElement;
+  const error = byId("error") as HTMLParagraphElement;
+  const button = byId("translate-button") as HTMLButtonElement;
+  /** Clears the previous result, then lists each distinct translation once. */
+  const runTranslation = () => {
+    while (output.firstElementChild) {
+      output.removeChild(output.firstElementChild);
+    }
+    error.innerText = "";
+    const translations = translate(input.value);
+    if (translations.isError()) {
+      error.innerText = translations.error?.message ?? "No error provided";
+      return;
+    }
+    // the same translation can occur more than once; list it only once
+    const shown = new Set();
+    for (const translation of translations.output) {
+      if (shown.has(translation)) continue;
+      shown.add(translation);
+      const item = document.createElement("li");
+      item.innerText = translation;
+      output.appendChild(item);
+    }
+  };
+  button.addEventListener("click", runTranslation);
+  input.addEventListener("keydown", (event) => {
+    // Enter runs the translation; its default action is suppressed
+    if (event.code !== "Enter") return;
+    runTranslation();
+    event.preventDefault();
+  });
+});
diff --git a/src/output.ts b/src/output.ts
new file mode 100644
index 0000000..f725ae0
--- /dev/null
+++ b/src/output.ts
@@ -0,0 +1,83 @@
+import { OutputError } from "./error.ts";
+/** A list of possible values, or an error explaining why there are none. */
+export class Output<T> {
+  /** Represents possibilities, considered error when the array is empty. */
+  output: Array<T>;
+  /** The reason `output` is empty; `null` whenever `output` has values. */
+  error: null | OutputError;
+  /** Wraps values or an error; any other argument gives a blank error. */
+  constructor(output?: undefined | null | Array<T> | OutputError) {
+    if (Array.isArray(output)) {
+      this.output = output;
+      if (output.length === 0) {
+        this.error = new OutputError("no error provided");
+      } else this.error = null;
+    } else if (output instanceof OutputError) {
+      this.output = [];
+      this.error = output;
+    } else {
+      this.output = [];
+      this.error = new OutputError();
+    }
+  }
+  /** Records why there is no value yet; does nothing once values exist. */
+  private setError(error: OutputError): void {
+    if (this.output.length === 0) this.error = error;
+  }
+  /** Adds one value, which also clears any recorded error. */
+  private push(value: T): void {
+    this.output.push(value);
+    this.error = null;
+  }
+  /** Adds the values of another Output, adopting its error if still empty. */
+  private append({ output, error }: Output<T>): void {
+    this.output = [...this.output, ...output];
+    if (this.output.length > 0) this.error = null;
+    else this.error = error;
+  }
+  /** Returns true when the output array is empty */
+  isError(): boolean {
+    return this.output.length === 0;
+  }
+  /** Keeps values accepted by `mapper`, which may return false or throw. */
+  filter(mapper: (value: T) => boolean): Output<T> {
+    return this.map((value) => {
+      if (mapper(value)) return value;
+      throw new OutputError("no error provided");
+    });
+  }
+  /**
+   * Maps all values and returns new Output. For convenience, the mapper
+   * function can throw OutputError, which drops the value it was called
+   * with; when every value is dropped, the most recently thrown error is
+   * reported. Any other kind of error is rethrown.
+   */
+  map<U>(mapper: (value: T) => U): Output<U> {
+    if (this.isError()) return new Output<U>(this.error);
+    const wholeOutput = new Output<U>();
+    for (const value of this.output) {
+      try {
+        wholeOutput.push(mapper(value));
+      } catch (error) {
+        // Record on the result: `this` still holds values and cannot take
+        // the error, so the thrown message would otherwise be lost.
+        if (error instanceof OutputError) wholeOutput.setError(error);
+        else throw error;
+      }
+    }
+    return wholeOutput;
+  }
+  /** Maps each value to an Output and merges the values of all of them. */
+  flatMap<U>(mapper: (value: T) => Output<U>): Output<U> {
+    if (this.isError()) return new Output<U>(this.error);
+    const wholeOutput = new Output<U>();
+    for (const value of this.output) wholeOutput.append(mapper(value));
+    return wholeOutput;
+  }
+  /** Joins the values of all `outputs` into a single Output. */
+  static concat<U>(...outputs: Array<Output<U>>): Output<U> {
+    const wholeOutput = new Output<U>();
+    for (const output of outputs) wholeOutput.append(output);
+    return wholeOutput;
+  }
+}
diff --git a/src/parser.ts b/src/parser.ts
new file mode 100644
index 0000000..14df87a
--- /dev/null
+++ b/src/parser.ts
@@ -0,0 +1,656 @@
+import {
+ Clause,
+ FullClause,
+ Modifier,
+ MultiplePhrases,
+ MultiplePredicates,
+ Phrase,
+ Preposition,
+ Quotation,
+ Sentence,
+ WordUnit,
+} from "./ast.ts";
+import { UnreachableError, UnrecognizedError } from "./error.ts";
+import { Output } from "./output.ts";
+import {
+ CONTENT_WORD,
+ PREPOSITION,
+ PREVERB,
+ SPECIAL_SUBJECT,
+} from "./vocabulary.ts";
+import {
+ CLAUSE_RULE,
+ filter,
+ FULL_CLAUSE_RULE,
+ MODIFIER_RULES,
+ MODIFIERS_RULES,
+ PHRASE_RULE,
+ PREPOSITION_RULE,
+ SENTENCES_RULE,
+ WORD_UNIT_RULES,
+} from "./filter.ts";
+
+/** A single parsing result: the parsed value and the unparsed remainder. */
+type ValueRest<T> = { value: T; rest: string };
+/** The kind of Output that parsers return: every possible parsing result. */
+type ParserOutput<T> = Output<ValueRest<T>>;
+
+/** Wrapper of parser function with added methods for convenience. */
+class Parser<T> {
+  constructor(public readonly parser: (src: string) => ParserOutput<T>) {}
+  /**
+   * Maps the parsing result. For convenience, the mapper function can throw
+   * an OutputError to discard a result; any other error is rethrown.
+   */
+  map<U>(mapper: (value: T) => U): Parser<U> {
+    return new Parser((src) =>
+      this.parser(src).map(({ value, rest }) => ({
+        value: mapper(value),
+        rest,
+      }))
+    );
+  }
+  /**
+   * Filters outputs. The mapper may throw OutputError as well in place of
+   * returning false.
+   */
+  filter(mapper: (value: T) => boolean): Parser<T> {
+    return new Parser((src) =>
+      this.parser(src).filter(({ value }) => mapper(value))
+    );
+  }
+  /**
+   * Parses `this` then passes the parsing result in the mapper. The resulting
+   * parser is then applied to the remaining source.
+   */
+  then<U>(mapper: (value: T) => Parser<U>): Parser<U> {
+    return new Parser((src) =>
+      this.parser(src).flatMap(({ value, rest }) => mapper(value).parser(rest))
+    );
+  }
+  /** Takes another parser and discards the parsing result of `this`. */
+  with<U>(parser: Parser<U>): Parser<U> {
+    return sequence(this, parser).map(([_, output]) => output);
+  }
+  /** Takes another parser and discards its parsing result. */
+  skip<U>(parser: Parser<U>): Parser<T> {
+    return sequence(this, parser).map(([output, _]) => output);
+  }
+}
+/** Creates a parser from a regular expression; it yields the match array. */
+function match(regex: RegExp): Parser<RegExpMatchArray> {
+  // Group the pattern so that "^" anchors every top-level alternative.
+  // NOTE: a `g` flag would make `String.match` drop the capture groups.
+  const newRegex = new RegExp(`^(?:${regex.source})`, regex.flags);
+  return new Parser((src) => {
+    const match = src.match(newRegex);
+    if (match) {
+      return new Output([{ value: match, rest: src.slice(match[0].length) }]);
+    }
+    if (src === "") {
+      return new Output(new UnrecognizedError("Unexpected end of sentence"));
+    }
+    // Report only the first whitespace-delimited token, not the whole rest.
+    const token = src.match(/\S+/)?.[0];
+    if (token) return new Output(new UnrecognizedError(`"${token}"`));
+    return new Output(new UnreachableError());
+  });
+}
+/** Parses nothing and leaves the source string intact. */
+function nothing(): Parser<null> {
+  return new Parser((src) => new Output([{ value: null, rest: src }]));
+}
+/** Parses the end of line (or the end of sentence in context of Toki Pona) */
+function eol(): Parser<null> {
+  return new Parser((src) => {
+    if (src === "") return new Output([{ value: null, rest: "" }]);
+    else return new Output(new UnrecognizedError(`"${src}"`));
+  });
+}
+/** Parses without consuming the source string */
+function lookAhead<T>(parser: Parser<T>): Parser<T> {
+  return new Parser((src) =>
+    parser.parser(src).map(({ value }) => ({ value, rest: src }))
+  );
+}
+/**
+ * Lazily evaluates the parser function only when needed. Useful for recursive
+ * parsers.
+ */
+function lazy<T>(parser: () => Parser<T>): Parser<T> {
+  return new Parser((src) => parser().parser(src));
+}
+/**
+ * Evaluates all parsers on the same source string and sums it all on a single
+ * Output.
+ */
+function choice<T>(...choices: Array<Parser<T>>): Parser<T> {
+  return new Parser((src) =>
+    new Output(choices).flatMap((parser) => parser.parser(src))
+  );
+}
+/**
+ * Tries each parser one at a time and returns only the first Output
+ * without error.
+ */
+function choiceOnlyOne<T>(...choices: Array<Parser<T>>): Parser<T> {
+  return new Parser((src) =>
+    choices.reduce((output, parser) => {
+      if (output.isError()) return parser.parser(src);
+      else return output;
+    }, new Output<ValueRest<T>>())
+  );
+}
+/** Combines `parser` and the `nothing` parser, and output `null | T`. */
+function optional<T>(parser: Parser<T>): Parser<null | T> {
+  return choice<null | T>(parser, nothing());
+}
+/** Takes all parsers and applies them one after another. */
+function sequence<T extends Array<unknown>>(
+  ...sequence: { [I in keyof T]: Parser<T[I]> } & { length: T["length"] }
+): Parser<T> {
+  // We resorted to using `any` types here, make sure it works properly
+  return new Parser((src) =>
+    sequence.reduce(
+      (output, parser) =>
+        output.flatMap(({ value, rest }) =>
+          parser.parser(rest).map(({ value: newValue, rest }) => ({
+            value: [...value, newValue],
+            rest,
+          }))
+        ),
+      // deno-lint-ignore no-explicit-any
+      new Output<ValueRest<any>>([{ value: [], rest: src }]),
+    )
+  );
+}
+/**
+ * Parses `parser` multiple times and returns an `Array<T>`. The resulting
+ * output includes all outputs from parsing nothing to parsing as many as
+ * possible.
+ *
+ * ## ⚠️ Warning
+ *
+ * Will cause infinite recursion if the parser can parse nothing.
+ */
+function many<T>(parser: Parser<T>): Parser<Array<T>> {
+  return choice(
+    sequence(parser, lazy(() => many(parser))).map((
+      [first, rest],
+    ) => [first, ...rest]),
+    nothing().map(() => []),
+  );
+}
+/**
+ * Like `many` but parses at least once.
+ *
+ * ## ⚠️ Warning
+ *
+ * Will cause infinite recursion if the parser can parse nothing.
+ */
+function manyAtLeastOnce<T>(parser: Parser<T>): Parser<Array<T>> {
+  return sequence(parser, many(parser)).map((
+    [first, rest],
+  ) => [first, ...rest]);
+}
+/**
+ * Parses `parser` multiple times and returns an `Array<T>`. This function is
+ * exhaustive unlike `many`.
+ *
+ * ## ⚠️ Warning
+ *
+ * Will cause infinite recursion if the parser can parse nothing.
+ */
+function all<T>(parser: Parser<T>): Parser<Array<T>> {
+  return choiceOnlyOne(
+    sequence(parser, lazy(() => all(parser))).map((
+      [first, rest],
+    ) => [first, ...rest]),
+    nothing().map(() => []),
+  );
+}
+/**
+ * Like `all` but parses at least once.
+ *
+ * ## ⚠️ Warning
+ *
+ * Will cause infinite recursion if the parser can parse nothing.
+ */
+function allAtLeastOnce<T>(parser: Parser<T>): Parser<Array<T>> {
+  return sequence(parser, all(parser)).map(([first, rest]) => [first, ...rest]);
+}
+/** Parses a comma together with any whitespace that follows it. */
+function comma(): Parser<string> {
+  return match(/,\s*/).map(() => ",");
+}
+/** Parses an optional comma. */
+function optionalComma(): Parser<null | string> {
+  return optional(comma());
+}
+/** Parses a lowercase word together with any whitespace that follows it. */
+function word(): Parser<string> {
+  return match(/([a-z]+)\s*/).map(([_, word]) => word);
+}
+/**
+ * Parses one or more capitalized words and joins them into a single string
+ * separated by spaces. This function is exhaustive like `all`.
+ */
+function properWords(): Parser<string> {
+  return allAtLeastOnce(match(/([A-Z][a-z]*)\s*/).map(([_, word]) => word)).map(
+    (array) => array.join(" "),
+  );
+}
+/** Parses word only from `set`; `description` names the role in the error. */
+function wordFrom(set: Set<string>, description: string): Parser<string> {
+  return word().filter((word) => {
+    if (set.has(word)) return true;
+    else throw new UnrecognizedError(`"${word}" as ${description}`);
+  });
+}
+/** Parses a specific word. */
+function specificWord(thatWord: string): Parser<string> {
+  return word().filter((thisWord) => {
+    if (thatWord === thisWord) return true;
+    else throw new UnrecognizedError(`"${thisWord}" instead of "${thatWord}"`);
+  });
+}
+/** Parses word unit without numbers. */
+function wordUnit(word: Parser<string>): Parser<WordUnit> {
+  return choice(
+    word.then((word) =>
+      manyAtLeastOnce(specificWord(word)).map((words) => ({
+        type: "reduplication",
+        word,
+        count: words.length + 1,
+      } as WordUnit))
+    ),
+    word.then((word) => specificWord("ala").with(specificWord(word))).map((
+      word,
+    ) => ({ type: "x ala x", word } as WordUnit)),
+    word.map((word) => ({ type: "default", word } as WordUnit)),
+  ).filter(filter(WORD_UNIT_RULES));
+}
+/**
+ * Parses number words in the fixed order _ale_/_ali_, _mute_, _luka_, _tu_,
+ * _wan_, each repeated any number of times, and yields them as one flat list.
+ *
+ * Fewer than two number words in total is rejected by throwing
+ * `UnreachableError`.
+ */
+function number(): Parser> {
+  const aleOrAli = many(choice(specificWord("ale"), specificWord("ali")));
+  const mute = many(specificWord("mute"));
+  const luka = many(specificWord("luka"));
+  const tu = many(specificWord("tu"));
+  const wan = many(specificWord("wan"));
+  return sequence(aleOrAli, mute, luka, tu, wan).map((groups) => {
+    const words = groups.flat();
+    if (words.length < 2) throw new UnreachableError();
+    return words;
+  });
+}
+/**
+ * Parses a run of modifiers, in three consecutive groups:
+ *
+ * 1. any number of plain modifiers: a word unit drawn from `CONTENT_WORD`,
+ *    proper words, a number, or a quotation (tried in that order);
+ * 2. any number of _nanpa_ modifiers (_nanpa_ followed by a phrase);
+ * 3. any number of _pi_ modifiers (_pi_ followed by a phrase).
+ *
+ * Each modifier is checked against `MODIFIER_RULES`; the flattened list of
+ * all three groups is then checked against `MODIFIERS_RULES`.
+ */
+function modifiers(): Parser> {
+ return sequence(
+ many(
+ choice(
+ wordUnit(wordFrom(CONTENT_WORD, "modifier")).map((word) => ({
+ type: "default",
+ word,
+ } as Modifier)).filter(filter(MODIFIER_RULES)),
+ properWords().map((
+ words,
+ ) => ({ type: "proper words", words } as Modifier)).filter(
+ filter(MODIFIER_RULES),
+ ),
+ number().map((
+ numbers,
+ ) => ({
+ type: "default",
+ word: { type: "numbers", numbers },
+ } as Modifier)).filter(filter(MODIFIER_RULES)),
+ quotation().map((
+ quotation,
+ ) => ({ type: "quotation", quotation } as Modifier)).filter(
+ filter(MODIFIER_RULES),
+ ),
+ ),
+ ),
+ many(
+ sequence(wordUnit(specificWord("nanpa")), phrase()).map((
+ [nanpa, phrase],
+ ) => ({
+ type: "nanpa",
+ nanpa,
+ phrase,
+ } as Modifier)).filter(filter(MODIFIER_RULES)),
+ ),
+ many(
+ specificWord("pi").with(phrase()).map((phrase) => ({
+ type: "pi",
+ phrase,
+ } as Modifier)).filter(filter(MODIFIER_RULES)),
+ ),
+ ).map((
+ [modifiers, nanpaModifiers, piModifiers],
+ ) => [...modifiers, ...nanpaModifiers, ...piModifiers]).filter(
+ filter(MODIFIERS_RULES),
+ );
+}
+/**
+ * Parses a phrase, including preverbal phrases. The alternatives, in the
+ * order they are passed to `choice`:
+ *
+ * 1. a number followed by modifiers;
+ * 2. a preverb (from `PREVERB`), its modifiers and a nested phrase;
+ * 3. a prepositional phrase;
+ * 4. a content word (from `CONTENT_WORD`) followed by modifiers;
+ * 5. a quotation.
+ *
+ * `modifiers`, `phrase` and `preposition` are wrapped in `lazy` because these
+ * parsers refer to each other recursively. The result is checked against
+ * `PHRASE_RULE`.
+ */
+function phrase(): Parser {
+ return choice(
+ sequence(number(), lazy(modifiers)).map((
+ [numbers, modifiers],
+ ) => ({
+ type: "default",
+ headWord: { type: "numbers", numbers },
+ modifiers,
+ } as Phrase)),
+ sequence(
+ wordUnit(wordFrom(PREVERB, "preverb")),
+ lazy(modifiers),
+ lazy(phrase),
+ ).map((
+ [preverb, modifiers, phrase],
+ ) => ({
+ type: "preverb",
+ preverb,
+ modifiers,
+ phrase,
+ } as Phrase)),
+ lazy(preposition).map((preposition) => ({
+ type: "preposition",
+ preposition,
+ } as Phrase)),
+ sequence(
+ wordUnit(wordFrom(CONTENT_WORD, "headword")),
+ lazy(modifiers),
+ ).map(([headWord, modifiers]) => ({
+ type: "default",
+ headWord,
+ modifiers,
+ } as Phrase)),
+ quotation().map((
+ quotation,
+ ) => ({ type: "quotation", quotation } as Phrase)),
+ ).filter(filter(PHRASE_RULE));
+}
+/**
+ * Parses nested phrases joined by the first particle of `nestingRule`,
+ * requiring that particle to occur at least once. Each joined group is parsed
+ * with the remaining rule. An empty rule parses one phrase and wraps it as
+ * `"single"`.
+ *
+ * _en_, _li_, _o_ and _e_ produce type `"and conjunction"`; anything else
+ * produces `"anu"`.
+ */
+function nestedPhrasesOnly(
+  nestingRule: Array<"en" | "li" | "o" | "e" | "anu">,
+): Parser {
+  if (nestingRule.length === 0) {
+    return phrase().map(
+      (single) => ({ type: "single", phrase: single } as MultiplePhrases),
+    );
+  }
+  const [separator, ...innerRule] = nestingRule;
+  const type: "and conjunction" | "anu" =
+    ["en", "li", "o", "e"].includes(separator) ? "and conjunction" : "anu";
+  const firstGroup = nestedPhrases(innerRule);
+  const followingGroups = manyAtLeastOnce(
+    optionalComma().with(specificWord(separator)).with(
+      nestedPhrases(innerRule),
+    ),
+  );
+  return sequence(firstGroup, followingGroups).map(([head, tail]) => ({
+    type,
+    phrases: [head, ...tail],
+  }));
+}
+/**
+ * Parses nested phrases with the given nesting rule. First tries the
+ * top-level particle of the rule via `nestedPhrasesOnly`; failing that, falls
+ * back to the rule with its first particle removed. An empty rule parses one
+ * phrase and wraps it as `"single"`.
+ */
+function nestedPhrases(
+  nestingRule: Array<"en" | "li" | "o" | "e" | "anu">,
+): Parser {
+  if (nestingRule.length !== 0) {
+    const withTopLevel = nestedPhrasesOnly(nestingRule);
+    const withoutTopLevel = nestedPhrases(nestingRule.slice(1));
+    return choice(withTopLevel, withoutTopLevel);
+  }
+  return phrase().map(
+    (single) => ({ type: "single", phrase: single } as MultiplePhrases),
+  );
+}
+/**
+ * Parses subject phrases: phrases joined by _en_ (with _anu_ nested inside),
+ * phrases joined by _anu_ (with _en_ nested inside), or a single phrase.
+ * Tried in that order.
+ */
+function subjectPhrases(): Parser {
+  const enOverAnu = nestedPhrasesOnly(["en", "anu"]);
+  const anuOverEn = nestedPhrasesOnly(["anu", "en"]);
+  const lonePhrase = phrase().map(
+    (single) => ({ type: "single", phrase: single }),
+  );
+  return choice(enOverAnu, anuOverEn, lonePhrase);
+}
+/**
+ * Parses a prepositional phrase: a word unit from `PREPOSITION`, its
+ * modifiers, and phrases that may be joined by _anu_. The result is checked
+ * against `PREPOSITION_RULE`.
+ */
+function preposition(): Parser {
+  const head = wordUnit(wordFrom(PREPOSITION, "preposition"));
+  const headModifiers = modifiers();
+  const objects = nestedPhrases(["anu"]);
+  return sequence(head, headModifiers, objects)
+    .map(([preposition, modifiers, phrases]) => ({
+      preposition,
+      modifiers,
+      phrases,
+    }))
+    .filter(filter(PREPOSITION_RULE));
+}
+/**
+ * Parses predicates joined by the top-level particle of `nestingRule` that
+ * share an optional _e_ object list and any number of trailing prepositional
+ * phrases.
+ *
+ * At least one of the two (objects or prepositions) must be present;
+ * otherwise `UnreachableError` is thrown.
+ */
+function associatedPredicates(
+  nestingRule: Array<"li" | "o" | "anu">,
+): Parser {
+  const predicateGroup = nestedPhrasesOnly(nestingRule);
+  const objectGroup = optional(
+    optionalComma().with(specificWord("e")).with(
+      nestedPhrases(["e", "anu"]),
+    ),
+  );
+  const prepositionGroup = many(optionalComma().with(preposition()));
+  return sequence(predicateGroup, objectGroup, prepositionGroup).map(
+    ([predicates, objects, prepositions]) => {
+      if (objects || prepositions.length !== 0) {
+        return {
+          type: "associated",
+          predicates,
+          objects,
+          prepositions,
+        };
+      }
+      throw new UnreachableError();
+    },
+  );
+}
+/**
+ * Parses multiple predicates without _li_ nor _o_ at the beginning.
+ *
+ * With an empty rule this is either associated predicates or one phrase
+ * wrapped as `"single"`. Otherwise the alternatives are, in order: associated
+ * predicates for the whole rule; two or more groups separated by the first
+ * particle of the rule; the same parser for the rest of the rule.
+ */
+function multiplePredicates(
+  nestingRule: Array<"li" | "o" | "anu">,
+): Parser {
+  if (nestingRule.length === 0) {
+    return choice(
+      associatedPredicates([]),
+      phrase().map(
+        (predicate) => ({ type: "single", predicate } as MultiplePredicates),
+      ),
+    );
+  }
+  const [separator, ...innerRule] = nestingRule;
+  const type: "and conjunction" | "anu" =
+    separator === "li" || separator === "o" ? "and conjunction" : "anu";
+  // One group on either side of the separator.
+  const group = () =>
+    choice(associatedPredicates(nestingRule), multiplePredicates(innerRule));
+  const associatedOnly = associatedPredicates(nestingRule);
+  const separated = sequence(
+    group(),
+    manyAtLeastOnce(
+      optionalComma().with(specificWord(separator)).with(group()),
+    ),
+  ).map(([head, tail]) => ({
+    type,
+    predicates: [head, ...tail],
+  } as MultiplePredicates));
+  return choice(associatedOnly, separated, multiplePredicates(innerRule));
+}
+/**
+ * Parses a single clause. The alternatives, in the order they are passed to
+ * `choice`:
+ *
+ * 1. _mi_/_sina_ subject directly followed by predicates (no _li_);
+ * 2. one or more prepositional phrases;
+ * 3. subject phrases alone (a lone quotation phrase is rejected here by
+ *    throwing `UnreachableError`);
+ * 4. subject phrases followed by _o_ (vocative);
+ * 5. subject phrases, _li_, predicates;
+ * 6. _o_ followed by predicates, without a subject;
+ * 7. subject phrases, _o_, predicates;
+ * 8. a quotation.
+ *
+ * The result is checked against `CLAUSE_RULE`.
+ *
+ * NOTE(review): the subject phrase synthesized for alternative 1 carries an
+ * `alaQuestion: false` field that `phrase()` never sets; possibly a leftover,
+ * confirm against the `Phrase` type.
+ */
+function clause(): Parser {
+ return choice(
+ sequence(
+ wordFrom(SPECIAL_SUBJECT, "mi/sina subject"),
+ multiplePredicates(["li", "anu"]),
+ ).map(([subject, predicates]) => ({
+ type: "li clause",
+ subjects: {
+ type: "single",
+ phrase: {
+ type: "default",
+ headWord: { type: "default", word: subject },
+ alaQuestion: false,
+ modifiers: [],
+ },
+ },
+ predicates,
+ } as Clause)),
+ sequence(
+ preposition(),
+ many(optionalComma().with(preposition())),
+ ).map(([preposition, morePreposition]) => ({
+ type: "prepositions",
+ prepositions: [preposition, ...morePreposition],
+ } as Clause)),
+ subjectPhrases().map((phrases) => {
+ if (phrases.type === "single" && phrases.phrase.type === "quotation") {
+ throw new UnreachableError();
+ } else {
+ return { type: "phrases", phrases } as Clause;
+ }
+ }),
+ subjectPhrases().skip(specificWord("o")).map((phrases) => ({
+ type: "o vocative",
+ phrases,
+ } as Clause)),
+ sequence(
+ subjectPhrases(),
+ optionalComma().with(specificWord("li")).with(
+ multiplePredicates(["li", "anu"]),
+ ),
+ ).map(([subjects, predicates]) => ({
+ type: "li clause",
+ subjects,
+ predicates,
+ } as Clause)),
+ sequence(
+ specificWord("o").with(multiplePredicates(["o", "anu"])),
+ ).map(([predicates]) => ({
+ type: "o clause",
+ subjects: null,
+ predicates,
+ } as Clause)),
+ sequence(
+ subjectPhrases(),
+ optionalComma().with(specificWord("o")).with(
+ multiplePredicates(["o", "anu"]),
+ ),
+ ).map(([subjects, predicates]) => ({
+ type: "o clause",
+ subjects: subjects,
+ predicates,
+ } as Clause)),
+ quotation().map((quotation) => ({
+ type: "quotation",
+ quotation,
+ } as Clause)),
+ ).filter(filter(CLAUSE_RULE));
+}
+/**
+ * Parses a single clause including its preclause and postclause: an optional
+ * leading _taso_ (with optional comma after it), the clause itself, and an
+ * optional trailing _anu seme_ (with optional comma before it). The result is
+ * checked against `FULL_CLAUSE_RULE`.
+ */
+function fullClause(): Parser {
+  const leadingTaso = optional(
+    wordUnit(specificWord("taso")).skip(optionalComma()),
+  );
+  const mainClause = clause();
+  const trailingAnuSeme = optional(
+    optionalComma().with(specificWord("anu")).with(
+      wordUnit(specificWord("seme")),
+    ),
+  );
+  return sequence(leadingTaso, mainClause, trailingAnuSeme)
+    .map(([taso, parsedClause, anuSeme]) => ({
+      taso,
+      anuSeme,
+      clause: parsedClause,
+    }))
+    .filter(filter(FULL_CLAUSE_RULE));
+}
+/**
+ * Parses _la_ with an optional comma next to it. Tried in order: comma then
+ * _la_, _la_ then comma, bare _la_.
+ */
+function la(): Parser {
+  const commaBefore = comma().with(specificWord("la"));
+  const commaAfter = specificWord("la").skip(comma());
+  const bare = specificWord("la");
+  return choice(commaBefore, commaAfter, bare);
+}
+/**
+ * Parses a single full sentence: one full clause, any number of further full
+ * clauses each introduced by _la_, and an ending.
+ *
+ * The ending is end of input, an upcoming closing quotation mark (looked at
+ * but not consumed), or one of `. , : ; ? !`. The first two give an empty
+ * `punctuation`.
+ */
+function sentence(): Parser {
+  const firstClause = fullClause();
+  const furtherClauses = many(la().with(fullClause()));
+  const ending = choice(
+    eol().map(() => ""),
+    lookAhead(closeQuotationMark()).map(() => ""),
+    match(/([.,:;?!])\s*/).map(([, mark]) => mark),
+  );
+  return sequence(firstClause, furtherClauses, ending).map(
+    ([head, rest, punctuation]) => ({
+      laClauses: [head, ...rest],
+      punctuation,
+    }),
+  );
+}
+/**
+ * Parses an opening quotation mark (`"`, `“`, `«` or `「`) and the whitespace
+ * after it, yielding the mark.
+ */
+function openQuotationMark(): Parser {
+  const opening = match(/(["“«「])\s*/);
+  return opening.map(([, symbol]) => symbol);
+}
+/**
+ * Parses a closing quotation mark (`"`, `”`, `»` or `」`) and the whitespace
+ * after it, yielding the mark.
+ */
+function closeQuotationMark(): Parser {
+  const closing = match(/(["”»」])\s*/);
+  return closing.map(([, symbol]) => symbol);
+}
+/**
+ * Parses sentences enclosed in quotation marks. The sentences are checked
+ * against `SENTENCES_RULE`.
+ *
+ * The closing mark must belong to the same family as the opening one
+ * (`"`/`“` with `"`/`”`, `«` with `»`, `「` with `」`); otherwise
+ * `UnrecognizedError` is thrown.
+ */
+function quotation(): Parser {
+  const opening = openQuotationMark();
+  const body = many(lazy(sentence)).filter(filter(SENTENCES_RULE));
+  const closing = closeQuotationMark();
+  return sequence(opening, body, closing).map(
+    ([leftMark, sentences, rightMark]) => {
+      let matched: boolean;
+      switch (leftMark) {
+        case '"':
+        case "“":
+          matched = rightMark === '"' || rightMark === "”";
+          break;
+        case "«":
+          matched = rightMark === "»";
+          break;
+        case "「":
+          matched = rightMark === "」";
+          break;
+        default:
+          throw new UnreachableError();
+      }
+      if (!matched) {
+        throw new UnrecognizedError("Mismatched quotation marks");
+      }
+      return { sentences, leftMark, rightMark };
+    },
+  );
+}
+/**
+ * Parser for a whole Toki Pona text: optional leading whitespace, one or
+ * more sentences, then end of input. The sentences are checked against
+ * `SENTENCES_RULE`; only the parsed values are returned.
+ */
+export function parser(src: string): Output> {
+  const leadingSpace = match(/\s*/);
+  const sentences = allAtLeastOnce(sentence());
+  const wholeText = leadingSpace.with(sentences).skip(eol()).filter(
+    filter(SENTENCES_RULE),
+  );
+  return wholeText.parser(src).map((result) => result.value);
+}
diff --git a/src/translator.ts b/src/translator.ts
new file mode 100644
index 0000000..aeb6701
--- /dev/null
+++ b/src/translator.ts
@@ -0,0 +1,343 @@
+import { Clause } from "./ast.ts";
+import {
+ FullClause,
+ Modifier,
+ MultiplePhrases,
+ Phrase,
+ Sentence,
+ WordUnit,
+} from "./ast.ts";
+import { Output } from "./output.ts";
+import { parser } from "./parser.ts";
+import { TodoError } from "./error.ts";
+import { DEFINITION } from "./definition.ts";
+import { OutputError } from "./error.ts";
+import { UnreachableError } from "./error.ts";
+
+/** A special kind of Output that translators return. */
+export type TranslationOutput = Output;
+
+/**
+ * Numeric value of each number word. `number()` adds these up; _ale_ and
+ * _ali_ share the same value.
+ */
+const WORD_TO_NUMBER: { [word: string]: number } = {
+ ale: 100,
+ ali: 100,
+ mute: 20,
+ luka: 5,
+ tu: 2,
+ wan: 1,
+};
+/**
+ * Helper function for turning array or tuple of Output into Output of array or
+ * tuple.
+ *
+ * Starts from an Output holding one empty array and, for each Output in
+ * `array`, appends each of its values to each array built so far via
+ * `flatMap`/`map`. Presumably this yields every combination that takes one
+ * value from each input Output, assuming `Output.flatMap` and `Output.map`
+ * behave like their array counterparts (TODO confirm in `output.ts`).
+ */
+// TODO: maybe there's a better name
+function rotate>(
+ array: { [I in keyof T]: Output } & { length: T["length"] },
+): Output {
+ // We resorted to using `any` types here, make sure it works properly
+ return array.reduce(
+ // deno-lint-ignore no-explicit-any
+ (result: Output, output) =>
+ result.flatMap((left) => output.map((right) => [...left, right])),
+ // deno-lint-ignore no-explicit-any
+ new Output([[]]),
+ ) as Output;
+}
+/**
+ * Looks up the translations of `word` as the given `kind` in `DEFINITION`.
+ *
+ * The result is the concatenation of an error Output ("No ... translation
+ * found") and the Output of the dictionary entries, in that order.
+ * Presumably the error only surfaces when there are no entries; confirm
+ * against `Output.concat`.
+ */
+function definition(
+  kind: "noun" | "adjective" | "adverb",
+  word: string,
+): TranslationOutput {
+  const notFound = new Output(
+    new OutputError(`No ${kind} translation found for ${word}.`),
+  );
+  const entries = new Output(DEFINITION[word][kind]);
+  return Output.concat(notFound, entries);
+}
+/** Sums the `WORD_TO_NUMBER` values of the given number words. */
+function number(words: Array): number {
+  let total = 0;
+  for (const word of words) {
+    total += WORD_TO_NUMBER[word];
+  }
+  return total;
+}
+/**
+ * Translates a word unit as the given part of speech.
+ *
+ * - `"default"`: dictionary lookup of the word.
+ * - `"numbers"`: the sum of the number words, as a decimal string.
+ * - `"reduplication"`: the translation repeated `count` times, separated by
+ *   spaces.
+ * - `"x ala x"`: not translated yet; yields a `TodoError`. The parser does
+ *   build these units, so this case is not unreachable.
+ *
+ * Any other type yields an `UnreachableError`.
+ */
+function wordUnitAs(
+  kind: "noun" | "adjective" | "adverb",
+  word: WordUnit,
+): TranslationOutput {
+  if (word.type === "default") {
+    return definition(kind, word.word);
+  } else if (word.type === "numbers") {
+    return new Output([number(word.numbers).toString()]);
+  } else if (word.type === "reduplication") {
+    return definition(kind, word.word).map((translation) =>
+      new Array(word.count).fill(translation).join(" ")
+    );
+  } else if (word.type === "x ala x") {
+    return new Output(new TodoError(`translation of ${word.type}`));
+  } else {
+    return new Output(new UnreachableError());
+  }
+}
+/**
+ * Translates a modifier as a word of the given part of speech, to be placed
+ * in front of the head word.
+ *
+ * - `"default"`: translated through `wordUnitAs`.
+ * - `"nanpa"` and `"proper words"`: empty Output (no translation in this
+ *   position).
+ * - `"pi"`: empty Output for adverbs; otherwise the inner phrase translated
+ *   without name and without suffix forms.
+ * - anything else: a `TodoError` naming the modifier type and the requested
+ *   `kind`.
+ */
+function modifierAs(
+  kind: "noun" | "adjective" | "adverb",
+  modifier: Modifier,
+): TranslationOutput {
+  switch (modifier.type) {
+    case "default":
+      return wordUnitAs(kind, modifier.word);
+    case "nanpa":
+    case "proper words":
+      return new Output();
+    case "pi":
+      if (kind === "adverb") {
+        return new Output();
+      }
+      return phraseAs(kind, modifier.phrase, { named: false, suffix: false });
+    default:
+      // Report the part of speech actually requested; this used to say
+      // "adjective" even for nouns and adverbs.
+      return new Output(
+        new TodoError(`translating ${modifier.type} as ${kind}`),
+      );
+  }
+}
+/**
+ * Translates a modifier as a suffix placed after the head word.
+ *
+ * Nouns use the construction "of X", adjectives "in X way".
+ *
+ * - `"default"`: the word unit inside the construction.
+ * - `"nanpa"`: "in position X", X being the translated phrase.
+ * - `"pi"`: the translated phrase inside the construction.
+ * - `"proper words"`: "named X".
+ * - anything else: a `TodoError` naming the modifier type and `kind`.
+ */
+function modifierAsSuffix(
+  kind: "noun" | "adjective",
+  suffix: Modifier,
+): TranslationOutput {
+  // Built with template literals rather than `"of X".replace("X", ...)`, so
+  // that `$` sequences in a translation are never read as replacement
+  // patterns.
+  const construct = (translation: string): string =>
+    kind === "noun" ? `of ${translation}` : `in ${translation} way`;
+  const phraseOptions = { named: kind === "noun", suffix: false };
+  switch (suffix.type) {
+    case "default":
+      return wordUnitAs(kind, suffix.word).map(construct);
+    case "nanpa":
+      return phraseAs(kind, suffix.phrase, phraseOptions).map(
+        (translation) => `in position ${translation}`,
+      );
+    case "pi":
+      return phraseAs(kind, suffix.phrase, phraseOptions).map(construct);
+    case "proper words":
+      return new Output([`named ${suffix.words}`]);
+    default:
+      // Report the part of speech actually requested; this used to say
+      // "noun" even for adjectives.
+      return new Output(
+        new TodoError(`translation of ${suffix.type} as ${kind}`),
+      );
+  }
+}
+/**
+ * Translates a `"default"` phrase as a noun or an adjective.
+ *
+ * Modifiers of a noun are translated as adjectives, modifiers of an
+ * adjective as adverbs. They are placed before the head word in reverse
+ * order. A proper-words modifier becomes a trailing "named ..."; when one is
+ * present and `options.named` is false the result is an empty Output.
+ *
+ * When `options.suffix` is true (the default, like `options.named`), every
+ * non-name modifier is additionally tried as a suffix after the phrase, and
+ * those variants are appended to the plain translations.
+ */
+function defaultPhraseAs(
+  kind: "noun" | "adjective",
+  phrase: Phrase & { type: "default" },
+  options?: {
+    named?: boolean;
+    suffix?: boolean;
+  },
+): TranslationOutput {
+  const allowName = options?.named ?? true;
+  const allowSuffix = options?.suffix ?? true;
+  const nameModifier = phrase.modifiers.find(
+    (modifier) => modifier.type === "proper words",
+  ) as undefined | (Modifier & { type: "proper words" });
+  const name = nameModifier?.words;
+  if (name && !allowName) {
+    return new Output();
+  }
+  const modifierKind: "adjective" | "adverb" = kind === "noun"
+    ? "adjective"
+    : "adverb";
+  const headWord = wordUnitAs(kind, phrase.headWord);
+  const otherModifiers = phrase.modifiers.filter(
+    (modifier) => modifier.type !== "proper words",
+  );
+  // Head word preceded by the given modifiers, plus the name if there is one.
+  const assemble = (list: typeof otherModifiers) =>
+    rotate(
+      [
+        headWord,
+        rotate(list.map((modifier) => modifierAs(modifierKind, modifier))),
+      ] as const,
+    )
+      .map(([head, words]) => [...words.slice().reverse(), head].join(" "))
+      .map((translation) =>
+        name ? `${translation} named ${name}` : translation
+      );
+  const translations = assemble(otherModifiers);
+  if (!allowSuffix) {
+    return translations;
+  }
+  const suffixVariants = otherModifiers.map((asSuffix, index) => {
+    const suffixTranslation = modifierAsSuffix(kind, asSuffix);
+    const remaining = otherModifiers.filter((_, other) => other !== index);
+    return assemble(remaining).flatMap((left) =>
+      suffixTranslation.map((right) => [left, right].join(" "))
+    );
+  });
+  return Output.concat(translations, ...suffixVariants);
+}
+/**
+ * Translates a phrase as a noun or an adjective. Only `"default"` phrases
+ * are handled (through `defaultPhraseAs`); every other phrase type yields a
+ * `TodoError`.
+ */
+function phraseAs(kind: "noun" | "adjective", phrase: Phrase, options?: {
+  named?: boolean;
+  suffix?: boolean;
+}): TranslationOutput {
+  if (phrase.type !== "default") {
+    return new Output(new TodoError(`translation of ${phrase.type}`));
+  }
+  return defaultPhraseAs(kind, phrase, options);
+}
+/**
+ * Translates phrases joined by conjunctions, using `translator` for each
+ * individual phrase.
+ *
+ * `"and conjunction"` is rendered with "and", `"anu"` with "or". `level` is
+ * the remaining nesting depth and is decremented for nested groups:
+ *
+ * - level 2: two items become "a and b"; more become "a, b, and c";
+ * - level 1: items are joined as "a and b and c".
+ *
+ * Throws for any other level or phrase type.
+ */
+function translateMultiplePhrases(
+  phrases: MultiplePhrases,
+  translator: (phrase: Phrase) => TranslationOutput,
+  level = 2,
+): TranslationOutput {
+  if (phrases.type === "single") {
+    return translator(phrases.phrase);
+  } else if (phrases.type === "and conjunction" || phrases.type === "anu") {
+    const conjunction = phrases.type === "and conjunction" ? "and" : "or";
+    const translations = rotate(
+      phrases.phrases.map((inner) =>
+        translateMultiplePhrases(inner, translator, level - 1)
+      ),
+    );
+    if (level === 2) {
+      return translations.map((parts) => {
+        if (parts.length === 2) {
+          return [parts[0], conjunction, parts[1]].join(" ");
+        }
+        const leading = parts.slice(0, parts.length - 1);
+        const last = parts[parts.length - 1];
+        // Explicit separators: a bare `join()` would insert "," between
+        // every piece and produce output such as "a,, ,b,, and c".
+        return [
+          leading.map((translation) => `${translation}, `).join(""),
+          conjunction,
+          " ",
+          last,
+        ].join("");
+      });
+    } else if (level === 1) {
+      return translations.map((parts) => parts.join(` ${conjunction} `));
+    } else {
+      throw new Error("unreachable");
+    }
+  } else {
+    throw new Error("unreachable");
+  }
+}
+/**
+ * Translates a clause.
+ *
+ * - `"phrases"`: translated as nouns; when no _en_ conjunction occurs
+ *   anywhere in the phrases, adjective translations are put in front of the
+ *   noun translations.
+ * - `"o vocative"`: each phrase translated as a noun prefixed with "hey ".
+ * - any other clause type yields a `TodoError`.
+ */
+function translateClause(clause: Clause): TranslationOutput {
+  if (clause.type === "o vocative") {
+    return translateMultiplePhrases(
+      clause.phrases,
+      (phrase) => phraseAs("noun", phrase).map((noun) => `hey ${noun}`),
+    );
+  }
+  if (clause.type !== "phrases") {
+    return new Output(new TodoError(`translation for ${clause.type}`));
+  }
+  // Whether an "and conjunction" occurs at any nesting depth.
+  const containsEn = (group: MultiplePhrases): boolean => {
+    switch (group.type) {
+      case "single":
+        return false;
+      case "and conjunction":
+        return true;
+      case "anu":
+        return group.phrases.some(containsEn);
+      default:
+        throw new Error("unreachable");
+    }
+  };
+  const subject = clause.phrases;
+  const asNouns = translateMultiplePhrases(
+    subject,
+    (phrase) => phraseAs("noun", phrase),
+  );
+  if (containsEn(subject)) {
+    return asNouns;
+  }
+  const asAdjectives = translateMultiplePhrases(
+    subject,
+    (phrase) => phraseAs("adjective", phrase),
+  );
+  return Output.concat(asAdjectives, asNouns);
+}
+/**
+ * Translates a full clause: the clause itself, prefixed with "but " for a
+ * leading _taso_ and suffixed with ", isn't it" for a trailing _anu seme_.
+ * A reduplicated _taso_ or _seme_ repeats its text `count` times.
+ */
+function translateFullClause(fullClause: FullClause): TranslationOutput {
+  let but = "";
+  const taso = fullClause.taso;
+  if (taso) {
+    if (taso.type === "default") {
+      but = "but ";
+    } else if (taso.type === "reduplication") {
+      // `repeat` concatenates without a separator; `Array.prototype.join()`
+      // with no argument would put "," between the copies.
+      but = "but ".repeat(taso.count);
+    }
+  }
+  let isntIt = "";
+  const anuSeme = fullClause.anuSeme;
+  if (anuSeme) {
+    if (anuSeme.type === "default") {
+      isntIt = ", isn't it";
+    } else if (anuSeme.type === "reduplication") {
+      // TODO: better translation
+      isntIt = ", isn't it".repeat(anuSeme.count);
+    }
+  }
+  return translateClause(fullClause.clause).map((clause) =>
+    [but, clause, isntIt].join("")
+  );
+}
+/**
+ * Translates a single sentence. Every clause except the last becomes
+ * "given ..., "; these are followed by the last clause and the sentence's
+ * punctuation.
+ */
+function translateSentence(sentence: Sentence): TranslationOutput {
+  const clauseTranslations = sentence.laClauses.map(translateFullClause);
+  return rotate(clauseTranslations).map((clauses) => {
+    const final = clauses[clauses.length - 1];
+    const contexts = clauses
+      .slice(0, clauses.length - 1)
+      .map((context) => `given ${context}, `);
+    return [...contexts, final, sentence.punctuation].join("");
+  });
+}
+/** Translates multiple sentences and joins them with single spaces. */
+function translateSentences(sentences: Array): TranslationOutput {
+  const perSentence = sentences.map(translateSentence);
+  return rotate(perSentence).map((translated) => translated.join(" "));
+}
+/**
+ * Full Toki Pona translator: parses `src` and translates the parsed
+ * sentences.
+ */
+export function translate(src: string): TranslationOutput {
+  const parsed = parser(src);
+  return parsed.flatMap(translateSentences);
+}
diff --git a/src/vocabulary.ts b/src/vocabulary.ts
new file mode 100644
index 0000000..360d39c
--- /dev/null
+++ b/src/vocabulary.ts
@@ -0,0 +1,155 @@
+/**
+ * Particles. _ala_, _nanpa_ and _taso_ are also listed in `CONTENT_WORD`.
+ */
+export const PARTICLES = new Set([
+ "a",
+ "ala",
+ "anu",
+ "e",
+ "en",
+ "la",
+ "li",
+ "nanpa",
+ "o",
+ "pi",
+ "taso",
+]);
+/**
+ * Content words. Every member of `SPECIAL_SUBJECT`, `NUMBER`, `PREVERB` and
+ * `PREPOSITION` is also listed here.
+ */
+export const CONTENT_WORD = new Set([
+ "akesi",
+ "ala",
+ "alasa",
+ "ale",
+ "ali",
+ "anpa",
+ "ante",
+ "awen",
+ "esun",
+ "ijo",
+ "ike",
+ "ilo",
+ "insa",
+ "jaki",
+ "jan",
+ "jelo",
+ "jo",
+ "kala",
+ "kalama",
+ "kama",
+ "kasi",
+ "ken",
+ "kepeken",
+ "kili",
+ "kiwen",
+ "ko",
+ "kon",
+ "kule",
+ "kulupu",
+ "kute",
+ "lape",
+ "laso",
+ "lawa",
+ "len",
+ "lete",
+ "lili",
+ "linja",
+ "lipu",
+ "loje",
+ "lon",
+ "luka",
+ "lukin",
+ "lupa",
+ "ma",
+ "mama",
+ "mani",
+ "meli",
+ "mi",
+ "mije",
+ "moku",
+ "moli",
+ "monsi",
+ "mu",
+ "mun",
+ "musi",
+ "mute",
+ "nanpa",
+ "nasa",
+ "nasin",
+ "nena",
+ "ni",
+ "nimi",
+ "noka",
+ "olin",
+ "ona",
+ "open",
+ "pakala",
+ "pali",
+ "palisa",
+ "pan",
+ "pana",
+ "pilin",
+ "pimeja",
+ "pini",
+ "pipi",
+ "poka",
+ "poki",
+ "pona",
+ "pu",
+ "sama",
+ "seli",
+ "selo",
+ "seme",
+ "sewi",
+ "sijelo",
+ "sike",
+ "sin",
+ "sina",
+ "sinpin",
+ "sitelen",
+ "sona",
+ "soweli",
+ "suli",
+ "suno",
+ "supa",
+ "suwi",
+ "tan",
+ "taso",
+ "tawa",
+ "telo",
+ "tenpo",
+ "toki",
+ "tomo",
+ "tonsi",
+ "tu",
+ "unpa",
+ "uta",
+ "utala",
+ "walo",
+ "wan",
+ "waso",
+ "wawa",
+ "weka",
+ "wile",
+]);
+/** Special subjects that don't use _li_. */
+export const SPECIAL_SUBJECT = new Set(["mi", "sina"]);
+/** Number words. */
+export const NUMBER = new Set(["wan", "tu", "luka", "mute", "ale", "ali"]);
+/** Preverbs. */
+export const PREVERB = new Set([
+ "alasa",
+ "awen",
+ "kama",
+ "ken",
+ "lukin",
+ "open",
+ "pini",
+ "sona",
+ "wile",
+]);
+/** Prepositions. Each of them is also listed in `CONTENT_WORD`. */
+export const PREPOSITION = new Set([
+ "kepeken",
+ "lon",
+ "sama",
+ "tan",
+ "tawa",
+]);
+/** Full vocabulary: the union of `PARTICLES` and `CONTENT_WORD`. */
+export const VOCABULARY = new Set([...PARTICLES, ...CONTENT_WORD]);
diff --git a/style.css b/style.css
index bea3e1f..ccfc037 100644
--- a/style.css
+++ b/style.css
@@ -1,6 +1,6 @@
body {
margin: 10px;
- font-family: Andika, sans-serif;
+ font-family: sans-serif;
}
a {
color: #0057af;
@@ -36,3 +36,6 @@ a:visited {
color: #ff5e5e;
}
}
+summary {
+ cursor: pointer;
+}