Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Collect dataset of expressions and classify by generalized AST #974

Closed
Tracked by #967
LeaVerou opened this issue Oct 2, 2023 · 2 comments
Closed
Tracked by #967

Collect dataset of expressions and classify by generalized AST #974

LeaVerou opened this issue Oct 2, 2023 · 2 comments
Assignees

Comments

@LeaVerou
Copy link
Member

LeaVerou commented Oct 2, 2023

This is part of #967 but could be more broadly useful for when we make changes to expressions.

Here’s some code to collect a dataset of expressions used in Mavo apps on a page:

// Console snippet: gathers the distinct, non-trivial expression strings used by
// every Mavo app on the current page, then copies and logs them as an array.
// A Set is used while collecting so that duplicates are dropped automatically.
let allExpressions = new Set();

for (let appId in Mavo.all) {
	Mavo.all[appId].root.walk(node => {
		// node.expressions is an array in some cases and a Set in others,
		// so emptiness is checked through both `length` and `size`.
		let hasExpressions = node.expressions?.length || node.expressions?.size;

		if (!hasExpressions) {
			return;
		}

		for (let nodeExpression of [...node.expressions]) {
			for (let part of nodeExpression.parsed) {
				// Keep only the parsed parts that carry an expression
				if (!part.expression) {
					continue;
				}

				let source = part.expression;

				// Drop trivial expressions (just an id)
				if (/^[a-z$_]\w*$/.test(source)) {
					continue;
				}

				allExpressions.add(source);
			}
		}
	});
}

// Convert to a plain array so it can be copied and printed as a list.
allExpressions = Array.from(allExpressions);
// NOTE(review): copy() is presumably the browser DevTools console helper that
// puts a value on the clipboard — this snippet is meant to be run from the console.
copy(allExpressions);
console.log(allExpressions);

Note that this will not take action expressions into account, but these don't tend to be very syntactically complex anyway.

Example outputs from various pages:

https://mavo.io/demos

[
	"url('tag')",
	"!tagFilter or count(tag = tagFilter) > 0",
	"if(featured, 'featured')",
	"if(video, 'has-video')",
	"if(starts(url, 'http'), 'external')"
]

https://mavo.io/demos/todo

[
	"count(done)",
	"count(task)"
]

https://mavo.io/demos/svgpath/

[
	"replace(join(pathsummary, ' '), '  ', ' ', 10)",
	"if(absolute, 'absolute')",
	"type != v and type != z",
	"type != h and type != z",
	"type = a",
	"largeArc + 0",
	"sweep + 0",
	"type = c or type = s or type = q",
	"type = c or type = q",
	"type = c",
	"if(absolute, uppercase(type), type)"
]

https://mavo.io/demos/mortgage/

[
	"payment * years * 12",
	"interest / 1200",
	"amount * interest1200 * (1 + 1 / (pow(1 + interest1200 , years * 12) - 1 ))",
	"iff(payment < 1500, block, none)"
]

https://mavo.io/demos/logo/

[
	"height/2",
	"2 * height * tan(angle * PI/180) + thickness",
	"thickness * cos(angle * PI/180)",
	"height * tan(angle * PI/180)",
	"charwidth - thickness",
	"thickness / 2.5",
	"1.5 * charwidth - thickness",
	"2*charwidth - 2*thickness",
	"-height",
	"height * .52 - thickness/2",
	"3*charwidth - 2*thickness + height/2"
]

https://mavo.io/demos/eshop/

[
	"count(product) = 0",
	"sum(subtotal)",
	"$index + 1",
	"amount * quantity"
]

https://mavo.io/demos/foodie/

[
	"count(rating > 3)",
	"count(restaurant)",
	"average(visitRating)",
	"count(visit)",
	"average(dishRating)",
	"count(dish)",
	"day(date)",
	"month(date, 'shortname')",
	"year(date)"
]

https://dmitrysharabin.github.io/mavo-memory-game/

[
	"lowercase(get(game.gameTheme, 'name'))",
	"game.rating",
	"[pluralize(game.move, 'move', 'moves')]",
	"if(game.gameStarted = true, game.timer, '00:00')",
	"random(0, count(themes.theme) - 1)",
	"get(themes.theme, themeNumber)",
	"get(gameTheme, 'symbols')",
	"get(gameTheme, 'color')",
	"if(move < 19, 3, if(move < 26, 2, if(move < 32, 1, 0)))",
	"count(card where starts(state, 'matched-')) = count(card)",
	"minutes($now - startTime) mod 60",
	"seconds($now - startTime) mod 60",
	"if(minutes < 10, 0 & minutes, minutes)",
	"if(seconds < 10, 0 & seconds, seconds)",
	"shuffle(list(symbols, symbols))",
	"if(flipped, 'flipped')",
	"count(attempts) = 0",
	"if(game.gameOver, 'game-over')",
	"game.color",
	"last(stats.attempts)",
	"get(attempt, 'moves')",
	"get(attempt, 'stars')",
	"get(attempt, 'time')",
	"if(game.gameOver, boom)",
	"pluralize(moves, \"move\", \"moves\")",
	"if(stars = 0, \"no stars\", pluralize(stars, \"star\", \"stars\"))"
]

https://dmitrysharabin.github.io/mavo-wordle/

[
	"if(popup.isShowing or statistics.isShowing, backdrop)",
	"if(!mode.practiceMode, result)",
	"if(isOver, statistics.hiddenWord, commonWords.todaysWord)",
	"split(hiddenWord, '')",
	"$today = statistics.date and !mode.practiceMode",
	"count(guesses) + 1",
	"join(guessLetters)",
	"len(guess)",
	"guess in possibleWords.words or guess in commonWords.words",
	"unique(condense(usedLetters where (state = correct and !stateHidden)).letter)",
	"if(seconds($now) - errorTime < 2, visible)",
	"if(isOver, statistics.usedLetters, list())",
	"keyboardLayout.keys",
	"if(attempt > 1, if(contains(join(guesses), key), if(key in correctLetters, correct, if(contains(hiddenWord, key), elsewhere, absent))))",
	"if(isOver, statistics.guesses, list())",
	"if(isShowing, visible)",
	"game.result",
	"$today = date and !mode.practiceMode and game.result != lost",
	"mode.practiceMode",
	"!mode.practiceMode",
	"count(games where guess > 0)",
	"count(games)",
	"round(wins / played * 100) or 0",
	"played = 0",
	"$today = date and !mode.practiceMode",
	"$today + 1 * day()",
	"tomorrow - $now",
	"digits(2, hours(difference))",
	"digits(2, minutes(difference) mod 60)",
	"digits(2, seconds(difference) mod 60)",
	"if(result = won, count(guesses), X)",
	"commonWords.index + 1",
	"played > 0",
	"1 .. 6",
	"if(wins > 0, wins, 'initial')",
	"if(value = 0, '--color: initial; --inset: initial;')",
	"count(filter(games, guess = guessCount)) or 0",
	"if($index mod 5 = 0, \"\\n\") & if(state = correct, \"🟩\", if(state = elsewhere, \"🟨\", \"⬜️\"))",
	"split(join(guesses), '')",
	"if(mode.practiceMode, random(0, count(words) - 1), days($today - startDate) mod count(words))",
	"get(words, index)"
]

Step 1: Expand dataset

Just like I did above, collect even more examples and create a combined dataset.
It may be useful to store the URL with each expression as well.

Step 2: Classify by AST structure

Many of these expressions don't teach us anything new.

For example, the following expressions all have the same general AST structure:

[
	"count(done)",
	"count(task)",
	"sum(subtotal)",
	"count(restaurant)",
	"average(visitRating)",
	"count(visit)",
	"average(dishRating)",
	"count(dish)",
	"day(date)",
	"year(date)",
	"join(guessLetters)",
	"len(guess)"
]

…which is:

{
	"type": "CallExpression",
	"arguments": [
		{
			"type": "Identifier"
		}
	],
	"callee": {
		"type": "Identifier"
	}
}

We should extract the actual ASTs from these expressions (using Mavo.Script.parse(expression)) and classify them by general AST structure (i.e. ignoring the specific names, operators, etc).

Here is some code to map an AST to a generalized AST (an AAST? 😅):

/**
 * Build a "generalized" copy of an AST that keeps only its structure.
 *
 * Every node that has a `type` is stripped of all properties except `type`
 * and the properties listed in `Mavo.Script.childProperties`, so specifics
 * such as names, operators and literal values are removed.
 *
 * @param {object} ast - The AST to generalize. It is not modified.
 * @returns {object} A deep copy of `ast` reduced to its structural skeleton.
 */
function generalizedAST(ast) {
	// Work on a deep copy so the caller's AST is left untouched.
	const skeleton = structuredClone(ast);

	Mavo.Script.walk(skeleton, node => {
		// Only objects that carry a `type` are treated as AST nodes.
		if (!node?.type) {
			return;
		}

		for (const key in node) {
			const isStructural = key === "type" || Mavo.Script.childProperties.includes(key);

			if (!isStructural) {
				delete node[key];
			}
		}
	});

	return skeleton;
}

This can then be serialized to JSON and used as a string key.

@LeaVerou LeaVerou changed the title Collect dataset of expressions Collect dataset of expressions and classify by generalized AST Oct 2, 2023
@LeaVerou
Copy link
Member Author

LeaVerou commented Oct 3, 2023

Assigned @adamjanicki2 for the actual work, and @DmitrySharabin to provide a list of URLs to crawl.

@LeaVerou
Copy link
Member Author

LeaVerou commented Oct 5, 2023

Actually, rather than @DmitrySharabin and @karger providing app URLs on an ad hoc basis, let's all collaborate to build a dataset of Mavo app URLs here: https://coda.io/d/Mavo-research_d8ZCqXVfZJ4/Mavo-App-URLs_subNU#URLs_tuMhW/r2

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

3 participants