# Provenance: git patch 9045bd753f1be451da1f266ed60aac2b794b5792,
# "Add tools for computing graph statistics" (Levi Gruspe, 2023-05-23),
# which creates colexification_graphs/quantiles.py.

# Copyright 2023 Levi Gruspe
# Licensed under GNU GPLv3 or later
# See https://www.gnu.org/licenses/gpl-3.0.en.html

# pylint: disable=invalid-name
"""Compute quantile scores of edge weights."""

from argparse import ArgumentParser, Namespace
from csv import reader
from pathlib import Path
from statistics import StatisticsError, quantiles

# Numbers of equal-probability intervals for which cut points are printed
# (2 = median, 4 = quartiles, 10 = deciles, 100 = percentiles, ...).
QUANTILE_ORDERS = (2, 3, 4, 5, 10, 100, 1000)


def parse_args() -> Namespace:
    """Parse command-line arguments.

    Returns:
        Namespace with a single attribute, ``graph``: the ``Path`` of the
        tab-separated graph file given on the command line.
    """
    parser = ArgumentParser(description=__doc__)
    parser.add_argument(
        "graph",
        type=Path,
        help="path to graph .tsv file (edge weights in 5th column)",
    )
    return parser.parse_args()


def main(args: Namespace) -> None:
    """Script entrypoint.

    Reads the integer edge weights from the 5th column of ``args.graph``,
    prints them in ascending order, then prints one line per entry of
    ``QUANTILE_ORDERS`` containing ``n`` and the cut points that divide
    the weights into ``n`` intervals.

    Args:
        args: Parsed arguments; only ``args.graph`` is read.

    Raises:
        SystemExit: If ``statistics.quantiles`` rejects the data (for
            example, the file contains no weights). The exit message
            names the input file.
        IndexError: If a non-blank row has fewer than five columns.
        ValueError: If a 5th-column value is not an integer.
    """
    # newline="" is what the csv module expects so that it can do its own
    # line-ending handling.
    with open(args.graph, encoding="utf-8", newline="") as file:
        # csv.reader yields [] for blank lines; skip those instead of
        # failing on row[4].
        weights = sorted(
            int(row[4]) for row in reader(file, delimiter="\t") if row
        )
    print(weights)

    for n in QUANTILE_ORDERS:
        try:
            cut_points = quantiles(weights, n=n)
        except StatisticsError as error:
            # Report which file was unusable instead of a bare traceback.
            raise SystemExit(
                f"{args.graph}: cannot compute quantiles: {error}"
            ) from error
        print(n, cut_points)


if __name__ == "__main__":
    main(parse_args())