-
Notifications
You must be signed in to change notification settings - Fork 0
/
overall_graph.R
64 lines (56 loc) · 3.61 KB
/
overall_graph.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
library(data.table)
# Parse the evaluation score rows into one long table `x` ----
# Input: `x`, a character vector of raw report lines. The original script
# relied on `x` already being present in the workspace because the read below
# was commented out, so a fresh run stopped with "object 'x' not found".
# The read is restored, but only when no usable `x` is available: either it
# does not exist, or it is the table left behind by an earlier run.
# NOTE(review): README.md is the file named in the previously disabled read;
# confirm it is the intended input and is in the working directory.
if (!exists("x") || is.data.frame(x)) {
  x <- readLines("README.md")
}
# Keep only the score rows, i.e. lines that start with a metric name.
x <- grep(
  "^Tokens|^Sentences|^Words|^UPOS|^XPOS|^Feats|^AllTags|^Lemmas|^UAS|^LAS|^CLAS",
  x,
  value = TRUE
)
# Split each row on "|" and trim the cells into a one-row data frame; rows
# with fewer cells than the widest one are padded with NA by `fill = TRUE`.
x <- lapply(
  x,
  FUN = function(x) data.frame(t(trimws(strsplit(x, "\\|")[[1]])))
)
x <- rbindlist(x, fill = TRUE)
colnames(x) <- c("metric", "precision", "recall", "f1", "alignedacc")
# The labels below are assigned by position, so the row count must be exactly
# 2 models x 11 metrics. Fail with a clear message instead of a length error.
if (nrow(x) != 11 * 2) {
  stop(
    "Expected ", 11 * 2, " score rows (11 metrics for each of 2 models), ",
    "found ", nrow(x), ".",
    call. = FALSE
  )
}
x$treebank <- rep("French Sequioa", 11 * 2)
# First 11 rows are labelled UDPipe, the next 11 spaCy.
# NOTE(review): this assumes the input lists the UDPipe table before the
# spaCy table; verify against the input file.
x$model <- c(rep("UDPipe", 11), rep("spaCy", 11))
# Scores were parsed as text (factors on R < 4.0); convert them to numbers.
# Empty or missing cells become NA.
x$precision <- as.numeric(as.character(x$precision))
x$recall <- as.numeric(as.character(x$recall))
x$f1 <- as.numeric(as.character(x$f1))
x$alignedacc <- as.numeric(as.character(x$alignedacc))
library(lattice)
# Make the treebank label categorical and sort the rows by it.
x$treebank <- factor(x$treebank)
x <- x[order(x$treebank), ]
# Drop the XPOS rows of the "French Sequioa" treebank. which() mirrors
# subset() in also dropping rows where the condition evaluates to NA.
x <- x[which(!(x$treebank %in% "French Sequioa" & x$metric == "XPOS")), ]
# Disabled filter kept from the original script:
#x <- subset(x, treebank != "English")
# Word-aligned accuracy: tagging, morphological features, lemmas ----
# Set the lattice strip background colour.
trellis.par.set(name = "strip.background", value = list(col = "honeydew2"))
# One panel per metric (4 x 1 layout), one line per model, free y-axis per
# panel. A lattice plot is only drawn when the trellis object is printed;
# that happens automatically at the console but not under source(), so the
# call is wrapped in print() to make the script draw in both cases.
print(
  xyplot(
    alignedacc ~ treebank | metric,
    groups = model,
    data = subset(x, metric %in% c("UPOS", "XPOS", "Feats", "Lemmas")),
    scales = list(
      x = list(rot = 45, alternating = FALSE),
      y = list(relation = "free")
    ),
    auto.key = list(space = "right", lines = TRUE),
    type = "b",
    pch = 20,
    par.settings = simpleTheme(col = c("red", "blue")),
    layout = c(4, 1),
    ylab = "Word-aligned 'gold' accuracy",
    xlab = "Universal Dependencies Treebank v2.0",
    main = "spaCy/UDPipe accuracy comparison\nParts of Speech tagging, Morphological Features & Lemmatisation"
  )
)
# Word-aligned accuracy: dependency parsing (UAS, LAS, CLAS) ----
# One panel per metric (3 x 1 layout), one line per model; rows of an
# "English" treebank are excluded. A lattice plot is only drawn when the
# trellis object is printed; that happens automatically at the console but
# not under source(), so the call is wrapped in print().
print(
  xyplot(
    alignedacc ~ treebank | metric,
    groups = model,
    data = subset(
      x,
      metric %in% c("UAS", "LAS", "CLAS") & treebank != "English"
    ),
    scales = list(x = list(rot = 45, alternating = FALSE)),
    auto.key = list(space = "right", lines = TRUE),
    type = "b",
    par.settings = simpleTheme(col = c("red", "blue")),
    layout = c(3, 1),
    ylab = "Word-aligned 'gold' accuracy",
    xlab = "Universal Dependencies Treebank v2.0",
    main = "spaCy/UDPipe accuracy comparison\nDependency Parsing"
  )
)
# F1: tokenisation (tokens, words, sentences) ----
# Set the lattice strip background colour.
trellis.par.set(name = "strip.background", value = list(col = "honeydew2"))
# One panel per metric (3 x 1 layout), one line per model, free y-axis per
# panel. A lattice plot is only drawn when the trellis object is printed;
# that happens automatically at the console but not under source(), so the
# call is wrapped in print().
print(
  xyplot(
    f1 ~ treebank | metric,
    groups = model,
    data = subset(x, metric %in% c("Tokens", "Words", "Sentences")),
    scales = list(
      x = list(rot = 45, alternating = FALSE),
      y = list(relation = "free")
    ),
    auto.key = list(space = "right", lines = TRUE),
    type = "b",
    pch = 20,
    par.settings = simpleTheme(col = c("red", "blue")),
    layout = c(3, 1),
    ylab = "F1",
    xlab = "Universal Dependencies Treebank v2.0",
    main = "spaCy/UDPipe F1 comparison\nTokenisation"
  )
)
# F1: tagging, morphological features, lemmas ----
# One panel per metric (4 x 1 layout), one line per model, free y-axis per
# panel. A lattice plot is only drawn when the trellis object is printed;
# that happens automatically at the console but not under source(), so the
# call is wrapped in print().
print(
  xyplot(
    f1 ~ treebank | metric,
    groups = model,
    data = subset(x, metric %in% c("UPOS", "XPOS", "Feats", "Lemmas")),
    scales = list(
      x = list(rot = 45, alternating = FALSE),
      y = list(relation = "free")
    ),
    auto.key = list(space = "right", lines = TRUE),
    type = "b",
    pch = 20,
    par.settings = simpleTheme(col = c("red", "blue")),
    layout = c(4, 1),
    ylab = "F1",
    xlab = "Universal Dependencies Treebank v2.0",
    main = "spaCy/UDPipe F1 comparison\nParts of Speech tagging, Morphological Features & Lemmatisation"
  )
)
# F1: dependency parsing (UAS, LAS, CLAS) ----
# One panel per metric (3 x 1 layout), one line per model; rows of an
# "English" treebank are excluded. A lattice plot is only drawn when the
# trellis object is printed; that happens automatically at the console but
# not under source(), so the call is wrapped in print().
print(
  xyplot(
    f1 ~ treebank | metric,
    groups = model,
    data = subset(
      x,
      metric %in% c("UAS", "LAS", "CLAS") & treebank != "English"
    ),
    scales = list(x = list(rot = 45, alternating = FALSE)),
    auto.key = list(space = "right", lines = TRUE),
    type = "b",
    par.settings = simpleTheme(col = c("red", "blue")),
    layout = c(3, 1),
    ylab = "F1",
    xlab = "Universal Dependencies Treebank v2.0",
    main = "spaCy/UDPipe F1 comparison\nDependency Parsing"
  )
)