forked from assem-ch/arabicstemmer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathMakefile
116 lines (96 loc) · 4 KB
/
Makefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# ---------------------------------------------------------------------------
# Paths used by the recipes below.
#
# The double quotes are part of each value: Make does not strip them, the
# shell does when the recipe line runs. Recipes append path fragments
# directly to the directory variables, so their trailing slash matters.
# ---------------------------------------------------------------------------

# Snowball checkout, and the two algorithm sources that can be copied into it.
SNOWBALL                  := "./snowball/"
ARABIC_STEMMER            := "algorithm/stemmer.sbl"
ARABIC_ROOT_BASED_STEMMER := "algorithm/root_based_stemmer.sbl"

# Input, output and report files for the timing / grouping targets.
VOCFILE      := "test_data/voc.txt"
OUTPUTFILE   := "test_data/output.txt"
GROUPINGFILE := "test_data/grouping.txt"

# Golden corpus: input word list, generated stem/root outputs, checkout dir.
CORPUS_WORDS  := "golden_corpus/core/words.txt"
CORPUS_STEMS  := "test_golden_corpus/stems_output.txt"
CORPUS_ROOTS  := "test_golden_corpus/roots_output.txt"
GOLDEN_CORPUS := "./golden_corpus/"
# Every target in this Makefile names an action, not a file it produces.
# Declaring them phony keeps a same-named file or directory from making Make
# consider the target "up to date" -- the dist and dist_rooter recipes create
# directories with exactly those names.
.PHONY: default download download_snowball download_data download_golden_corpus \
        build build_root_based_stemmer run run_root dist dist_rooter \
        time grouping test get_roots get_stems get_all \
        test_arabicstemmer test_isri clean update

# Default goal (first real target in the file): build the light stemmer.
default: build

# Fetch everything the other targets need: the Snowball sources, the sample
# vocabulary and the golden corpus.
download: download_snowball download_data download_golden_corpus
# Download the Snowball sources from GitHub and unpack them into ./snowball.
#
# - The archive is saved under its own name (snowball-master.zip) rather than
#   master.zip, so it cannot collide with the archive download_golden_corpus
#   fetches when both run from `make -j download`.
# - curl runs with -f so an HTTP error fails the recipe instead of saving an
#   error page as the zip, and without -k so TLS certificates are verified
#   (the downloaded code is subsequently compiled and executed).
# - Assumes ./snowball does not exist yet; if it does, mv places
#   snowball-master inside it instead of renaming.
download_snowball:
	@echo "Wait for download snowball ......"
	@curl -fL -o snowball-master.zip https://github.com/snowballstem/snowball/archive/master.zip
	@echo "Unziping the snowball file ......"
	@unzip snowball-master.zip
	@echo "Rename snowball-master to snowball"
	@mv snowball-master snowball
	@echo " Delete snowball-master.zip ......"
	@rm snowball-master.zip
# Download the Arabic sample vocabulary from snowball-data and place it at
# test_data/voc.txt.
#
# - curl: -f makes an HTTP error fail the recipe; -k was dropped so TLS
#   certificates are verified.
# - gzip -f overwrites a voc.txt left behind by an interrupted earlier run
#   instead of prompting or failing.
# - mkdir -p lets the target be re-run when test_data/ already exists.
download_data:
	@echo "waiting for download test data ..... "
	@curl -fL -O https://github.com/snowballstem/snowball-data/raw/master/arabic/voc.txt.gz
	@echo "Unziping voc.txt.gz"
	@gzip -df voc.txt.gz
	@echo "Creating test_data folder ....."
	@mkdir -p test_data
	@echo "Move voc.txt to test_data ....."
	@mv voc.txt test_data
# Download the golden Arabic corpus, unpack it into ./golden_corpus, create
# the test_golden_corpus/ output directory and run the corpus' own Makefile.
#
# - The archive is saved as golden-corpus-arabic-master.zip rather than
#   master.zip, so it cannot collide with the archive download_snowball
#   fetches when both run from `make -j download`.
# - curl: -f makes an HTTP error fail the recipe; -k was dropped so TLS
#   certificates are verified.
# - mkdir -p lets the step succeed when test_golden_corpus/ already exists.
# - $(MAKE) -C replaces `cd ...; make`: it fails cleanly if the directory is
#   missing and passes -j / -n settings on to the sub-make.
# - Assumes ./golden_corpus does not exist yet (the update target removes it
#   via clean first).
download_golden_corpus:
	@echo "waiting for download golden_corpus_arabic ..... "
	@curl -fL -o golden-corpus-arabic-master.zip https://github.com/LBenzahia/golden-corpus-arabic/archive/master.zip
	@echo "Unziping golden-corpus-arabic-master.zip"
	@unzip golden-corpus-arabic-master.zip
	@echo "Rename golden-corpus-arabic folder to golden_corpus ....."
	@mv golden-corpus-arabic-master golden_corpus
	@echo " Delete golden-corpus-arabic-master.zip ......"
	@rm golden-corpus-arabic-master.zip
	@echo "building golden-corpus-arabic.json ..."
	@mkdir -p test_golden_corpus
	@$(MAKE) -C $(GOLDEN_CORPUS)
# Build the light stemmer: install algorithm/stemmer.sbl as Snowball's
# algorithms/arabic.sbl, then build Snowball.
#
# The sub-build uses $(MAKE) -C instead of `cd ...; make`, so a missing
# Snowball directory is a hard error and -j / -n are passed on to the
# sub-make.
build:
	@echo "Copying the algorithm to snowball..."
	@cp $(ARABIC_STEMMER) $(SNOWBALL)"algorithms/arabic.sbl"
	@echo "Building light stemmer ..."
	@$(MAKE) -C $(SNOWBALL)
# Build the root-based stemmer: install algorithm/root_based_stemmer.sbl as
# Snowball's algorithms/arabic.sbl, then build Snowball.
#
# This writes the same algorithms/arabic.sbl as the `build` target, so
# whichever of the two ran last decides which stemmer the Snowball tree
# contains.
#
# The sub-build uses $(MAKE) -C instead of `cd ...; make`, so a missing
# Snowball directory is a hard error and -j / -n are passed on to the
# sub-make.
build_root_based_stemmer:
	@echo "Copying the algorithm of based-root stemmer to snowball..."
	@cp $(ARABIC_ROOT_BASED_STEMMER) $(SNOWBALL)"algorithms/arabic.sbl"
	@echo "Building based root stemmer ..."
	@$(MAKE) -C $(SNOWBALL)
# Build the light stemmer, then run stemwords on the terminal with
# language "ar" (no -i/-o files are passed).
#
# Uses $(SNOWBALL) rather than a hard-coded "snowball" so the location is
# defined in one place, and && so stemwords is not attempted from the wrong
# directory when cd fails.
run: build
	@echo "Put your words here:"
	@cd $(SNOWBALL) && ./stemwords -l ar
# Build the root-based stemmer, then run stemwords on the terminal with
# language "ar" (no -i/-o files are passed).
#
# Uses $(SNOWBALL) rather than a hard-coded "snowball" so the location is
# defined in one place, and && so stemwords is not attempted from the wrong
# directory when cd fails.
run_root: build_root_based_stemmer
	@echo "Put your words here:"
	@cd $(SNOWBALL) && ./stemwords -l ar
# Package the light stemmer: run Snowball's own `dist` target, then copy each
# generated archive into dist/<language>/.
#
# - $(MAKE) -C replaces `cd ...; make dist` so -j / -n are passed on and a
#   missing Snowball directory is a hard error.
# - mkdir and cp are joined with && so a failed mkdir is reported as the
#   failure instead of being masked by the cp that follows it.
dist: build
	@echo "Compiling the light stemming algorithm to available programming languages"
	@$(MAKE) -C $(SNOWBALL) dist
	@mkdir -p "dist/python/" && cp $(SNOWBALL)dist/snowballstemmer-*.tar.gz "dist/python/"
	@mkdir -p "dist/java/" && cp $(SNOWBALL)"dist/libstemmer_java.tgz" "dist/java/"
	@mkdir -p "dist/c/" && cp $(SNOWBALL)"dist/libstemmer_c.tgz" "dist/c/"
	@mkdir -p "dist/jsx/" && cp $(SNOWBALL)"dist/jsxstemmer.tgz" "dist/jsx/"
# Package the root-based stemmer: run Snowball's own `dist` target, then copy
# each generated archive into dist_rooter/<language>/.
#
# - Fix: the archives are now copied into the dist_rooter/ directories that
#   this recipe creates. Previously they were copied into dist/<language>/,
#   which either failed (directory missing) or overwrote the light stemmer's
#   packages and left dist_rooter/ empty.
# - $(MAKE) -C replaces `cd ...; make dist` so -j / -n are passed on and a
#   missing Snowball directory is a hard error.
# - mkdir and cp are joined with && so a failed mkdir is reported as the
#   failure instead of being masked by the cp that follows it.
dist_rooter: build_root_based_stemmer
	@echo "Compiling the root-based stemming algorithm to available programming languages"
	@$(MAKE) -C $(SNOWBALL) dist
	@mkdir -p "dist_rooter/python/" && cp $(SNOWBALL)dist/snowballstemmer-*.tar.gz "dist_rooter/python/"
	@mkdir -p "dist_rooter/java/" && cp $(SNOWBALL)"dist/libstemmer_java.tgz" "dist_rooter/java/"
	@mkdir -p "dist_rooter/c/" && cp $(SNOWBALL)"dist/libstemmer_c.tgz" "dist_rooter/c/"
	@mkdir -p "dist_rooter/jsx/" && cp $(SNOWBALL)"dist/jsxstemmer.tgz" "dist_rooter/jsx/"
# Time one stemwords pass over the sample vocabulary, reading $(VOCFILE) and
# writing $(OUTPUTFILE). No build prerequisite is declared, so this runs
# whatever stemwords binary is currently in the Snowball directory.
time:
	@echo "Stemming sample timing..."
	@time $(SNOWBALL)stemwords -l ar \
		-i $(VOCFILE) \
		-o $(OUTPUTFILE)
# After the timing run has produced $(OUTPUTFILE), run the grouping test
# script with the output file, the vocabulary file and the grouping file as
# its three arguments, in that order.
grouping: time
	@echo "Stemming sample grouping effect..."
	@python algorithm/test/test_grouping.py \
		$(OUTPUTFILE) \
		$(VOCFILE) \
		$(GROUPINGFILE)
# Aggregate target: the timing run followed by the grouping check.
# (grouping already depends on time; it is listed here as well.)
test: time grouping
# Build the root-based stemmer, then run it over the golden-corpus word list
# ($(CORPUS_WORDS)) and write the result to $(CORPUS_ROOTS), timing the run.
get_roots: build_root_based_stemmer
	@echo "getting roots from words.txt and put it in roots.txt ......"
	@time $(SNOWBALL)stemwords -l ar \
		-i $(CORPUS_WORDS) \
		-o $(CORPUS_ROOTS)
# Build the light stemmer, then run it over the golden-corpus word list
# ($(CORPUS_WORDS)) and write the result to $(CORPUS_STEMS), timing the run.
get_stems: build
	@echo "getting stems from words.txt and put it in stems.txt ......"
	@time $(SNOWBALL)stemwords -l ar \
		-i $(CORPUS_WORDS) \
		-o $(CORPUS_STEMS)
# Produce both golden-corpus outputs: stems (light stemmer) first, then roots
# (root-based stemmer).
#
# get_stems and get_roots each rebuild Snowball after copying a different
# algorithm over the same algorithms/arabic.sbl, so the two must never
# overlap. Listing them as sibling prerequisites serialises them only when
# Make runs without -j; running them as consecutive sub-makes enforces the
# order in every mode. The target is declared phony here because it no
# longer has prerequisites that would force it to run.
.PHONY: get_all
get_all:
	@$(MAKE) get_stems
	@$(MAKE) get_roots
# Regenerate the stem and root outputs (via get_all), then run the stemmer
# test script.
test_arabicstemmer: get_all
	@echo "test arabicstemmer using golden_corpus_arabic......" && \
		python algorithm/test/test_stemmer.py
# Run the ISRI stemmer test script. No prerequisites are declared for this
# target.
test_isri:
	@echo "Testing ISRI Arabic Stemmer using golden_corpus_arabic ..." && \
		python algorithm/test/test_isri.py
# Remove the golden-corpus checkout and the directory holding the generated
# stem/root outputs. The Snowball checkout, test_data/ and the dist
# directories are not touched.
clean:
	@rm -rf golden_corpus test_golden_corpus
# Refresh the golden corpus: delete the current copy, then download and
# build it again.
#
# clean removes golden_corpus/ and download_golden_corpus recreates it, so
# the two must run strictly one after the other. As sibling prerequisites
# they could run concurrently under `make -j`; consecutive sub-makes enforce
# the order in every mode. The target is declared phony here because it no
# longer has prerequisites that would force it to run.
.PHONY: update
update:
	@$(MAKE) clean
	@$(MAKE) download_golden_corpus