-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathconstituent.py
91 lines (78 loc) · 2.78 KB
/
constituent.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
"""
Calculate all words from the constituent letters.
Used at: http://srmorph.languagebits.com/constituent
"""
# srmorph - Pythonic Experiments in Serbian Morphology
# Copyright (C) 2013 Romeo Mlinar
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
import helpers
from text.makewords import load_dictionaries
def find_words_from_letters(word, lenmin=3, lenmax=False, corpus=False):
    """
    Find all words that can be created from letters in 'word'.

    Arguments:
    word   -- source of the available letters; a letter may be used at
              most as many times as it occurs here.
    lenmin -- shortest candidate length to accept (inclusive).
    lenmax -- longest candidate length to accept (inclusive). Any falsy
              value (False, None, 0) means "not provided" and the
              length of 'word' is used instead. Values larger than
              len(word) are clamped to len(word).
    corpus -- iterable of candidate words. When falsy, the corpus is
              obtained from load_dictionaries(fromclass=False).

    Returns a dictionary with three keys:
    'w'      -- dictionary mapping a word length to the list of
                matching corpus words of that length (corpus order).
    'lenmax_correction_type' --
                'set_as_word_lenth_by_def' if lenmax was not provided,
                'set_as_word_lenth' if it was clamped to len(word),
                'none' if it was used as given.
    'lenmax' -- the maximum length that was actually applied.
    """
    wordlen = len(word)
    # The three cases are mutually exclusive, hence a single
    # if/elif/else chain: with two independent 'if' statements the
    # trailing 'else' would overwrite the "by default" marker.
    if not lenmax:
        lenmax = wordlen
        correction = 'set_as_word_lenth_by_def'
    elif lenmax > wordlen:
        lenmax = wordlen
        correction = 'set_as_word_lenth'
    else:
        correction = 'none'
    words = {}
    words['w'] = {}
    words['lenmax_correction_type'] = correction
    words['lenmax'] = lenmax
    # If not called from class, load the corpus.
    if not corpus:
        corpus = load_dictionaries(fromclass=False)
    # Count the letters of the main word once, outside the loop.
    cmain = get_letters(word)
    by_length = words['w']
    for w in corpus:
        wl = len(w)
        # Cheap length filter first; the letter comparison only runs
        # for candidates within the requested length range.
        if lenmin <= wl <= lenmax and can_be_made(cmain, w):
            by_length.setdefault(wl, []).append(w)
    return words
def can_be_made(cmain, sub):
    """
    Return True if the sub word can be made from the letters of the
    main word, False otherwise.

    cmain is the already counted letter dictionary of the main word
    (letter -> number of occurrences); it is passed in precomputed so
    that it does not have to be recounted for every sub word.
    """
    # Letter counts required by the sub word.
    needed = get_letters(sub)
    # First condition: every letter of the sub word must be present
    # in the main word at all.
    if any(letter not in cmain for letter in needed):
        return False
    # Second condition: no letter may be needed more often than the
    # main word provides it.
    return all(count <= cmain[letter] for letter, count in needed.items())
def get_letters(word):
    """
    Count the letters of the word.

    Returns a dictionary that maps every distinct letter of 'word'
    to the number of times it occurs.
    """
    tally = {}
    for letter in word:
        # A letter not seen so far starts from zero.
        tally[letter] = tally.get(letter, 0) + 1
    return tally