generator changes + stopiter fix #319

Open · wants to merge 1 commit into base: master
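Context for the "stopiter fix" in the title: under PEP 479, the default behavior since Python 3.7, a StopIteration that escapes a generator body is re-raised as RuntimeError. Both the explicit raise StopIteration idiom and generators that let one leak now crash. A minimal sketch of the failure and of the rewrite pattern applied throughout this diff (function names here are hypothetical, not from the codebase):

# Pre-3.7 idiom: end a generator early by raising StopIteration.
def take3(iterable):
    for i, x in enumerate(iterable):
        if i == 3:
            raise StopIteration # RuntimeError on Python >= 3.7 (PEP 479).
        yield x

# The rewrite used in this PR: collect values into a list and return it.
def take3_fixed(iterable):
    result = []
    for i, x in enumerate(iterable):
        if i == 3:
            break # A plain break/return replaces raise StopIteration.
        result.append(x)
    return result

print(take3_fixed("abcdef")) # ['a', 'b', 'c']
# list(take3("abcdef")) raises RuntimeError on Python 3.7 and later.

The trade-off is that every converted function now materializes its full output in memory, so laziness is lost; callers that only iterate over the result are unaffected.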
5 changes: 4 additions & 1 deletion examples/05-vector/07-slp.py
@@ -36,10 +36,13 @@ def corpus(path, encoding="utf-8"):
     which is a .txt file with a sentence on each line,
     with slash-encoded tokens (e.g., the/DT cat/NN).
     """
+    result = []
     for s in open(path, encoding=encoding):
         s = list(map(lambda w: w.split("/"), s.strip().split(" ")))
         s = list(map(lambda w: (w[0].replace("&slash;", "/"), w[1]), s))
-        yield s
+        # yield s
+        result.append(s)
+    return result
 
 # The corpus is included in the Pattern download zip, in pattern/test/corpora:
 path = os.path.join(os.path.dirname(__file__), "..", "..", "test", "corpora", "tagged-en-oanc.txt")
20 changes: 16 additions & 4 deletions pattern/db/__init__.py
@@ -1650,8 +1650,11 @@ def __repr__(self):
 def associative(query):
     """ Yields query rows as dictionaries of (field, value)-items.
     """
+    result = []
     for row in query:
-        yield query.record(row)
+        # yield query.record(row)
+        result.append(query.record(row))
+    return result
 
 assoc = associative
 
@@ -2351,8 +2354,11 @@ def __len__(self):
         return len(self._datasheet)
 
     def __iter__(self):
+        result = []
         for i in range(len(self)):
-            yield list.__getitem__(self._datasheet, i)
+            # yield list.__getitem__(self._datasheet, i)
+            result.append(list.__getitem__(self._datasheet, i))
+        return iter(result)
 
     def __repr__(self):
         return repr(self._datasheet)
@@ -2436,8 +2442,11 @@ def __len__(self):
         return len(self._datasheet) > 0 and len(self._datasheet[0]) or 0
 
     def __iter__(self):
+        result = []
         for i in range(len(self)):
-            yield self.__getitem__(i)
+            # yield self.__getitem__(i)
+            result.append(self.__getitem__(i))
+        return iter(result)
 
     def __repr__(self):
         return repr(list(iter(self)))
@@ -2566,8 +2575,11 @@ def __len__(self):
         return len(self._datasheet)
 
     def __iter__(self): # Can be put more simply but optimized for performance:
+        result = []
         for i in range(len(self)):
-            yield list.__getitem__(self._datasheet, i)[self._j]
+            # yield list.__getitem__(self._datasheet, i)[self._j]
+            result.append(list.__getitem__(self._datasheet, i)[self._j])
+        return iter(result)
 
     def __reversed__(self):
         return reversed(list(iter(self)))
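A note on the three __iter__ conversions above: __iter__ must return an iterator object, so the rewrites end with return iter(result) rather than the bare list. A list has no __next__ method, and `for row in obj` would otherwise fail with TypeError: iter() returned non-iterator. A minimal sketch with a hypothetical class:

class Rows:
    def __init__(self, data):
        self._data = data
    def __iter__(self):
        result = []
        for x in self._data:
            result.append(x)
        return iter(result) # A bare `return result` would break for-loops.

for row in Rows([1, 2, 3]):
    print(row) # 1, 2, 3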
11 changes: 7 additions & 4 deletions pattern/graph/__init__.py
@@ -898,11 +898,14 @@ def dijkstra_shortest_path(graph, id1, id2, heuristic=None, directed=False):
         Raises an IndexError between nodes on unconnected graphs.
     """
     # Based on: Connelly Barnes, http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/119466
-    def flatten(list):
+    def flatten(linked_list):
         # Flattens a linked list of the form [0,[1,[2,[]]]]
-        while len(list) > 0:
-            yield list[0]
-            list = list[1]
+        result = []
+        while len(linked_list) > 0:
+            # yield list[0]
+            result.append(linked_list[0])
+            linked_list = linked_list[1]
+        return result
     G = adjacency(graph, directed=directed, heuristic=heuristic)
     q = [(0, id1, ())] # Heap of (cost, path_head, path_rest).
     visited = set() # Visited nodes.
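As a sanity check of the rewritten helper, reproduced standalone here since flatten() is local to dijkstra_shortest_path(): the heap stores a path as nested (head, rest) pairs, which flatten() unwinds front to back:

def flatten(linked_list):
    # Flattens a linked list of the form [0, [1, [2, []]]].
    result = []
    while len(linked_list) > 0:
        result.append(linked_list[0])
        linked_list = linked_list[1]
    return result

print(flatten(["C", ["B", ["A", []]]])) # ['C', 'B', 'A']

Renaming the parameter from list to linked_list also stops the helper from shadowing the built-in list type.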
33 changes: 25 additions & 8 deletions pattern/metrics.py
@@ -69,9 +69,12 @@ def cumsum(iterable):
     """ Returns an iterator over the cumulative sum of values in the given list.
     """
     n = 0
+    result = []
     for x in iterable:
         n += x
-        yield n
+        # yield n
+        result.append(n)
+    return result
 
 #### PROFILER ######################################################################################
 
@@ -465,9 +468,12 @@ def type_token_ratio(string, n=100, punctuation=PUNCTUATION):
         as opposed to the total number of words (= lexical diversity, vocabulary richness).
     """
     def window(a, n=100):
+        result = []
         if n > 0:
             for i in range(max(len(a) - n + 1, 1)):
-                yield a[i:i + n]
+                # yield a[i:i + n]
+                result.append(a[i:i + n])
+        return result
     s = string.lower().split()
     s = [w.strip(punctuation) for w in s]
     # Covington & McFall moving average TTR algorithm.
@@ -519,16 +525,20 @@ def isplit(string, sep="\t\n\x0b\x0c\r "):
         This is efficient in combination with cooccurrence(),
         since the string may be very long (e.g., Brown corpus).
     """
+    result = []
     a = []
     for ch in string:
         if ch not in sep:
             a.append(ch)
             continue
         if a:
-            yield "".join(a)
+            # yield "".join(a)
+            result.append("".join(a))
             a = []
     if a:
-        yield "".join(a)
+        # yield "".join(a)
+        result.append("".join(a))
+    return result
 
 
 def cooccurrence(iterable, window=(-1, -1), term1=lambda x: True, term2=lambda x: True, normalize=lambda x: x, matrix=None, update=None):
@@ -665,14 +675,18 @@ def _multiple(v, round=False):
     if b is None:
         a, b = 0, a
     if a == b:
-        yield float(a)
-        raise StopIteration
+        # yield float(a)
+        return [float(a)]
+        # raise StopIteration
     r = _multiple(b - a)
     t = _multiple(r / (n - 1), round=True)
     a = floor(a / t) * t
     b = ceil(b / t) * t
+    result = []
     for i in range(int((b - a) / t) + 1):
-        yield a + i * t
+        # yield a + i * t
+        result.append(a + i * t)
+    return result
 
 #### STATISTICS ####################################################################################
 
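On the a == b branch above: the function's other exit now returns a list, so this branch returns [float(a)] rather than a bare float, which keeps callers that iterate over the result working. Had the function remained a generator, the PEP 479-safe way to stop early is a bare return, as in this hypothetical sketch:

def ticks(a, b, n=10):
    # Generator variant: `return` ends a generator cleanly, where
    # `raise StopIteration` would surface as RuntimeError on Python 3.7+.
    if a == b:
        yield float(a)
        return # instead of: raise StopIteration
    t = (b - a) / float(n)
    for i in range(n + 1):
        yield a + i * t

print(list(ticks(2, 2)))          # [2.0]
print(list(ticks(0.0, 1.0, n=4))) # [0.0, 0.25, 0.5, 0.75, 1.0]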
@@ -733,11 +747,14 @@ def simple_moving_average(iterable, k=10):
     """ Returns an iterator over the simple moving average of the given list of values.
     """
     a = iterable if isinstance(iterable, list) else list(iterable)
+    result = []
     for m in range(len(a)):
         i = m - k
         j = m + k + 1
         w = a[max(0, i):j]
-        yield float(sum(w)) / (len(w) or 1)
+        # yield float(sum(w)) / (len(w) or 1)
+        result.append(float(sum(w)) / (len(w) or 1))
+    return result
 
 sma = simple_moving_average
 
20 changes: 14 additions & 6 deletions pattern/server/__init__.py
@@ -1606,28 +1606,36 @@ def _render(self, compiled, *args, **kwargs):
         k.update(kwargs)
         k["template"] = template
         indent = kwargs.pop("indent", False)
+        result = []
         for cmd, v, w in compiled:
             if indent is False:
                 w = ""
             if cmd is None:
                 continue
             elif cmd == "<str>":
-                yield self._encode(v, w)
+                # yield self._encode(v, w)
+                result.append(self._encode(v, w))
             elif cmd == "<arg>":
-                yield self._encode(k.get(v, "$" + v), w)
+                # yield self._encode(k.get(v, "$" + v), w)
+                result.append(self._encode(k.get(v, "$" + v), w))
             elif cmd == "<if>":
-                yield "".join(self._render(v[1], k)) if eval(v[0]) else ""
+                # yield "".join(self._render(v[1], k)) if eval(v[0]) else ""
+                result.append("".join(self._render(v[1], k)) if eval(v[0]) else "")
             elif cmd == "<for>":
-                yield "".join(["".join(self._render(v[2], k, self._dict(v[0], i))) for i in eval(v[1], k)])
+                # yield "".join(["".join(self._render(v[2], k, self._dict(v[0], i))) for i in eval(v[1], k)])
+                result.append("".join(["".join(self._render(v[2], k, self._dict(v[0], i))) for i in eval(v[1], k)]))
             elif cmd == "<eval>":
-                yield self._encode(eval(v, k), w)
+                # yield self._encode(eval(v, k), w)
+                result.append(self._encode(eval(v, k), w))
             elif cmd == "<exec>":
                 o = StringIO()
                 k["write"] = o.write # Code blocks use write() for output.
                 exec(v, k)
-                yield self._encode(o.getvalue(), w)
+                # yield self._encode(o.getvalue(), w)
+                result.append(self._encode(o.getvalue(), w))
                 del k["write"]
                 o.close()
+        return result
 
     def render(self, *args, **kwargs):
         """ Returns the rendered template as a string.
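The _render() conversion above is transparent to render(), because str.join() accepts any iterable: a list of rendered fragments joins exactly as the old generator did. For instance:

parts = ["<p>", "hello", "</p>"]
assert "".join(parts) == "".join(s for s in parts) == "<p>hello</p>"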
9 changes: 6 additions & 3 deletions pattern/text/__init__.py
@@ -589,6 +589,7 @@ def _read(path, encoding="utf-8", comment=";;;"):
     """ Returns an iterator over the lines in the file at the given path,
-        strippping comments and decoding each line to Unicode.
+        stripping comments and decoding each line to Unicode.
     """
+    result = []
     if path:
         if isinstance(path, str) and os.path.exists(path):
             # From file path.
@@ -605,8 +606,10 @@ def _read(path, encoding="utf-8", comment=";;;"):
             line = decode_utf8(line, encoding)
             if not line or (comment and line.startswith(comment)):
                 continue
-            yield line
-    raise StopIteration
+            # yield line
+            result.append(line)
+    # raise StopIteration
+    return result
 
 
 class Lexicon(lazydict):
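The trailing raise StopIteration removed here is the crash the PR title points at: on Python 3.7 it surfaced as RuntimeError as soon as a lexicon file was loaded. Callers that loop over _read() are unaffected by the list rewrite. A hypothetical check, with the file name and contents assumed:

# Given a file "custom.txt" containing:
#   ;;; a comment line
#   the DT
#   cat NN
for line in _read("custom.txt", comment=";;;"):
    print(line) # "the DT", then "cat NN"; the comment line is skipped.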
1 change: 1 addition & 0 deletions pattern/text/en/wordnet/__init__.py
@@ -183,6 +183,7 @@ def __init__(self, synset):
     def __iter__(self):
         for s in self.synonyms:
             yield s
+        # return (s for s in self.synonyms)
 
     def __len__(self):
         return len(self.synonyms)
15 changes: 11 additions & 4 deletions pattern/text/search.py
@@ -168,11 +168,14 @@ def product(*args, **kwargs):
         ("t", "a"),
         ("t", "t")]
     """
+    result = []
     p = [[]]
-    for iterable in map(tuple, args) * kwargs.get("repeat", 1):
+    for iterable in list(map(tuple, args)) * kwargs.get("repeat", 1):
         p = [x + [y] for x in p for y in iterable]
     for p in p:
-        yield tuple(p)
+        # yield tuple(p)
+        result.append(tuple(p))
+    return result
 
 try:
     from itertools import product
@@ -722,14 +725,18 @@ def __init__(self, sequence=[], *args, **kwargs):
         # Parse nested lists and tuples from the sequence into groups.
         # [DT [JJ NN]] => Match.group(1) will yield the JJ NN sequences.
         def _ungroup(sequence, groups=None):
+            result = []
             for v in sequence:
                 if isinstance(v, (list, tuple)):
                     if groups is not None:
                         groups.append(list(_ungroup(v, groups=None)))
                     for v in _ungroup(v, groups):
-                        yield v
+                        # yield v
+                        result.append(v)
                 else:
-                    yield v
+                    # yield v
+                    result.append(v)
+            return result
         self.groups = []
         self.sequence = list(_ungroup(sequence, groups=self.groups))
         # Assign Constraint.index:
10 changes: 8 additions & 2 deletions pattern/text/tree.py
@@ -128,9 +128,12 @@ def __len__(self):
 
     def __iter__(self):
        i = 0
+        result = []
         while i < len(self._a):
-            yield self._f(self._a[i])
+            # yield self._f(self._a[i])
+            result.append(self._f(self._a[i]))
             i += 1
+        return iter(result)
 
 ### SENTENCE #######################################################################################
 
@@ -1037,8 +1040,11 @@ def loop(self, *tags):
             Possible tags: WORD, LEMMA, POS, CHUNK, PNP, RELATION, ROLE, ANCHOR or a custom word tag.
             Any order or combination of tags can be supplied.
         """
+        result = []
         for i in range(len(self.words)):
-            yield tuple([self.get(i, tag=tag) for tag in tags])
+            # yield tuple([self.get(i, tag=tag) for tag in tags])
+            result.append(tuple([self.get(i, tag=tag) for tag in tags]))
+        return result
 
     def indexof(self, value, tag=WORD):
         """ Returns the indices of tokens in the sentence where the given token tag equals the string.
27 changes: 22 additions & 5 deletions pattern/vector/__init__.py
@@ -118,21 +118,27 @@ def chunk(iterable, n):
     n = int(n)
     i = 0
     j = 0
+    result = []
     for m in range(n):
         j = i + len(a[m::n])
-        yield a[i:j]
+        # yield a[i:j]
+        result.append(a[i:j])
         i = j
+    return result
 
 
 def mix(iterables=[], n=10):
     """ Returns an iterator that alternates the given lists, in n chunks.
     """
     # list(mix([[1, 2, 3, 4], ["a", "b"]], n=2)) => [1, 2, "a", 3, 4, "b"]
     a = [list(chunk(x, n)) for x in iterables]
+    result = []
     for i in range(int(n)):
         for x in a:
             for item in x[i]:
-                yield item
+                # yield item
+                result.append(item)
+    return result
 
 
 def bin(iterable, key=lambda x: x, value=lambda x: x):
@@ -2136,9 +2142,11 @@ def sequence(i=0, f=lambda i: i + 1):
     # Used to generate unique vector id's in hierarchical().
     # We cannot use Vector.id, since the given vectors might be plain dicts.
     # We cannot use id(vector), since id() is only unique for the lifespan of the object.
+    # Note: this sequence is infinite, so it cannot be collected into a list;
+    # it stays a generator, which is PEP 479-safe since it never raises StopIteration.
     while True:
         yield i
         i = f(i)
 
 
 def hierarchical(vectors, k=1, iterations=1000, distance=COSINE, **kwargs):
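With the default successor function, sequence() behaves like itertools.count(), the standard library's safe infinite iterator; a quick equivalence check:

from itertools import count, islice

ids = sequence(0)
print([next(ids) for _ in range(3)]) # [0, 1, 2]
print(list(islice(count(0), 3)))     # [0, 1, 2]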
@@ -2569,14 +2578,20 @@ def chunks(iterable, n=10):
         a = list(iterable)
         i = 0
         j = 0
+        result = []
         for m in range(n):
             j = i + len(a[m::n])
-            yield a[i:j]
+            # yield a[i:j]
+            result.append(a[i:j])
             i = j
+        return result
     k = kwargs.get("k", K)
     d = list(chunks(documents, max(k, 2)))
+    res = []
     for holdout in range(k):
-        yield list(chain(*(d[:holdout] + d[holdout + 1:]))), d[holdout]
+        # yield list(chain(*(d[:holdout] + d[holdout + 1:]))), d[holdout]
+        res.append((list(chain(*(d[:holdout] + d[holdout + 1:]))), d[holdout]))
+    return res
 
 _folds = folds
 
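Note the extra parentheses in res.append((...)) above: list.append() takes exactly one argument, so the (train, test) pair must be appended as a single tuple; the two-argument form raises TypeError. A quick shape check with toy documents (hypothetical usage):

docs = list(range(10))
for train, test in folds(docs, k=5):
    print(len(train), len(test)) # prints "8 2" five times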
@@ -2595,10 +2610,13 @@ def product(*args):
         # Yields the cartesian product of given iterables:
         # list(product([1, 2], [3, 4])) => [(1, 3), (1, 4), (2, 3), (2, 4)]
         p = [[]]
+        result = []
         for iterable in args:
             p = [x + [y] for x in p for y in iterable]
         for p in p:
-            yield tuple(p)
+            # yield tuple(p)
+            result.append(tuple(p))
+        return result
     s = [] # [((A, P, R, F, o), parameters), ...]
     p = [] # [[("c", 0.1), ("c", 10), ...],
            # [("gamma", 0.1), ("gamma", 0.2), ...], ...]