Skip to content
This repository has been archived by the owner on Dec 11, 2023. It is now read-only.

Commit

Permalink
Added a new benchmark for iterating columns
Browse files Browse the repository at this point in the history
  • Loading branch information
Francesc Alted committed May 4, 2015
1 parent 73da749 commit 5ebe7c1
Showing 1 changed file with 60 additions and 0 deletions.
60 changes: 60 additions & 0 deletions bench/column-iter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
from __future__ import print_function
import contextlib, time
import bcolz, numpy

@contextlib.contextmanager
def ctime(label=""):
"Counts the time spent in some context"
t = time.time()
yield
print(label, round(time.time() - t, 3), "sec")


N = 1000 * 1000

ct = bcolz.fromiter(((i, i*i, i*i*i) for i in xrange(N)), dtype='i8,i8,i8', count=N)

b = numpy.array(numpy.arange(N) % 2, dtype="bool")
c = bcolz.carray(b)

sorted_index = range(1, N, 2)
with ctime():
r0 = (ct['f0'][sorted_index]).tolist()

with ctime():
r1 = [x.f0 for x in ct.where(b)]
assert r0 == r1

with ctime():
r2 = [x.f0 for x in ct.where(c)]
assert r0 == r2

with ctime():
r3 = [x for x in ct['f0'].where(b)]
assert r0 == r3

with ctime():
r4 = [x for x in ct['f0'].where(c)]
assert r0 == r4

# sum
with ctime("sum list"):
r5 = sum([x for x in ct['f0'].where(c)])

with ctime("sum generator"):
r6 = sum(x for x in ct['f0'].where(c))
assert r5 == r6

with ctime("sum method"):
r7 = bcolz.fromiter((x for x in ct['f0'].where(c)),
dtype=ct['f0'].dtype,
count=c.wheretrue().sum()).sum()
assert r7 == r5

# sum with no NA's
with ctime("sum with no NA (list)"):
r8 = sum([x for x in ct['f0'].where(c) if x == x]) # x==x check to leave out NA values

# sum with no NA's
with ctime("sum with no NA (generator)"):
r9 = sum((x for x in ct['f0'].where(c) if x == x)) # x==x check to leave out NA values

0 comments on commit 5ebe7c1

Please sign in to comment.