Skip to content

Commit

Permalink
fix: okascore C api usage (#19)
Browse files Browse the repository at this point in the history
* tests: demonstrate #18

Fix 'setup.py' to prevent building 'okascore' module if 'PURE_PYTHON'
is defined.

Add 'py310-pure' environment to build / run tests with that env var set.

Note that the new test fails under 'py310', but passes under 'py310-pure'

* fix: use 'PyFloat_AsDouble' for upcasts

- We know that the second element of each 'd2fitems' *should* be a float
  already (we store it, after all, in an IF BTree).

- The upcast in the second change is safe, and more regular.

* ci: bump checkout, setup-python action versions

- Use released Python 3.12

* ci: work around GHA macos-latest messes:

- actions/setup-python#850
- actions/setup-python#860

---------

Co-authored-by: Peter Wilkinson <[email protected]>
  • Loading branch information
tseaver and pfw authored May 16, 2024
1 parent d195029 commit c14e71d
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 23 deletions.
25 changes: 16 additions & 9 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ jobs:
- "3.9"
- "3.10"
- "3.11"
- "3.12-dev"
- "3.12"
- "pypy-3.9"
os:
- "ubuntu-20.04"
Expand All @@ -26,9 +26,16 @@ jobs:
- x64

exclude:
# persistent fails to build on 3.12-dev on MacOS in GitHub
# See: https://github.com/actions/setup-python/issues/850
- os: macos-latest
py: "3.12-dev"
py: "3.7"
- os: macos-latest
py: "3.8"
- os: macos-latest
py: "3.9"
# See: https://github.com/actions/setup-python/issues/860
- os: macos-latest
py: "3.10"

include:
# Only run coverage on ubuntu-20.04, except on pypy3
Expand All @@ -42,9 +49,9 @@ jobs:
name: "Python: ${{ matrix.py }}-${{ matrix.architecture }} on ${{ matrix.os }}"
runs-on: ${{ matrix.os }}
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Setup python
uses: actions/setup-python@v2
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.py }}
architecture: ${{ matrix.architecture }}
Expand All @@ -55,9 +62,9 @@ jobs:
runs-on: ubuntu-20.04
name: Validate coverage
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Setup python
uses: actions/setup-python@v2
uses: actions/setup-python@v5
with:
python-version: 3.7
architecture: x64
Expand All @@ -67,9 +74,9 @@ jobs:
runs-on: ubuntu-20.04
name: Build the documentation
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
- name: Setup python
uses: actions/setup-python@v2
uses: actions/setup-python@v5
with:
python-version: 3.7
architecture: x64
Expand Down
24 changes: 14 additions & 10 deletions hypatia/text/okascore.c
Original file line number Diff line number Diff line change
Expand Up @@ -89,22 +89,26 @@ score(PyObject *self, PyObject *args)
return NULL;
}
d = PyTuple_GET_ITEM(d_and_f, 0);
#ifdef PY3K
f = (double)PyLong_AsLong(PyTuple_GET_ITEM(d_and_f, 1));
#else
f = (double)PyInt_AsLong(PyTuple_GET_ITEM(d_and_f, 1));
#endif
f = PyFloat_AsDouble(PyTuple_GET_ITEM(d_and_f, 1));

if (PyErr_Occurred()) {
PyErr_SetString(PyExc_TypeError,
"d2fitem[1] should be a a float");
return NULL;
}

doclen = PyObject_GetItem(d2len, d);
if (doclen == NULL) {
Py_DECREF(d_and_f);
return NULL;
}
#ifdef PY3K

lenweight = B_FROM1 + B * PyFloat_AsDouble(doclen) / meandoclen;
#else
lenweight = B_FROM1 + B * PyInt_AsLong(doclen) / meandoclen;
#endif
if (PyErr_Occurred()) {
PyErr_SetString(PyExc_TypeError,
"doclen be a a float");
return NULL;
}

tf = f * K1_PLUS1 / (f + K1 * lenweight);
doc_score = PyFloat_FromDouble(tf * idf);
Expand Down Expand Up @@ -160,4 +164,4 @@ initokascore(void)
Py_InitModule3("okascore", okascore_functions,
"inner scoring loop for Okapi rank");
}
#endif
#endif
29 changes: 28 additions & 1 deletion hypatia/text/tests/test_okapiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
"""Okapi text index tests
"""
import unittest
from unittest import mock


class OkapiIndexTestBase:
# Subclasses must define '_getBTreesFamily'
Expand Down Expand Up @@ -106,6 +108,31 @@ def test__search_wids_non_empty_wids(self):
self.assertTrue(isinstance(relevance[0][1], float))
self.assertTrue(isinstance(relevance[1], int))

def test__search_wids_non_empty_wids_multiple_docs(self):

NUM_DOCS = 23 # get past the DICT_CUTOFF size
TEXT = 'one two three'
family = self._getBTreesFamily()
index = self._makeOne()

for i in range(NUM_DOCS):
texts = [TEXT] * ((i % 5) + 1)
index.index_doc(i, " ".join(texts) )

d2f = index._wordinfo[1]
assert isinstance(d2f, family.IF.BTree)

wids = [index._lexicon._wids[x] for x in TEXT.split()]

relevances = index._search_wids(wids)

self.assertEqual(len(relevances), len(wids))
for relevance in relevances:
self.assertTrue(isinstance(relevance[0], index.family.IF.Bucket))
self.assertEqual(len(relevance[0]), NUM_DOCS)
self.assertTrue(isinstance(relevance[0][1], float))
self.assertTrue(isinstance(relevance[1], int))

def test__search_wids_old_totaldoclen_no_write_on_read(self):
index = self._makeOne()
index.index_doc(1, 'one two three')
Expand Down Expand Up @@ -144,9 +171,9 @@ def _getBTreesFamily(self):
import BTrees
return BTrees.family32


class OkapiIndexTest64(OkapiIndexTestBase, unittest.TestCase):

def _getBTreesFamily(self):
import BTrees
return BTrees.family64

10 changes: 8 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,17 +26,23 @@ class optional_build_ext(build_ext):
the building of C extensions to fail.
"""
def run(self):
if os.environ.get("PURE_PYTHON"):
self._unavailable("Environment defines 'PURE_PYTHON'")
return
try:
build_ext.run(self)

except DistutilsPlatformError as e:
self._unavailable(e)

def build_extension(self, ext):
try:
if os.environ.get("PURE_PYTHON"):
self._unavailable("Environment defines 'PURE_PYTHON'")
return
try:
build_ext.build_extension(self, ext)

except (CCompilerError, DistutilsExecError) as e:
except (CCompilerError, DistutilsExecError) as e:
self._unavailable(e)

def _unavailable(self, e):
Expand Down
8 changes: 7 additions & 1 deletion tox.ini
Original file line number Diff line number Diff line change
@@ -1,14 +1,20 @@
[tox]
envlist =
py37,py38,py39,py310,py311,py312,pypy38,pypy39,coverage,docs
py37,py38,py39,py310,py310-pure,py311,py312,pypy38,pypy39,coverage,docs

[testenv]
use_develop = False
commands =
pip install -e .[testing]
pytest
extras =
testing

[testenv:py310-pure]
use_develop = False
setenv =
PURE_PYTHON=1

[testenv:coverage]
basepython = python3.7
commands =
Expand Down

0 comments on commit c14e71d

Please sign in to comment.