diff --git a/examples/notebooks/textReuse.ipynb b/examples/notebooks/textReuse.ipynb
index 613351d..f8eb89f 100644
--- a/examples/notebooks/textReuse.ipynb
+++ b/examples/notebooks/textReuse.ipynb
@@ -2048,6 +2048,148 @@
")"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Find passages for a cluster by its ID"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
FindTextReusePassages result
\n",
+ "Contains 2 items of 2 total items.
\n",
+ "
\n",
+ "See this result in the Impresso App.\n",
+ "Data preview:
\n",
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " offsetStart | \n",
+ " offsetEnd | \n",
+ " content | \n",
+ " title | \n",
+ " pageNumbers | \n",
+ " collections | \n",
+ " connectedClusters | \n",
+ " isFront | \n",
+ " size | \n",
+ " date | \n",
+ " pageRegions | \n",
+ " article.id | \n",
+ " textReuseCluster.id | \n",
+ " textReuseCluster.clusterSize | \n",
+ " textReuseCluster.timeDifferenceDay | \n",
+ " textReuseCluster.lexicalOverlap | \n",
+ " newspaper.id | \n",
+ " issue.id | \n",
+ "
\n",
+ " \n",
+ " id | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " c137438978332-LLE-1891-07-21-a-i0023@3354:5682 | \n",
+ " 3354 | \n",
+ " 5682 | \n",
+ " Un rapport géné-\\nral au roi Léopold sur la si... | \n",
+ " BULLETIN POLITIQUE | \n",
+ " [1] | \n",
+ " [local-duma-x9GD_Bj6, local-eb-ikYoMqvi] | \n",
+ " [{'id': 'tr-nobp-all-v01-c137438978332'}, {'id... | \n",
+ " True | \n",
+ " 2328 | \n",
+ " 1891-07-21T00:00:00+00:00 | \n",
+ " [657,609,1237,3220] | \n",
+ " LLE-1891-07-21-a-i0023 | \n",
+ " tr-nobp-all-v01-c137438978332 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 59.183673 | \n",
+ " LLE | \n",
+ " LLE-1891-07-21-a | \n",
+ "
\n",
+ " \n",
+ " c137438978332-indeplux-1891-07-20-a-i0012@2514:5104 | \n",
+ " 2514 | \n",
+ " 5104 | \n",
+ " Un rapport général au roi Léopold\\nsur la situ... | \n",
+ " Le Congo. | \n",
+ " [1] | \n",
+ " [] | \n",
+ " [{'id': 'tr-nobp-all-v01-c137438978332'}, {'id... | \n",
+ " True | \n",
+ " 2590 | \n",
+ " 1891-07-20T00:00:00+00:00 | \n",
+ " [1822,976,1565,3353] | \n",
+ " indeplux-1891-07-20-a-i0012 | \n",
+ " tr-nobp-all-v01-c137438978332 | \n",
+ " 2 | \n",
+ " 1 | \n",
+ " 59.183673 | \n",
+ " indeplux | \n",
+ " indeplux-1891-07-20-a | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "impresso.text_reuse.passages.find(\n",
+ " cluster_id=\"tr-nobp-all-v01-c137438978332\",\n",
+ " order_by=\"clusterSize\",\n",
+ ")"
+ ]
+ },
{
"cell_type": "markdown",
"metadata": {},
diff --git a/impresso/resources/text_reuse/clusters.py b/impresso/resources/text_reuse/clusters.py
index 888ed9d..0264150 100644
--- a/impresso/resources/text_reuse/clusters.py
+++ b/impresso/resources/text_reuse/clusters.py
@@ -254,6 +254,7 @@ def _build_cluster_facet_filters(
def _build_filters(
text: str | None = None,
+ cluster_id: str | AND[str] | OR[str] | None = None,
cluster_size: Range | AND[Range] | OR[Range] | None = None,
title: str | AND[str] | OR[str] | None = None,
lexical_overlap: Range | AND[Range] | OR[Range] | None = None,
@@ -273,6 +274,8 @@ def _build_filters(
filters: list[Filter] = []
if text is not None:
filters.extend(and_or_filter(text, "string"))
+ if cluster_id is not None:
+ filters.extend(and_or_filter(cluster_id, "text_reuse_cluster"))
if cluster_size is not None:
filters.extend(
and_or_filter(
diff --git a/impresso/resources/text_reuse/passages.py b/impresso/resources/text_reuse/passages.py
index 9ee0804..5176552 100644
--- a/impresso/resources/text_reuse/passages.py
+++ b/impresso/resources/text_reuse/passages.py
@@ -54,6 +54,7 @@ def find(
limit: int | None = None,
offset: int | None = None,
order_by: FindTextReusePassagesOrderByLiteral | None = None,
+ cluster_id: str | AND[str] | OR[str] | None = None,
cluster_size: Range | AND[Range] | OR[Range] | None = None,
title: str | AND[str] | OR[str] | None = None,
lexical_overlap: Range | AND[Range] | OR[Range] | None = None,
@@ -70,6 +71,7 @@ def find(
) -> FindTextReusePassagesContainer:
# reusing build filters from clusters - they are the same
filters = _build_filters(
+ cluster_id=cluster_id,
cluster_size=cluster_size,
title=title,
lexical_overlap=lexical_overlap,
@@ -119,6 +121,7 @@ def facet(
limit: int | None = None,
offset: int | None = None,
order_by: FindTextReusePassagesOrderByLiteral | None = None,
+ cluster_id: str | AND[str] | OR[str] | None = None,
cluster_size: Range | AND[Range] | OR[Range] | None = None,
title: str | AND[str] | OR[str] | None = None,
lexical_overlap: Range | AND[Range] | OR[Range] | None = None,
@@ -138,6 +141,7 @@ def facet(
raise ValueError(f"{facet} is not a valid value")
filters = _build_filters(
+ cluster_id=cluster_id,
cluster_size=cluster_size,
title=title,
lexical_overlap=lexical_overlap,