Skip to content

Commit

Permalink
Merge pull request #408 from robertcv/fix/corpus_viewer_mark
Browse files Browse the repository at this point in the history
[FIX] OWCorpusViewer: mark filtered text with Python
  • Loading branch information
ajdapretnar authored Mar 19, 2019
2 parents 9572916 + 0ef4fca commit bf9b035
Show file tree
Hide file tree
Showing 3 changed files with 50 additions and 40 deletions.
51 changes: 28 additions & 23 deletions orangecontrib/text/widgets/owcorpusviewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@
from itertools import chain

from AnyQt.QtCore import (
Qt, QUrl, QItemSelection, QItemSelectionModel, QItemSelectionRange,
pyqtSlot as Slot
Qt, QUrl, QItemSelection, QItemSelectionModel, QItemSelectionRange
)

from AnyQt.QtGui import QStandardItemModel, QStandardItem
Expand Down Expand Up @@ -118,7 +117,6 @@ def __init__(self):

# Document contents
self.doc_webview = gui.WebviewWidget(self.splitter, debug=False)
self.doc_webview.loadFinished.connect(self.highlight_docs)

self.mainArea.layout().addWidget(self.splitter)

Expand Down Expand Up @@ -321,14 +319,16 @@ def show_docs(self):
row_ind = index.data(Qt.UserRole).row_index
for ind in self.display_indices:
feature = self.display_features[ind]
mark = ' mark-area' if feature in marked_search_features else ''
value = str(index.data(Qt.UserRole)[feature.name]).replace('\n', '<br/>')
value = str(index.data(Qt.UserRole)[feature.name])
if feature in marked_search_features:
value = self.__mark_text(value)
value = value.replace('\n', '<br/>')
is_image = feature.attributes.get('type', '') == 'image'
if is_image and value != '?':
value = '<img src="{}"></img>'.format(value)
html += '<tr><td class="variables"><strong>{}:</strong></td>' \
'<td class="content{}">{}</td></tr>'.format(
feature.name, mark, value)
'<td class="content">{}</td></tr>'.format(
feature.name, value)

if self.show_tokens:
html += '<tr><td class="variables"><strong>Tokens & Tags:</strong></td>' \
Expand All @@ -339,6 +339,27 @@ def show_docs(self):
base = QUrl.fromLocalFile(__file__)
self.doc_webview.setHtml(HTML.format(html), base)

def __mark_text(self, text):
    """Wrap every match of the current search filter in a ``<mark>`` tag.

    The pattern is ``self.regexp_filter`` with leading and trailing ``|``
    characters removed, compiled case-insensitively and in multi-line mode.

    Parameters
    ----------
    text : str
        The text to search.

    Returns
    -------
    str
        ``text`` with each non-empty match wrapped in
        ``<mark data-markjs="true">...</mark>``. ``text`` is returned
        unchanged when the filter is empty, is not a valid regular
        expression, or matches nothing.
    """
    search_keyword = self.regexp_filter.strip('|')
    if not search_keyword:
        return text

    try:
        reg = re.compile(search_keyword, re.IGNORECASE | re.MULTILINE)
    except re.error:
        # A half-typed or otherwise invalid pattern simply highlights
        # nothing. `re.error` is the public name of the exception that
        # the deprecated `sre_constants` module exposes.
        return text

    def wrap(match):
        found = match.group(0)
        # Zero-width matches (e.g. from `x*`) would otherwise insert an
        # empty <mark> element at every position of the text.
        if not found:
            return found
        return '<mark data-markjs="true">{}</mark>'.format(found)

    # A callable replacement inserts the matched text verbatim, so
    # backslashes in the document are never treated as escape sequences.
    return reg.sub(wrap, text)

def search_features_changed(self):
self.regenerate_docs()
self.refresh_search()
Expand All @@ -359,22 +380,6 @@ def refresh_search(self):
self.update_info()
self.commit()

@Slot()
def highlight_docs(self):
    """Ask the web view to highlight matches of the current search filter.

    Strips leading and trailing ``|`` from ``self.regexp_filter`` and, if
    anything remains, runs a JavaScript snippet in ``self.doc_webview``
    that calls the page's ``mark`` function with the pattern. Does nothing
    when the stripped filter is empty.
    """
    import json  # local import: only needed to build the JS literal

    search_keyword = self.regexp_filter.strip('|')
    if not search_keyword:
        return

    # Serialise the pattern as a JSON string, which is also a valid
    # JavaScript string literal. Backslashes are doubled (one \ becomes
    # two for mark.js), and quotes and newlines are escaped, so the
    # pattern cannot terminate the literal early and break the script.
    js_pattern = json.dumps(search_keyword)

    # mark is undefined when clearing the view (`setHtml('')`). Maybe
    # set and template html with all the scripts, ... but no contents?
    self.doc_webview.runJavaScript(
        '''
        if (typeof mark !== "undefined") {{
            mark({});
        }}
        '''.format(js_pattern)
    )

def update_info(self):
if self.corpus is not None:
self.n_documents = len(self.corpus)
Expand Down
17 changes: 0 additions & 17 deletions orangecontrib/text/widgets/resources/highlighter.js

This file was deleted.

22 changes: 22 additions & 0 deletions orangecontrib/text/widgets/tests/test_owcorpusviewer.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import unittest
from AnyQt.QtTest import QSignalSpy
from Orange.widgets.tests.base import WidgetTest
from Orange.data import StringVariable

from orangecontrib.text.corpus import Corpus
from orangecontrib.text.widgets.owcorpusviewer import OWCorpusViewer
Expand Down Expand Up @@ -34,6 +35,27 @@ def test_highlighting(self):
html = self.widget.doc_webview.html()
self.assertIn('<mark data-markjs="true">', html)

def test_highlighting_non_latin(self):
    """A search term in a non-Latin script is highlighted in the HTML."""
    # Single-document corpus whose text lives in a string meta column.
    docs = [{'content': """царстве есть сад с молодильными яблоками"""}]
    meta_spec = [
        (StringVariable('content'), lambda doc: doc.get('content')),
    ]
    corpus = Corpus.from_documents(docs, 'RussianDocument', metas=meta_spec)

    self.send_signal(self.widget.Inputs.corpus, corpus)
    self.widget.regexp_filter = "\\bсад\\b"
    self.process_events()

    webview = self.widget.doc_webview
    webview.html()
    # Wait for the page load to finish before reading the final markup.
    load_spy = QSignalSpy(webview.loadFinished)
    load_spy.wait()
    self.assertIn('<mark data-markjs="true">', webview.html())


if __name__ == "__main__":
unittest.main()

0 comments on commit bf9b035

Please sign in to comment.