Skip to content

Commit

Permalink
Sort offsets in generate_highlighted_text
Browse files Browse the repository at this point in the history
  • Loading branch information
raphael0202 committed Jun 21, 2023
1 parent f34e440 commit eec6ba9
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 1 deletion.
4 changes: 3 additions & 1 deletion ingredient_extraction/clean_dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,14 +110,16 @@ def annotate(item: dict, existing_annotation: Optional[dict] = None):
console.print(f"Image URL: {meta['url'].replace('.json', '.jpg')}")
identifier = meta["id"]
console.print(f"ID: {identifier}")
+ offsets = sorted(item["offsets"], key=lambda x: x[0])
+ console.print(f"offsets: {offsets}")
if existing_annotation is not None:
console.print(
f"Annotation already exists: "
f"action='{existing_annotation['action']}', "
f"updated_offsets={existing_annotation['updated_offsets']}"
)
marked_text = generate_highlighted_text(
- item["text"], [list(x) for x in item["offsets"]]
+ item["text"], [list(x) for x in offsets]
)
marked_text_highlighted = marked_text.replace("<b>", "[red]").replace(
"</b>", "[/red]"
Expand Down
1 change: 1 addition & 0 deletions ingredient_extraction/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ def generate_highlighted_text(
mark_token: str = "b",
html_escape: bool = False,
) -> str:
+ offsets = sorted(offsets, key=lambda x: x[0])
highlighted_text = []
previous_idx = 0
escape_func = (lambda x: x) if html_escape is False else html.escape
Expand Down

0 comments on commit eec6ba9

Please sign in to comment.