Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Feature: fix datetime Spanish #99

Draft
wants to merge 7 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 24 additions & 15 deletions kindle2notion/exporting.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,11 @@


def export_to_notion(
all_books: Dict,
enable_highlight_date: bool,
enable_book_cover: bool,
notion_api_auth_token: str,
notion_database_id: str,
all_books: Dict,
enable_highlight_date: bool,
enable_book_cover: bool,
notion_api_auth_token: str,
notion_database_id: str,
) -> None:
print("Initiating transfer...\n")

Expand Down Expand Up @@ -48,7 +48,7 @@ def export_to_notion(


def _prepare_aggregated_text_for_one_book(
clippings: List, enable_highlight_date: bool
clippings: List, enable_highlight_date: bool
) -> Tuple[str, str]:
# TODO: Special case for books with len(clippings) >= 100 characters. Character limit in a Paragraph block in Notion is 100
formatted_clippings = []
Expand Down Expand Up @@ -77,17 +77,17 @@ def _prepare_aggregated_text_for_one_book(


def _add_book_to_notion(
title: str,
author: str,
clippings_count: int,
formatted_clippings: list,
last_date: str,
notion_api_auth_token: str,
notion_database_id: str,
enable_book_cover: bool,
title: str,
author: str,
clippings_count: int,
formatted_clippings: list,
last_date_string: str,
notion_api_auth_token: str,
notion_database_id: str,
enable_book_cover: bool,
):
notion = notional.connect(auth=notion_api_auth_token)
last_date = datetime.strptime(last_date, "%A, %d %B %Y %I:%M:%S %p")
last_date = __get_last_date_from_string(last_date_string)

# Condition variables
title_exists = False
Expand Down Expand Up @@ -174,6 +174,15 @@ def _add_book_to_notion(
return message


def __get_last_date_from_string(last_date_string: str) -> datetime:
    """Turn the textual date of a book's last clipping into a ``datetime``.

    An empty (or otherwise falsy) string yields the current local time.
    Otherwise the 12-hour AM/PM layout is tried first, then the 24-hour
    layout; a ``ValueError`` from the second attempt propagates to the caller.
    """
    if last_date_string:
        twelve_hour_layout = "%A, %d %B %Y %I:%M:%S %p"
        twenty_four_hour_layout = "%A, %d %B %Y %H:%M:%S"
        try:
            parsed = datetime.strptime(last_date_string, twelve_hour_layout)
        except ValueError:
            # No AM/PM marker (non-English clippings): retry in 24-hour form
            parsed = datetime.strptime(last_date_string, twenty_four_hour_layout)
        return parsed
    return datetime.now()

# def _create_rich_text_object(text):
# if "Note: " in text:
# # Bold text
Expand Down
Empty file.
38 changes: 38 additions & 0 deletions kindle2notion/languages/enums.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from enum import Enum


class Locale(Enum):
    """Languages known to the package; each value is the language code."""

    ENGLISH = "en"
    SPANISH = "es"

    def __str__(self):
        # Render a member as its bare language code, e.g. "es"
        code = self.value
        return code


class Word(Enum):
    """Keywords of a clipping, each mapped from a ``Locale`` to its spelling.

    Every member's value is a dict keyed by ``Locale``; look a spelling up
    with ``Word.PAGE.value[Locale.SPANISH]``.
    """

    NOTE = {
        Locale.ENGLISH: "note",
        Locale.SPANISH: "nota",
    }
    LOCATION = {
        Locale.ENGLISH: "location",
        Locale.SPANISH: "posición",
    }
    PAGE = {
        Locale.ENGLISH: "page",
        Locale.SPANISH: "página",
    }
    DATE_ADDED = {
        Locale.ENGLISH: "added on",
        Locale.SPANISH: "añadido el",
    }
    # Not a keyword: the strftime/strptime layout used for dates per language
    DATE_FORMAT = {
        Locale.ENGLISH: "%A, %d %B %Y %I:%M:%S %p",
        Locale.SPANISH: "%A, %d %B %Y %H:%M:%S",
    }

    def __str__(self, language=Locale.ENGLISH):
        # str(member) passes no language, so it yields the English spelling
        translations = self.value
        return translations[language]
24 changes: 24 additions & 0 deletions kindle2notion/languages/word_detector.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from typing import List

from kindle2notion.languages.enums import Word, Locale


class WordDetector:
    """Guess the language of a text from the ``Word`` keywords it contains.

    Each language is scored by the total length of its keywords found in the
    text, so longer matches weigh more than short ones.
    """

    def __init__(self, languages: List[Locale]):
        """Collect, for each requested language, the spellings of every ``Word``.

        Args:
            languages: Locales the detector may answer with. Spellings that
                ``Word`` defines for other locales are ignored.
        """
        self.languages = languages
        self.language_words = {lang: set() for lang in languages}

        for word in Word:
            for lang, spelling in word.value.items():
                # A detector built for a subset of locales must not fail on
                # (or learn) spellings of locales it was not asked to handle.
                if lang in self.language_words:
                    self.language_words[lang].add(spelling)
        # NOTE(review): Word.DATE_FORMAT is iterated too, so its date layout
        # strings are registered as if they were keywords; they only match
        # text that literally contains the layout.

    def detect(self, text):
        """Return the language whose keywords best match ``text``.

        Ties, including the case where nothing matches, resolve to the
        language listed first.

        Raises:
            ValueError: If the detector has no languages to choose from.
        """
        scores = {lang: 0 for lang in self.languages}
        for lang, words in self.language_words.items():
            scores[lang] = sum(
                len(word) for word in words if self.has_word(text, word)
            )
        if not scores:
            raise ValueError("WordDetector has no languages to choose from")
        return max(scores, key=scores.get)

    def has_word(self, text, word):
        """Case-insensitive substring test of ``word`` in ``text``."""
        return word.lower() in text.lower()

38 changes: 26 additions & 12 deletions kindle2notion/parsing.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
from datetime import datetime
from re import findall
from typing import Dict, List, Tuple

from dateparser import parse

from kindle2notion.languages.word_detector import WordDetector
from kindle2notion.languages.enums import Locale, Word

BOOKS_WO_AUTHORS = []

ACADEMIC_TITLES = [
Expand Down Expand Up @@ -80,6 +84,8 @@

DELIMITERS = ["; ", " & ", " and "]

# One shared detector covering every supported Locale
WORD_DETECTOR = WordDetector(list(Locale))


def parse_raw_clippings_text(raw_clippings_text: str) -> Dict:
raw_clippings_list = raw_clippings_text.split("==========")
Expand Down Expand Up @@ -126,26 +132,34 @@ def _parse_page_location_date_and_note(
second_line_as_list = second_line.strip().split(" | ")
page = location = date = ""
is_note = False

for element in second_line_as_list:
element = element.lower()
if "note" in element:
language: Locale = WORD_DETECTOR.detect(element)
if Word.NOTE.value[language] in element:
is_note = True
if "page" in element:
page = element[element.find("page") :].replace("page", "").strip()
if "location" in element:
location = (
element[element.find("location") :].replace("location", "").strip()
)
if "added on" in element:
date = parse(
element[element.find("added on") :].replace("added on", "").strip()
if is_word_in_element(element, language, Word.PAGE):
page = _parse_word_from_element(element, language, Word.PAGE)
if is_word_in_element(element, language, Word.LOCATION):
location = _parse_word_from_element(element, language, Word.LOCATION)
if is_word_in_element(element, language, Word.DATE_ADDED):
date_string = _parse_word_from_element(element, language, Word.DATE_ADDED)
date_parsed: datetime = parse(
date_string, languages=[language.value for language in Locale]
)
date = date.strftime("%A, %d %B %Y %I:%M:%S %p")
date = date_parsed.strftime(Word.DATE_FORMAT.value[language])

return page, location, date, is_note


def is_word_in_element(element: str, language: Locale, word: Word):
    """Tell whether ``element`` contains ``word`` as spelled in ``language``.

    The check is a plain, case-sensitive substring test.
    """
    spelling = word.value[language]
    return spelling in element


def _parse_word_from_element(element: str, language: Locale, word: Word):
    """Return the text that follows ``word`` (as spelled in ``language``).

    Everything up to and including the first occurrence of the keyword is
    dropped, any further occurrences are removed, and surrounding whitespace
    is stripped. If the keyword does not occur in ``element`` at all, an
    empty string is returned.
    """
    word_value_in_language = word.value[language]
    _, keyword, remainder = element.partition(word_value_in_language)
    if not keyword:
        # Keyword absent: slicing from find()'s -1 would leak the last character
        return ""
    return remainder.replace(word_value_in_language, "").strip()


def _add_parsed_items_to_all_books_dict(
all_books: Dict,
title: str,
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

setup(
name="kindle2notion",
version="1.0.1",
version="1.0.2",
author="Jeffrey Jacob",
author_email="[email protected]",
description="Export all the clippings from your Kindle device to a database in Notion.",
Expand Down
40 changes: 40 additions & 0 deletions tests/test_exporting.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,43 @@ def test_prepare_aggregated_text_for_one_book_should_return_the_aggregated_text_
print(actual)
# Then
assert expected == actual


def test_when_date_is_not_ampm_format_then_aggregated_text_should_return_appropiate_date():
    """Dates without an AM/PM marker appear verbatim in the aggregated text."""
    # Given: two clippings dated in Spanish 24-hour style; the second is a note
    first_date = "jueves, 24 de agosto de 2023 7:28:38"
    second_date = "viernes, 25 de agosto de 2023 7:28:38"
    clippings = [
        ("This is an example highlight.", "1", "100", first_date, False),
        ("This is a second example highlight.", "2", "200", second_date, True),
    ]

    expected_blocks = [
        "This is an example highlight.\n* Page: 1, Location: 100, Date Added: jueves, 24 de agosto de 2023 7:28:38\n\n",
        "> NOTE: \nThis is a second example highlight.\n* Page: 2, Location: 200, Date Added: viernes, 25 de agosto de 2023 7:28:38\n\n",
    ]
    # The second tuple item is the date of the last clipping
    expected = (expected_blocks, second_date)

    # When
    result = _prepare_aggregated_text_for_one_book(
        clippings, enable_highlight_date=True
    )
    print(result)

    # Then
    assert expected == result


def test_when_date_is_not_ampm_format_then_aggregated_text_should_not_give_valueerror():
    # TODO: placeholder — this test has no body yet, so it asserts nothing
    # and always passes.
    pass
17 changes: 17 additions & 0 deletions tests/test_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,3 +333,20 @@ def test_add_parsed_items_to_books_dict_should_add_the_parsed_items_when_the_boo

# Then
assert expected == actual

def test_parse_date_when_format_does_not_include_am_pm():
    """A Spanish clipping with a 24-hour timestamp parses without AM/PM."""
    # Given
    raw_clipping_list = [
        "Relativity (Einstein, Albert)",
        "- La subrayado en la posición 558-560 | Añadido el viernes, 25 de agosto de 2023 7:28:38",
        "",
        "This is a test highlight.",
        False,
    ]
    # The metadata line carries no page, location 558-560, and is not a note.
    # The date is re-rendered with the Spanish layout "%A, %d %B %Y %H:%M:%S".
    # NOTE(review): day and month names below assume strftime runs under the
    # default (English) locale — confirm for the CI environment.
    expected = ("", "558-560", "Friday, 25 August 2023 07:28:38", False)

    # When
    actual = _parse_page_location_date_and_note(raw_clipping_list)

    # Then
    assert expected == actual