From ccc46359fdb2b06daa9a48d008a1aa27b9928dc7 Mon Sep 17 00:00:00 2001 From: Eunchan Cho Date: Wed, 28 Oct 2020 17:49:50 +0900 Subject: [PATCH] Update Exporter --- .gitignore | 1 + .vscode/settings.json | 3 - build_site.py | 74 ++++++++++++ exporter.py | 254 ++++++++++++++++++++++++++++++++++++++++++ notion-md-exporter.py | 198 -------------------------------- 5 files changed, 329 insertions(+), 201 deletions(-) delete mode 100644 .vscode/settings.json create mode 100644 build_site.py create mode 100644 exporter.py delete mode 100644 notion-md-exporter.py diff --git a/.gitignore b/.gitignore index dbe9c82..0ba4a7f 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ +__pychache__/ .vscode/ \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 64de790..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,3 +0,0 @@ -{ - "python.pythonPath": "/Users/wsy/opt/anaconda3/bin/python" -} \ No newline at end of file diff --git a/build_site.py b/build_site.py new file mode 100644 index 0000000..d671b19 --- /dev/null +++ b/build_site.py @@ -0,0 +1,74 @@ +import streamlit as st +import base64 +import shutil +from zipfile import ZipFile +from pathlib import Path +import notion +import os +from notion.client import NotionClient +import requests +import sys +from exporter import PageBlockExporter + +def export_cli(token_v2,url): + if not(os.path.isdir(directory)): + os.makedirs(os.path.join(directory)) + + client=NotionClient(token_v2=token_v2) + url=url + + exporter = PageBlockExporter(url,client) + exporter.create_main_folder(directory) + exporter.create_file() + export(exporter) + +def export(exporter): + """Recursively export page block with its sub pages + + Args: + exporter(PageBlockExporter()): export page block + """ + exporter.page2md(tapped = 0) + try: + exporter.write_file() + except: + st.markdown(f"Error exporting {exporter.title}.md!") + for sub_exporter in exporter.sub_exporters: + export(sub_exporter) + +def zipdir(path, ziph): + # ziph is zipfile handle + for root, dirs, files in os.walk(path): + for file in files: + ziph.write(os.path.join(root, file)) + +# main proc starts here +st.title("Notion Markdown Exporter") +st.markdown("This Web app is developed by [Shuyi Wang](https://twitter.com/wshuyi) based on [Eunchan Cho(@echo724)\'s notion2md](https://github.com/echo724/notion2md)") +st.markdown("The coressponding [Github Page of this app is here](https://github.com/wshuyi/demo-notion-markdown-exporter).") + + +token_v2 = st.text_input("Your Notion token_v2:") +url = st.text_input("The Link or ID you want to export:") + +running = False + +directory='./notion_output/' + +if token_v2 and url and not running: + if st.button("export"): + running = True + + if Path(directory).exists(): + shutil.rmtree(Path(directory)) + export_cli(token_v2, url) + with ZipFile('exported.zip', 'w') as myzip: + zipdir(directory, myzip) + with open('exported.zip', "rb") as f: + bytes = f.read() + b64 = base64.b64encode(bytes).decode() + href = f'\ + Click to download\ + ' + st.markdown(href, unsafe_allow_html=True) + running = False diff --git a/exporter.py b/exporter.py new file mode 100644 index 0000000..fe10854 --- /dev/null +++ b/exporter.py @@ -0,0 +1,254 @@ +import os +import requests +from datetime import datetime + +class PageBlockExporter: + def __init__(self,url,client): + self.client = client + self.page = self.client.get_block(url) + self.title = self.page.title + self.file_name = self.page.title + self.md = "" + self.image_dir="" + self.download_dir="" + self.sub_exporters = [] + + def create_main_folder(self,directory): + """create folder with file name + + Args: + directory(Stirng): set empty by default. + """ + self.dir = directory + self.title +'/' + + if not(os.path.isdir(self.dir)): + os.makedirs(os.path.join(self.dir)) + + def create_folder(self,directory): + """create folder with directory + + Args: + directory(Stirng): set empty by default. + """ + self.dir = directory + + if not(os.path.isdir(self.dir)): + os.makedirs(os.path.join(self.dir)) + + def create_sub_folder(self): + """create sub folder with current file name + + Args: + directory(Stirng): set empty by default. + """ + self.sub_dir = self.dir + 'subpage/' + if not(os.path.isdir(self.sub_dir)): + os.makedirs(os.path.join(self.sub_dir)) + + def create_file(self): + """create md file that md will be stored + + Returns: + self.file(String): path of file + """ + file_path = os.path.join(self.dir ,self.file_name + '.md') + self.file = open(file_path,'w') + return file_path + + def write_file(self): + """save markdown output in the file + """ + self.file.write(self.md) + self.file.close() + + def create_image_foler(self): + """create image output directory + """ + self.image_dir = os.path.join(self.dir,'image/') + if not(os.path.isdir(self.image_dir)): + os.makedirs(os.path.join(self.image_dir)) + + def image_export(self,url,count): + """make image file based on url and count. + + Args: + url(Stirng): url of image + count(int): the number of image in the page + + Returns: + image_path(String): image_path for the link in markdown + """ + if self.image_dir is "": + self.create_image_foler() + + image_path = self.image_dir + 'img_{0}.png'.format(count) + r = requests.get(url, allow_redirects=True) + open(image_path,'wb').write(r.content) + return image_path + + def create_download_foler(self): + """create download output directory + """ + self.download_dir = os.path.join(self.dir,'download/') + if not(os.path.isdir(self.download_dir)): + os.makedirs(os.path.join(self.download_dir)) + + def downlaod_file(self,url,file_name): + """download a file in the page. + + Args: + url(Stirng): url of the downlaod file + file_name(String): name of the file + + Returns: + None + """ + if self.download_dir is "": + self.create_download_foler() + + download_path = self.download_dir + file_name + r = requests.get(url, allow_redirects=True) + open(download_path,'wb').write(r.content) + + def block2md(self,block): + try: + btype = block.type + except: + print(block) + return + if btype != "numbered_list": + numbered_list_index = 0 + try: + bt = block.title + except: + pass + if btype == 'header': + return "# " + bt + if btype == "sub_header": + return "## " +bt + if btype == "sub_sub_header": + return "### " +bt + if btype == 'page': + self.create_sub_folder() + sub_url = block.get_browseable_url() + exporter = PageBlockExporter(sub_url,self.client) + exporter.create_folder(self.sub_dir) + sub_page_path = exporter.create_file() + try: + if "https:" in block.icon: + icon = "!"+link_format("",block.icon) + else: + icon = block.icon + except: + icon = "" + self.sub_exporters.append(exporter) + return icon + link_format(exporter.file_name,sub_page_path) + if btype == 'text': + if bt == "": + return + return bt +" " + if btype == 'bookmark': + return link_format(bt,block.link) + if btype == "video" or btype == "file" or btype =="audio" or btype =="pdf" or btype == "gist": + return link_format(block.source,block.source) + if btype == "bulleted_list" or btype == "toggle": + return '- '+bt + if btype == "numbered_list": + numbered_list_index += 1 + return str(numbered_list_index)+'. ' + bt + if btype == "image": + img_count += 1 + img_path = self.image_export(block.source,img_count) + return "!"+link_format(img_path,img_path) + if btype == "code": + return "``` "+block.language.lower()+"\n"+block.title+"\n```" + if btype == "equation": + return "$$"+block.latex+"$$" + if btype == "divider": + return "---" + if btype == "to_do": + if block.checked: + return "- [x] "+ bt + else: + return "- [ ]" + bt + if btype == "quote": + return "> "+bt + if btype == "column" or btype =="column_list": + return + if btype == "file": + self.downlaod_file(block.source,block.title) + print("\n[Download]'{0}' is saved in 'download' folder".format(block.title)) + if btype == "collection_view": + collection = block.collection + return self.make_table(collection) + if block.children and btype != 'page': + tapped += 1 + self.page2md(tapped,page=block) + + def page2md(self,tapped,page=None): + """change notion's block to markdown string + """ + if tapped == 0: + img_count = 0 + numbered_list_index = 0 + else: + self.md += '\n' + for i in range(tapped): + self.md += '\t' + if page is None: + page = self.page + count = 0 + for block in page.children: + if block != page.children[0]: + self.md +="\n\n" + try: + self.md += self.block2md(block) + except: + self.md += "" + + def make_table(self,collection): + columns = [] + row_blocks=collection.get_rows() + for proptitle in row_blocks[0].schema: + prop = proptitle['name'] + if prop == "Name": + columns.insert(0,prop) + else: + columns.append(prop) + table = [] + table.append(columns) + for row in row_blocks: + row_content = [] + for column in columns: + if column == "Name" and row.get("content") is not None: + content = self.block2md(row) + else: + content = row.get_property(column) + if str(type(content))=="": + content = ', '.join(content) + if str(type(content)) == "": + content = content.strftime('%b %d, %Y') + if column =="Name": + row_content.insert(0,content) + else: + row_content.append(content) + table.append(row_content) + return table_to_markdown(table) + +def link_format(name,url): + """make markdown link format string + """ + return "["+name+"]"+"("+url+")" + +def table_to_markdown(table): + md = "" + md += join_with_vertical(table[0]) + md += "\n---|---|---\n" + for row in table[1:]: + if row != table[1]: + md += '\n' + md += join_with_vertical(row) + return md + +def join_with_vertical(list): + return " | ".join(list) \ No newline at end of file diff --git a/notion-md-exporter.py b/notion-md-exporter.py deleted file mode 100644 index bc79a31..0000000 --- a/notion-md-exporter.py +++ /dev/null @@ -1,198 +0,0 @@ -import streamlit as st -import base64 -import shutil -from zipfile import ZipFile -from pathlib import Path -import notion -import os -from notion.client import NotionClient -import requests -import sys - -def recursive_getblocks(block,container,client): - new_id = client.get_block(block.id) - if not new_id in container: - container.append(new_id) - try: - for children_id in block.get("content"): - children = client.get_block(children_id) - recursive_getblocks(children,container,client) - except: - return - -def link(name,url): - return "["+name+"]"+"("+url+")" - -def image_export(url,count,directory): - img_dir = directory + 'img_{0}.png'.format(count) - r = requests.get(url, allow_redirects=True) - open(img_dir,'wb').write(r.content) - return img_dir - -def block2md(blocks,directory): - md = "" - img_count = 0 - numbered_list_index = 0 - title = blocks[0].title - title = title.replace(' ','') - directory += '{0}/'.format(title) - if not(os.path.isdir(directory)): - Path(directory).mkdir() - for block in blocks: - try: - btype = block.type - except: - continue - if btype != "numbered_list": - numbered_list_index = 0 - try: - bt = block.title - except: - pass - if btype == 'header': - md += "# " + bt - elif btype == "sub_header": - md += "## " +bt - elif btype == "sub_sub_header": - md += "### " +bt - elif btype == 'page': - try: - if "https:" in block.icon: - icon = "!"+link("",block.icon) - else: - icon = block.icon - md += "# " + icon + bt - except: - md += "# " + bt - elif btype == 'text': - md += bt +" " - elif btype == 'bookmark': - md += link(bt,block.link) - elif btype == "video" or btype == "file" or btype =="audio" or btype =="pdf" or btype == "gist": - md += link(block.source,block.source) - elif btype == "bulleted_list" or btype == "toggle": - md += '- '+bt - elif btype == "numbered_list": - numbered_list_index += 1 - md += str(numbered_list_index)+'. ' + bt - elif btype == "image": - img_count += 1 - try: - img_dir = image_export(block.source,img_count,directory) - md += "!"+link(img_dir,img_dir) - except: - # pass - st.markdown(f"error exporting {block.source}") - elif btype == "code": - md += "```"+block.language+"\n"+block.title+"\n```" - elif btype == "equation": - md += "$$"+block.latex+"$$" - elif btype == "divider": - md += "---" - elif btype == "to_do": - if block.checked: - md += "- [x] "+ bt - else: - md += "- [ ]" + bt - elif btype == "quote": - md += "> "+bt - elif btype == "column" or btype =="column_list": - continue - else: - pass - md += "\n\n" - return md - -def export(url,token): - client = NotionClient(token_v2=token) - page = client.get_block(url) - blocks = [] - recursive_getblocks(page,blocks,client) - md = block2md(blocks,'./') - return md - -def export_cli(fname, directory, token_v2, url): - fname = os.path.join(directory,fname) - file = open(fname,'w') - blocks = [] - - client = NotionClient(token_v2 = token_v2) - page = client.get_block(url) - - recursive_getblocks(page,blocks,client) - md = block2md(blocks,directory) - - file.write(md) - file.close() - -def notion_markdown_export(token_v2, url, directory): - pages_to_download = [] - - client = NotionClient(token_v2 = token_v2) - page = client.get_block(url) - - for children_id in page.get("content"): - children = client.get_block(children_id) - if children.title: - pages_to_download.append({"title":children.title, "id":children.id}) - - if not(os.path.isdir(directory)): - Path(directory).mkdir() - - for item in pages_to_download: - try: - export_cli(f"{item['title']}.md", directory, token_v2, item["id"]) - except: - st.markdown(f"Error exporting {item['title']}.md!") - return - -def adjust_notion_image_dir(source_md): - with open(source_md) as f: - data = f.read() - data = data.replace("./notion_output/", "") - with open(source_md, 'w') as f: - f.write(data) - -def batch_adjust_notion_image_dir(directory): - source_mds = list(Path(directory).glob("*.md")) - for source_md in source_mds: - adjust_notion_image_dir(source_md) - -def zipdir(path, ziph): - # ziph is zipfile handle - for root, dirs, files in os.walk(path): - for file in files: - ziph.write(os.path.join(root, file)) - -# main proc starts here -st.title("Notion Markdown Exporter") -st.markdown("This Web app is developed by [Shuyi Wang](https://twitter.com/wshuyi) based on [Eunchan Cho(@echo724)\'s notion2md](https://github.com/echo724/notion2md)") -st.markdown("The coressponding [Github Page of this app is here](https://github.com/wshuyi/demo-notion-markdown-exporter).") - - -token_v2 = st.text_input("Your Notion token_v2:") -url = st.text_input("The Link or ID you want to export:") - -directory = './notion_output/' - - -running = False - -if token_v2 and url and not running: - if st.button("export"): - running = True - - if Path(directory).exists(): - shutil.rmtree(Path(directory)) - notion_markdown_export(token_v2, url, directory) - batch_adjust_notion_image_dir(directory) - with ZipFile('exported.zip', 'w') as myzip: - zipdir(directory, myzip) - with open('exported.zip', "rb") as f: - bytes = f.read() - b64 = base64.b64encode(bytes).decode() - href = f'\ - Click to download\ - ' - st.markdown(href, unsafe_allow_html=True) - running = False