Skip to content

Commit

Permalink
feat: ProcessPoolExecutor in main
Browse files Browse the repository at this point in the history
Signed-off-by: 117503445 <[email protected]>
  • Loading branch information
117503445 committed Jun 25, 2023
1 parent 851f0b1 commit ca7a805
Show file tree
Hide file tree
Showing 5 changed files with 85 additions and 60 deletions.
19 changes: 19 additions & 0 deletions docs/todo.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,22 @@
- [x] dag 3
- [x] metabolome 6
- [x] metabolome 7
- [ ] pbft 符号作为 block
- [ ] gossip 每行一个 block

Li et al. - 2020 - A decentralized blockchain with high throughput an
- [ ] 3 shot 延展不足
- [ ] big-block 判断过严格

Zhao et al. - 2021 - A learned sketch for subgraph counting

- [ ] 3 8 big-block 判断过严格
- [ ] shot 空白延展过度(问题不大)

Lenzen and Sheikholeslami - 2022 - A Recursive Early-Stopping Phase King Protocol

- [ ] 7 big-block 漏报

Wang et al. - 2019 - A survey on consensus mechanisms and mining strate

- [ ] 8, 10, 13 shot 延展不足
85 changes: 45 additions & 40 deletions flow_pdf/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,46 +40,51 @@ def create_task(file_input: Path, dir_output: Path):
cfg = ExecuterConfig(version, True) # type: ignore
e = Executer(file_input, dir_output, cfg)
e.register(workers_dev)
e.execute()
try:
e.execute()
except Exception as e:
logger.error(f'{file_input.name} failed')
file.write_text(dir_output / "error.txt", str(e))
logger.info(f"end {file_input.name}, time = {time.perf_counter() - t:.2f}s")


with concurrent.futures.ProcessPoolExecutor() as executor:
futures = [
executor.submit(create_task, file_input, dir_output)
for file_input, dir_output in get_files_from_cfg()
]
for future in futures:
future.result()

if cfg['compare']['enabled']:
dir_target = Path(cfg['compare']['target'])

dir_output_list = []
for _, d in get_files_from_cfg():
dir_output_list.append(d)
dir_output_list.sort()

for dir_output in dir_output_list:
dir_t = dir_target / dir_output.stem
file_t = dir_t / "big_blocks_id" / 'big_blocks_id.json'
if not file_t.exists():
logger.warning(f"target file not found: {file_t}")
continue

cur = file.read_json(dir_output / 'big_blocks_id.json')
expect = file.read_json(file_t)

if cur != expect:
logger.debug(f'{dir_output.stem} changed')
for i in range(len(cur)):
if cur[i] != expect[i]:
add_list = []
del_list = []
for j in range(len(cur[i])):
if cur[i][j] not in expect[i]:
add_list.append(cur[i][j])
for j in range(len(expect[i])):
if expect[i][j] not in cur[i]:
del_list.append(expect[i][j])
logger.debug(f'page {i}, add: {add_list}, del: {del_list}')
if __name__ == "__main__":
    # Fan the (input file, output dir) pairs out to one worker process each:
    # PDF processing is CPU-bound, so ProcessPoolExecutor uses all cores.
    with concurrent.futures.ProcessPoolExecutor() as executor:
        futures = [
            executor.submit(create_task, file_input, dir_output)
            for file_input, dir_output in get_files_from_cfg()
        ]
        # Block until every task finishes; result() re-raises any exception
        # that escaped a worker process instead of silently dropping it.
        for future in futures:
            future.result()

    if cfg['compare']['enabled']:
        # Regression check: diff each run's big_blocks_id.json against the
        # previously accepted copy under the configured target directory.
        dir_target = Path(cfg['compare']['target'])

        # Sort so the comparison log comes out in a stable, readable order.
        dir_output_list = sorted(d for _, d in get_files_from_cfg())

        for dir_output in dir_output_list:
            dir_t = dir_target / dir_output.stem
            file_t = dir_t / "big_blocks_id" / 'big_blocks_id.json'
            if not file_t.exists():
                logger.warning(f"target file not found: {file_t}")
                continue

            cur = file.read_json(dir_output / 'big_blocks_id.json')
            expect = file.read_json(file_t)

            if cur != expect:
                logger.debug(f'{dir_output.stem} changed')
                if len(cur) != len(expect):
                    # Guard: the original loop indexed expect[i] up to
                    # len(cur), which raised IndexError when the page
                    # counts differ; log the mismatch instead.
                    logger.debug(f'page count changed: {len(expect)} -> {len(cur)}')
                # Per-page diff of block ids: what was added and what was
                # removed relative to the accepted target.
                for i, (cur_page, expect_page) in enumerate(zip(cur, expect)):
                    if cur_page != expect_page:
                        add_list = [b for b in cur_page if b not in expect_page]
                        del_list = [b for b in expect_page if b not in cur_page]
                        logger.debug(f'page {i}, add: {add_list}, del: {del_list}')
38 changes: 19 additions & 19 deletions flow_pdf/worker/dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,15 +99,15 @@ def run_page( # type: ignore[override]
# add_annot(page, rects, "", "purple")

# block not common span
rects = []
for c in page_in.big_blocks:
for block in c:
if block["type"] == 0:
for line in block["lines"]:
for span in line["spans"]:
if not is_common_span(span, doc_in.most_common_font, doc_in.common_size_range):
rects.append(span["bbox"])
add_annot(page, rects, "", "purple")
# rects = []
# for c in page_in.big_blocks:
# for block in c:
# if block["type"] == 0:
# for line in block["lines"]:
# for span in line["spans"]:
# if not is_common_span(span, doc_in.most_common_font, doc_in.common_size_range):
# rects.append(span["bbox"])
# add_annot(page, rects, "", "purple")

# new line
# rects = []
Expand Down Expand Up @@ -152,16 +152,16 @@ def run_page( # type: ignore[override]
add_annot(page, rects, "big-block", "blue")

# block with id
for block in page_in.raw_dict["blocks"]:
rect = block["bbox"]
a = f"b-{block['number']}"
page.add_freetext_annot(
(rect[2] - len(a) * 6, rect[1], rect[2], rect[1] + 10),
a,
fill_color=fitz.utils.getColor("white"),
border_color=fitz.utils.getColor("black"),
)
page.draw_rect(rect, color=fitz.utils.getColor("black")) # type: ignore
# for block in page_in.raw_dict["blocks"]:
# rect = block["bbox"]
# a = f"b-{block['number']}"
# page.add_freetext_annot(
# (rect[2] - len(a) * 6, rect[1], rect[2], rect[1] + 10),
# a,
# fill_color=fitz.utils.getColor("white"),
# border_color=fitz.utils.getColor("black"),
# )
# page.draw_rect(rect, color=fitz.utils.getColor("black")) # type: ignore

# shot in column view
if page_index in doc_in.abnormal_size_pages:
Expand Down
2 changes: 1 addition & 1 deletion flow_pdf/worker/read_doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ def run_page( # type: ignore[override]
rects = []
for block in raw_dict["blocks"]:
rects.append(block["bbox"])
add_annot(page, rects, "block", "blue")
add_annot(page, rects, "", "blue")

page.get_pixmap(dpi=150).save(doc_in.dir_output / "pre-marked" / f"{page_index}.png") # type: ignore

Expand Down
1 change: 1 addition & 0 deletions script/cp2target.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,6 @@
f = changed_list[i]
file_id = f / "big_blocks_id.json"
dir_dest = dir_target / f.name / "big_blocks_id"
dir_dest.mkdir(parents=True, exist_ok=True)
shutil.copy(file_id, dir_dest)
file.append_text(dir_dest / "note.txt", f"accepted time: {current_time}\n")

0 comments on commit ca7a805

Please sign in to comment.