Skip to content

Commit

Permalink
feat: big set of papers
Browse files Browse the repository at this point in the history
Signed-off-by: 117503445 <[email protected]>
  • Loading branch information
117503445 committed Jun 27, 2023
1 parent ca7a805 commit 8992dd6
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 10 deletions.
82 changes: 81 additions & 1 deletion docs/todo.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,17 @@
- [x] Shot 去除白边
- [x] font size 范围优化
- [x] 出错后前端显示
- [ ] 大规模数据集
- [x] 大规模数据集
- [ ] 基于位置进行 Block 合并
- [ ] Shot 绝对大小
- [ ] 前端上传文件后清除 input
- [ ] fc 挂载 OSS
- [ ] fc-be 内网 endpoint / 挂载 OSS
- [ ] fc 删除旧版本数据
- [ ] Table of Contents
- [ ] Ligature 连字识别 (Hotstuff)
- [ ] shot 可复制文字 alt-data
- [ ] 斜体识别
- [ ] list 识别支持
- [ ] figure 识别支持
- [ ] Docker CLI
Expand Down Expand Up @@ -71,3 +75,79 @@ Lenzen and Sheikholeslami - 2022 - A Recursive Early-Stopping Phase King Protoco
Wang et al. - 2019 - A survey on consensus mechanisms and mining strate

- [ ] 8, 10, 13 shot 延展不足

Abraham et al. - 2022 - Efficient and Adaptively Secure Asynchronous Binar

- [ ] JSONGen 有问题

Aublin et al. - 2013 - Rbft Redundant byzantine fault tolerance

- [ ] no-common-span 提取

Bankhamer et al. - 2022 - Population Protocols for Exact Plurality Consensus

- [ ] 3 big-block 误报,漏报

Beaver 等。 - 2010 - Finding a needle in haystack Facebook's photo sto

- [ ] 5 6 shot 空白延展过度

Chang 等。 - 2008 - Bigtable A distributed storage system for structu

- [ ] min() arg is an empty sequence

Data_Replication_Using_Read-One-Write-All_Monitori

- [ ] min() arg is an empty sequence

Dean and Ghemawat - 2008 - MapReduce simplified data processing on large clu

- [ ] 大量 big-block 漏报

Gilad 等。 - 2017 - Algorand Scaling byzantine agreements for cryptoc

- [ ] 大量 big-block 漏报

Guo 等 - 2020 - Dumbo Faster asynchronous bft protocols

- [ ] Invalid bandwriter header dimensions/setup

Kapritsos et al. - 2012 - All about eve Execute-verify replication for mult

- [ ] min() arg is an empty sequence

Kotla 和 Dahlin - 2004 - High throughput Byzantine fault tolerance

- [ ] Invalid bandwriter header dimensions/setup

Li et al. - 2020 - A decentralized blockchain with high throughput an

- [ ] 大量 big-block 漏报

Li et al. - 2020 - GHAST Breaking confirmation delay barrier in naka

- [ ] Invalid bandwriter header dimensions/setup

Liu 等。 - 2018 - Scalable byzantine consensus via hardware-assisted

- [ ] Invalid bandwriter header dimensions/setup

Miller 等 - 2016 - The honey badger of BFT protocols

- [ ] 大量 big-block 漏报

practical byzantine fault tolerance

- [ ] big-block 排列不规则

Sankar 等。 - 2017 - Survey of consensus protocols on blockchain applic

- [ ] min() arg is an empty sequence

Scales 等。 - 2010 - The design of a practical system for fault-toleran

- [ ] min() arg is an empty sequence

Zhu et al. - 2022 - Postharvest quality monitoring and cold chain mana

- [ ] min() arg is an empty sequence
17 changes: 9 additions & 8 deletions flow_pdf/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

cfg = yaml.load(Path("./config.yaml").read_text(), Loader=yaml.FullLoader)


def get_files_from_cfg():
dir_input = Path(cfg["path"]["input"])
dir_output = Path(cfg["path"]["output"])
Expand Down Expand Up @@ -43,22 +44,22 @@ def create_task(file_input: Path, dir_output: Path):
try:
e.execute()
except Exception as e:
logger.error(f'{file_input.name} failed')
logger.error(f"{file_input.name} failed")
file.write_text(dir_output / "error.txt", str(e))
logger.info(f"end {file_input.name}, time = {time.perf_counter() - t:.2f}s")


if __name__ == "__main__":
with concurrent.futures.ProcessPoolExecutor() as executor:
with concurrent.futures.ProcessPoolExecutor(max_workers=6) as executor:
futures = [
executor.submit(create_task, file_input, dir_output)
for file_input, dir_output in get_files_from_cfg()
]
for future in futures:
future.result()

if cfg['compare']['enabled']:
dir_target = Path(cfg['compare']['target'])
if cfg["compare"]["enabled"]:
dir_target = Path(cfg["compare"]["target"])

dir_output_list = []
for _, d in get_files_from_cfg():
Expand All @@ -67,16 +68,16 @@ def create_task(file_input: Path, dir_output: Path):

for dir_output in dir_output_list:
dir_t = dir_target / dir_output.stem
file_t = dir_t / "big_blocks_id" / 'big_blocks_id.json'
file_t = dir_t / "big_blocks_id" / "big_blocks_id.json"
if not file_t.exists():
logger.warning(f"target file not found: {file_t}")
continue

cur = file.read_json(dir_output / 'big_blocks_id.json')
cur = file.read_json(dir_output / "big_blocks_id.json")
expect = file.read_json(file_t)

if cur != expect:
logger.debug(f'{dir_output.stem} changed')
logger.debug(f"{dir_output.stem} changed")
for i in range(len(cur)):
if cur[i] != expect[i]:
add_list = []
Expand All @@ -87,4 +88,4 @@ def create_task(file_input: Path, dir_output: Path):
for j in range(len(expect[i])):
if expect[i][j] not in cur[i]:
del_list.append(expect[i][j])
logger.debug(f'page {i}, add: {add_list}, del: {del_list}')
logger.debug(f"page {i}, add: {add_list}, del: {del_list}")
3 changes: 2 additions & 1 deletion flow_pdf/worker/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,7 +354,8 @@ def add_annot(page, rects, annot: str, color):
if annot:
a = f"{annot}-{i}"
page.add_freetext_annot(
(rect[0], rect[1], rect[0] + len(a) * 6, rect[1] + 10),
# (rect[0], rect[1], rect[0] + len(a) * 6, rect[1] + 10),
(rect[2] - len(a) * 6, rect[1], rect[2] , rect[1] + 10),
a,
fill_color=fitz.utils.getColor("white"),
border_color=fitz.utils.getColor("black"),
Expand Down

0 comments on commit 8992dd6

Please sign in to comment.