Skip to content

Commit

Permalink
fix: pdf first page tailor (#1116)
Browse files Browse the repository at this point in the history
* fix: pdf first page tailor

* fix: pdf first page tailor
  • Loading branch information
kavons authored Jun 19, 2024
1 parent b9a5bca commit 588d672
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions modules/pdf_func.py
Original file line number | Diff line number | Diff line change
@@ -61,7 +61,7 @@ def get_title_with_cropped_page(first_page):
elif word.text == "Abstract": # 获取页面abstract
top = word.top

user_info = [i["text"] for i in extract_words(first_page.within_bbox((x0,title_bottom,x1,top)))]
user_info = [i["text"] for i in extract_words(first_page.within_bbox((x0,title_bottom,x1,bottom)))]
# 裁剪掉上半部分, within_bbox: full_included; crop: partial_included
return title, user_info, first_page.within_bbox((x0,top,x1,bottom))

@@ -150,4 +150,5 @@ def parse_pdf(filename, two_column = True):
# Test code
z = parse_pdf("./build/test.pdf")
print(z["user_info"])
print(z["title"])
print(z["title"])

0 comments on commit 588d672

Please sign in to comment.