Skip to content

Commit bc51f9f

Browse files
authored
Merge pull request #2214 from myhloli/dev
refactor(footnote_detection): adjust footnote detection threshold
2 parents 4df8523 + 8caf59f commit bc51f9f

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

Diff for: magic_pdf/pre_proc/ocr_detect_all_bboxes.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -99,11 +99,11 @@ def ocr_prepare_bboxes_for_layout_split_v2(
9999
all_discarded_blocks = []
100100
add_bboxes(discarded_blocks, BlockType.Discarded, all_discarded_blocks)
101101

102-
"""footnote识别:宽度超过1/3页面宽度的,高度超过10的,处于页面下半50%区域的"""
102+
"""footnote识别:宽度超过1/3页面宽度的,高度超过10的,处于页面下半30%区域的"""
103103
footnote_blocks = []
104104
for discarded in discarded_blocks:
105105
x0, y0, x1, y1 = discarded['bbox']
106-
if (x1 - x0) > (page_w / 3) and (y1 - y0) > 10 and y0 > (page_h / 2):
106+
if (x1 - x0) > (page_w / 3) and (y1 - y0) > 10 and y0 > (page_h * 0.7):
107107
footnote_blocks.append([x0, y0, x1, y1])
108108

109109
"""移除在footnote下面的任何框"""

0 commit comments

Comments
 (0)