Skip to content

Commit 7e159c4

Browse files
committed
Add test that pdfminer-processed file layout elements are recognized as extracted
1 parent abcc4f3 commit 7e159c4

File tree

1 file changed

+6
-1
lines changed

1 file changed

+6
-1
lines changed

test_unstructured/partition/pdf_image/test_pdfminer_processing.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import pytest
55
from pdfminer.layout import LAParams
66
from PIL import Image
7-
from unstructured_inference.constants import Source as InferenceSource
7+
from unstructured_inference.constants import Source as InferenceSource, IsExtracted
88
from unstructured_inference.inference.elements import (
99
EmbeddedTextRegion,
1010
Rectangle,
@@ -249,6 +249,11 @@ def test_process_file_with_pdfminer():
249249
assert links[0][0]["url"] == "https://layout-parser.github.io"
250250

251251

252+
def test_process_file_with_pdfminer_is_extracted_array():
253+
layout, _ = process_file_with_pdfminer(example_doc_path("pdf/layout-parser-paper-fast.pdf"))
254+
assert all(is_extracted is IsExtracted.TRUE for is_extracted in layout[0].is_extracted_array)
255+
256+
252257
@patch("unstructured.partition.pdf_image.pdfminer_utils.LAParams", return_value=LAParams())
253258
def test_laprams_are_passed_from_partition_to_pdfminer(pdfminer_mock):
254259
partition(

0 commit comments

Comments
 (0)