Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
## 0.18.17-dev0

### Enhancement
- Flag extracted elements as such in the metadata for downstream use

### Features

### Fixes

## 0.18.16

### Enhancement
Expand Down
12 changes: 6 additions & 6 deletions requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ click==8.3.0
# via
# nltk
# python-oxmsg
cryptography==46.0.2
cryptography==46.0.3
# via unstructured-client
dataclasses-json==0.6.7
# via
Expand Down Expand Up @@ -85,11 +85,11 @@ packaging==25.0
# via
# marshmallow
# unstructured-client
psutil==7.1.0
psutil==7.1.3
# via -r ./base.in
pycparser==2.23
# via cffi
pypdf==6.1.1
pypdf==6.1.3
# via unstructured-client
python-dateutil==2.9.0.post0
# via unstructured-client
Expand All @@ -99,9 +99,9 @@ python-magic==0.4.27
# via -r ./base.in
python-oxmsg==0.0.2
# via -r ./base.in
rapidfuzz==3.14.1
rapidfuzz==3.14.3
# via -r ./base.in
regex==2025.9.18
regex==2025.11.3
# via nltk
requests==2.32.5
# via
Expand Down Expand Up @@ -150,5 +150,5 @@ urllib3==2.5.0
# unstructured-client
webencodings==0.5.1
# via html5lib
wrapt==1.17.3
wrapt==2.0.0
# via -r ./base.in
2 changes: 1 addition & 1 deletion requirements/dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ typing-extensions==4.15.0
# -c ./base.txt
# -c ./test.txt
# virtualenv
virtualenv==20.35.3
virtualenv==20.35.4
# via pre-commit
wheel==0.45.1
# via pip-tools
Expand Down
2 changes: 1 addition & 1 deletion requirements/extra-markdown.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,5 +4,5 @@
#
# pip-compile ./extra-markdown.in
#
markdown==3.9
markdown==3.10
# via -r ./extra-markdown.in
20 changes: 10 additions & 10 deletions requirements/extra-paddleocr.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ charset-normalizer==3.4.4
# via
# -c ./base.txt
# requests
cython==3.1.4
cython==3.2.0
# via unstructured-paddleocr
exceptiongroup==1.3.0
# via
Expand Down Expand Up @@ -58,7 +58,7 @@ idna==3.11
# anyio
# httpx
# requests
imageio==2.37.0
imageio==2.37.2
# via scikit-image
lazy-loader==0.4
# via scikit-image
Expand Down Expand Up @@ -101,31 +101,31 @@ packaging==25.0
# -c ./base.txt
# lazy-loader
# scikit-image
paddlepaddle==3.2.0
paddlepaddle==3.2.1
# via -r ./extra-paddleocr.in
pillow==11.3.0
pillow==12.0.0
# via
# imageio
# paddlepaddle
# scikit-image
# unstructured-paddleocr
protobuf==6.32.1
protobuf==6.33.0
# via
# -c ./deps/constraints.txt
# paddlepaddle
pyclipper==1.3.0.post6
# via unstructured-paddleocr
pydantic==2.12.2
pydantic==2.12.4
# via albumentations
pydantic-core==2.41.4
pydantic-core==2.41.5
# via pydantic
python-docx==1.2.0
# via unstructured-paddleocr
pyyaml==6.0.3
# via
# albumentations
# unstructured-paddleocr
rapidfuzz==3.14.1
rapidfuzz==3.14.3
# via
# -c ./base.txt
# unstructured-paddleocr
Expand Down Expand Up @@ -153,9 +153,9 @@ soupsieve==2.8
# via
# -c ./base.txt
# beautifulsoup4
stringzilla==4.2.1
stringzilla==4.2.3
# via albucore
termcolor==3.1.0
termcolor==3.2.0
# via fire
tifffile==2025.5.10
# via scikit-image
Expand Down
2 changes: 1 addition & 1 deletion requirements/extra-pdf-image.in
Original file line number Diff line number Diff line change
Expand Up @@ -12,5 +12,5 @@ google-cloud-vision
effdet
# Do not move to constraints.in, otherwise unstructured-inference will not be upgraded
# when unstructured library is.
unstructured-inference>=1.0.5
unstructured-inference>=1.1.1
unstructured.pytesseract>=0.3.12
49 changes: 25 additions & 24 deletions requirements/extra-pdf-image.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#
# pip-compile ./extra-pdf-image.in
#
accelerate==1.10.1
accelerate==1.11.0
# via unstructured-inference
antlr4-python3-runtime==4.9.3
# via omegaconf
Expand All @@ -27,13 +27,13 @@ coloredlogs==15.0.1
# via onnxruntime
contourpy==1.3.2
# via matplotlib
cryptography==46.0.2
cryptography==46.0.3
# via
# -c ./base.txt
# pdfminer-six
cycler==0.12.1
# via matplotlib
deprecated==1.2.18
deprecated==1.3.1
# via pikepdf
effdet==0.4.1
# via -r ./extra-pdf-image.in
Expand All @@ -46,31 +46,32 @@ flatbuffers==25.9.23
# via onnxruntime
fonttools==4.60.1
# via matplotlib
fsspec==2025.9.0
fsspec==2025.10.0
# via
# huggingface-hub
# torch
google-api-core[grpc]==2.26.0
google-api-core[grpc]==2.28.1
# via google-cloud-vision
google-auth==2.41.1
google-auth==2.42.1
# via
# google-api-core
# google-cloud-vision
google-cloud-vision==3.10.2
google-cloud-vision==3.11.0
# via -r ./extra-pdf-image.in
googleapis-common-protos==1.70.0
googleapis-common-protos==1.71.0
# via
# google-api-core
# grpcio-status
grpcio==1.75.1
grpcio==1.76.0
# via
# google-api-core
# google-cloud-vision
# grpcio-status
grpcio-status==1.75.1
grpcio-status==1.76.0
# via google-api-core
hf-xet==1.1.10
hf-xet==1.2.0
# via huggingface-hub
huggingface-hub==0.35.3
huggingface-hub==0.36.0
# via
# accelerate
# timm
Expand Down Expand Up @@ -123,7 +124,7 @@ onnx==1.19.1
# via
# -r ./extra-pdf-image.in
# unstructured-inference
onnxruntime==1.23.1
onnxruntime==1.23.2
# via
# -r ./extra-pdf-image.in
# unstructured-inference
Expand All @@ -150,9 +151,9 @@ pdfminer-six==20250327
# unstructured-inference
pi-heif==1.1.1
# via -r ./extra-pdf-image.in
pikepdf==9.11.0
pikepdf==10.0.0
# via -r ./extra-pdf-image.in
pillow==11.3.0
pillow==12.0.0
# via
# matplotlib
# pdf2image
Expand All @@ -164,7 +165,7 @@ proto-plus==1.26.1
# via
# google-api-core
# google-cloud-vision
protobuf==6.32.1
protobuf==6.33.0
# via
# -c ./deps/constraints.txt
# google-api-core
Expand All @@ -174,7 +175,7 @@ protobuf==6.32.1
# onnx
# onnxruntime
# proto-plus
psutil==7.1.0
psutil==7.1.3
# via
# -c ./base.txt
# accelerate
Expand All @@ -192,11 +193,11 @@ pycparser==2.23
# cffi
pyparsing==3.2.5
# via matplotlib
pypdf==6.1.1
pypdf==6.1.3
# via
# -c ./base.txt
# -r ./extra-pdf-image.in
pypdfium2==4.30.0
pypdfium2==5.0.0
# via unstructured-inference
python-dateutil==2.9.0.post0
# via
Expand All @@ -214,11 +215,11 @@ pyyaml==6.0.3
# omegaconf
# timm
# transformers
rapidfuzz==3.14.1
rapidfuzz==3.14.3
# via
# -c ./base.txt
# unstructured-inference
regex==2025.9.18
regex==2025.11.3
# via
# -c ./base.txt
# transformers
Expand All @@ -245,7 +246,7 @@ sympy==1.14.0
# via
# onnxruntime
# torch
timm==1.0.20
timm==1.0.22
# via
# effdet
# unstructured-inference
Expand Down Expand Up @@ -282,7 +283,7 @@ typing-extensions==4.15.0
# torch
tzdata==2025.2
# via pandas
unstructured-inference==1.0.5
unstructured-inference==1.1.1
# via -r ./extra-pdf-image.in
unstructured-pytesseract==0.3.15
# via -r ./extra-pdf-image.in
Expand All @@ -291,7 +292,7 @@ urllib3==2.5.0
# -c ./base.txt
# -c ./deps/constraints.txt
# requests
wrapt==1.17.3
wrapt==2.0.0
# via
# -c ./base.txt
# deprecated
2 changes: 1 addition & 1 deletion requirements/extra-pptx.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#
lxml==6.0.2
# via python-pptx
pillow==11.3.0
pillow==12.0.0
# via python-pptx
python-pptx==1.0.2
# via -r ./extra-pptx.in
Expand Down
12 changes: 6 additions & 6 deletions requirements/extra-xlsx.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,16 @@ cffi==2.0.0
# via
# -c ./base.txt
# cryptography
cryptography==46.0.2
cryptography==46.0.3
# via
# -c ./base.txt
# msoffcrypto-tool
et-xmlfile==2.0.0
# via openpyxl
msoffcrypto-tool==5.4.2
# via -r ./extra-xlsx.in
# via -r extra-xlsx.in
networkx==3.4.2
# via -r ./extra-xlsx.in
# via -r extra-xlsx.in
numpy==2.2.6
# via
# -c ./base.txt
Expand All @@ -27,9 +27,9 @@ olefile==0.47
# -c ./base.txt
# msoffcrypto-tool
openpyxl==3.1.5
# via -r ./extra-xlsx.in
# via -r extra-xlsx.in
pandas==2.3.3
# via -r ./extra-xlsx.in
# via -r extra-xlsx.in
pycparser==2.23
# via
# -c ./base.txt
Expand All @@ -51,4 +51,4 @@ typing-extensions==4.15.0
tzdata==2025.2
# via pandas
xlrd==2.0.2
# via -r ./extra-xlsx.in
# via -r extra-xlsx.in
8 changes: 4 additions & 4 deletions requirements/huggingface.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ filelock==3.20.0
# huggingface-hub
# torch
# transformers
fsspec==2025.9.0
fsspec==2025.10.0
# via
# huggingface-hub
# torch
hf-xet==1.1.10
hf-xet==1.2.0
# via huggingface-hub
huggingface-hub==0.35.3
huggingface-hub==0.36.0
# via
# tokenizers
# transformers
Expand Down Expand Up @@ -64,7 +64,7 @@ pyyaml==6.0.3
# via
# huggingface-hub
# transformers
regex==2025.9.18
regex==2025.11.3
# via
# -c ./base.txt
# sacremoses
Expand Down
Loading