|
1 | 1 | import base64 |
2 | 2 | import dill # type: ignore |
3 | 3 | import json |
4 | | -import requests |
| 4 | +import requests # type: ignore |
5 | 5 | import os |
6 | 6 | import time |
7 | 7 | import json |
@@ -42,24 +42,29 @@ def analyze_equations( |
42 | 42 | file_path: Optional[str] = None, |
43 | 43 | url_path: Optional[str] = None, |
44 | 44 | parsed_paper: Optional[ParseResponse] = None, |
45 | | - ) -> Optional[EquationExtractionResponse]: |
46 | | - response: Union[EquationExtractionResponse, EquationProcessingResponse] |
47 | | - |
| 45 | + ) -> Optional[EquationExtractionResponse]: |
48 | 46 | if file_path: |
49 | | - with open(file_path, "rb") as file: |
50 | | - response = self._ax_client.document.equation.from_pdf(document=file) |
| 47 | + with open(file_path, "rb") as pdf_file: |
| 48 | + response = self._ax_client.document.equation.from_pdf(document=pdf_file) |
| 49 | + |
51 | 50 | elif url_path: |
52 | 51 | if "arxiv" in url_path and "abs" in url_path: |
53 | 52 | url_path = url_path.replace("abs", "pdf") |
54 | | - |
55 | | - response = self._ax_client.document.equation.from_pdf(document=url_path) |
| 53 | + url_file = requests.get(url_path) |
| 54 | + from io import BytesIO |
| 55 | + pdf_stream = BytesIO(url_file.content) |
| 56 | + response = self._ax_client.document.equation.from_pdf(document=pdf_stream) |
| 57 | + |
56 | 58 | elif parsed_paper: |
57 | | - response = self._ax_client.document.equation.process(**parsed_paper.model_dump()) |
| 59 | + response = EquationExtractionResponse.model_validate( |
| 60 | + self._ax_client.document.equation.process(**parsed_paper.model_dump()).model_dump() |
| 61 | + ) |
| 62 | + |
58 | 63 | else: |
59 | 64 | print("Please provide either a file path or a URL to analyze.") |
60 | 65 | return None |
61 | | - |
62 | | - return EquationExtractionResponse(equations=response.equations) |
| 66 | + |
| 67 | + return response |
63 | 68 |
|
64 | 69 | def validate_equations( |
65 | 70 | self, |
|
0 commit comments