Skip to content

Commit 50eb9be

Browse files
authored
Merge pull request #11 from getmaxun/develop
chore: v0.0.7
2 parents bb92db2 + 91deaae commit 50eb9be

1 file changed

Lines changed: 80 additions & 2 deletions

File tree

client.py

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import time
2+
import os
23
import httpx
34
from datetime import datetime, timezone
4-
from typing import Optional
5+
from typing import Optional, Union
56
from .types import Config, MaxunError
67

78

@@ -11,7 +12,8 @@ def __init__(self, config: Config):
1112

1213
headers = {
1314
"x-api-key": self.api_key,
14-
"Content-Type": "application/json",
15+
# Content-Type is intentionally omitted here so httpx can set it
16+
# correctly per request (e.g. multipart/form-data for file uploads)
1517
}
1618

1719
if config.team_id:
@@ -161,6 +163,82 @@ async def extract_with_llm(self, options: dict):
161163
self.client.post("/extract/llm", json=options, timeout=300)
162164
)
163165

166+
async def create_document_extract_robot(
167+
self,
168+
file: Union[str, bytes],
169+
prompt: str,
170+
robot_name: Optional[str] = None,
171+
ollama_model: Optional[str] = None,
172+
file_name: Optional[str] = None,
173+
) -> dict:
174+
"""Create a document-extraction robot from a PDF file path or bytes."""
175+
if isinstance(file, str):
176+
file_name = file_name or os.path.basename(file)
177+
with open(file, 'rb') as f:
178+
file_bytes = f.read()
179+
else:
180+
file_bytes = file
181+
file_name = file_name or 'document.pdf'
182+
183+
data = {'prompt': prompt}
184+
if robot_name:
185+
data['robotName'] = robot_name
186+
if ollama_model:
187+
data['ollamaModel'] = ollama_model
188+
189+
response = await self.client.post(
190+
'/robots/document',
191+
files={'file': (file_name, file_bytes, 'application/pdf')},
192+
data=data,
193+
timeout=120,
194+
)
195+
response.raise_for_status()
196+
body = response.json()
197+
if not body.get('data') and not body.get('robot'):
198+
raise MaxunError('Failed to create document robot')
199+
return body
200+
201+
async def create_document_parse_robot(
202+
self,
203+
file: Union[str, bytes],
204+
output_formats: list,
205+
robot_name: Optional[str] = None,
206+
file_name: Optional[str] = None,
207+
) -> dict:
208+
"""Create a document-parse robot from a PDF file path or bytes."""
209+
if isinstance(file, str):
210+
file_name = file_name or os.path.basename(file)
211+
with open(file, 'rb') as f:
212+
file_bytes = f.read()
213+
else:
214+
file_bytes = file
215+
file_name = file_name or 'document.pdf'
216+
217+
valid_formats = {'markdown', 'html', 'links'}
218+
filtered = [f for f in output_formats if f in valid_formats]
219+
if not filtered:
220+
raise MaxunError('At least one valid output format is required (markdown, html, links)')
221+
222+
data = {}
223+
if robot_name:
224+
data['robotName'] = robot_name
225+
226+
files_payload = [('file', (file_name, file_bytes, 'application/pdf'))]
227+
for fmt in filtered:
228+
files_payload.append(('outputFormats[]', (None, fmt)))
229+
230+
response = await self.client.post(
231+
'/robots/document-parse',
232+
files=files_payload,
233+
data=data,
234+
timeout=120,
235+
)
236+
response.raise_for_status()
237+
body = response.json()
238+
if not body.get('data') and not body.get('robot'):
239+
raise MaxunError('Failed to create document-parse robot')
240+
return body
241+
164242
async def create_crawl_robot(self, url: str, options: dict):
165243
return await self._handle(
166244
self.client.post("/crawl", json={"url": url, **options})

0 commit comments

Comments
 (0)