From c8f8f3d2d5ecc7322d92904ecdffcd9c1aa537f3 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 20 Apr 2026 16:21:05 +0530 Subject: [PATCH 1/9] fix: rename to smartQueries --- scrape.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scrape.py b/scrape.py index 031b190..a0df996 100644 --- a/scrape.py +++ b/scrape.py @@ -16,7 +16,7 @@ async def create( name: str, url: str, formats: Optional[List[Format]] = None, - prompt_instructions: Optional[str] = None, + smartQueries: Optional[str] = None, ) -> Robot: """ Create a scrape robot. @@ -24,7 +24,7 @@ async def create( :param name: Robot name. :param url: URL to scrape. :param formats: Output formats (default: ["markdown"]). - :param prompt_instructions: Optional Smart Queries prompt. After scraping the + :param smartQueries: Optional Smart Queries prompt. After scraping the LLM analyzes the page and returns an answer. Adds 2 extra credits per run on top of the base 1 scrape credit. """ @@ -40,8 +40,8 @@ async def create( "url": url, "formats": formats or ["markdown"], } - if prompt_instructions: - meta["promptInstructions"] = prompt_instructions.strip() + if smartQueries: + meta["smartQueries"] = smartQueries.strip() workflow_file: WorkflowFile = { "meta": meta, From 23f1496ade8228adce956bf1a0e4eca450a3c6a4 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Mon, 20 Apr 2026 16:23:17 +0530 Subject: [PATCH 2/9] chore: v0.0.5 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 448dbf7..a1210b2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "maxun" -version = "0.0.4" +version = "0.0.5" description = "Python SDK for Maxun - web automation and data extraction" requires-python = ">=3.8" license = { text = "MIT" } From 644b78ee12a37a9a71f0a7bbc661869293aac316 Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Mon, 20 Apr 2026 16:43:59 +0530 Subject: [PATCH 3/9] fix: snake case python --- scrape.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/scrape.py b/scrape.py index a0df996..a80f70c 100644 --- a/scrape.py +++ b/scrape.py @@ -16,7 +16,7 @@ async def create( name: str, url: str, formats: Optional[List[Format]] = None, - smartQueries: Optional[str] = None, + smart_queries: Optional[str] = None, ) -> Robot: """ Create a scrape robot. @@ -24,9 +24,8 @@ async def create( :param name: Robot name. :param url: URL to scrape. :param formats: Output formats (default: ["markdown"]). - :param smartQueries: Optional Smart Queries prompt. After scraping the - LLM analyzes the page and returns an answer. Adds 2 extra credits per run - on top of the base 1 scrape credit. + :param smart_queries: Optional Smart Queries prompt. After scraping the + LLM analyzes the page and returns an answer. """ if not url: raise ValueError("URL is required") @@ -40,8 +39,8 @@ async def create( "url": url, "formats": formats or ["markdown"], } - if smartQueries: - meta["smartQueries"] = smartQueries.strip() + if smart_queries: + meta["smartQueries"] = smart_queries.strip() workflow_file: WorkflowFile = { "meta": meta, From a4dd5f87c0c74cda79554d6a551e750fca0114df Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Fri, 24 Apr 2026 13:36:07 +0530 Subject: [PATCH 4/9] feat: add support for document extraction --- client.py | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) diff --git a/client.py b/client.py index 3d371cb..2daa8d9 100644 --- a/client.py +++ b/client.py @@ -1,7 +1,8 @@ import time +import os import httpx from datetime import datetime, timezone -from typing import Optional +from typing import Optional, Union from .types import Config, MaxunError @@ -161,6 +162,41 @@ async def extract_with_llm(self, options: dict): self.client.post("/extract/llm", json=options, timeout=300) ) + async def create_document_robot( + self, + file: Union[str, bytes], + prompt: str, + robot_name: Optional[str] = None, + ollama_model: Optional[str] = None, + file_name: Optional[str] = None, + ) -> dict: + """Create a document-extraction robot from a PDF file path or bytes.""" + if isinstance(file, str): + file_name = file_name or os.path.basename(file) + with open(file, 'rb') as f: + file_bytes = f.read() + else: + file_bytes = file + file_name = file_name or 'document.pdf' + + data = {'prompt': prompt} + if robot_name: + data['robotName'] = robot_name + if ollama_model: + data['ollamaModel'] = ollama_model + + response = await self.client.post( + '/robots/document', + files={'file': (file_name, file_bytes, 'application/pdf')}, + data=data, + timeout=120, + ) + response.raise_for_status() + body = response.json() + if not body.get('data') and not body.get('robot'): + raise MaxunError('Failed to create document robot') + return body + async def create_crawl_robot(self, url: str, options: dict): return await self._handle( self.client.post("/crawl", json={"url": url, **options}) From 1be3bb4fd876292cf397911cd7f742ec6a9afed5 Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Tue, 28 Apr 2026 11:55:27 +0530 Subject: [PATCH 5/9] chore: add links format --- types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/types.py b/types.py index 644b353..ae3e2ce 100644 --- a/types.py +++ b/types.py @@ -7,7 +7,7 @@ RobotType = Literal["extract", "scrape", "crawl", "search"] RobotMode = Literal["normal", "bulk"] -Format = Literal["markdown", "html", "screenshot-visible", "screenshot-fullpage"] +Format = Literal["markdown", "html", "text", "links", "screenshot-visible", "screenshot-fullpage"] RunStatus = Literal["running", "queued", "success", "failed", "aborting", "aborted"] TimeUnit = Literal["MINUTES", "HOURS", "DAYS", "WEEKS", "MONTHS"] CrawlMode = Literal["domain", "subdomain", "path"] From ec72656556aabc56b7de5566a39fafa2b86317ed Mon Sep 17 00:00:00 2001 From: Karishma Shukla Date: Tue, 28 Apr 2026 16:35:35 +0530 Subject: [PATCH 6/9] chore: v0.0.6 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a1210b2..a9dc6e7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "maxun" -version = "0.0.5" +version = "0.0.6" description = "Python SDK for Maxun - web automation and data extraction" requires-python = ">=3.8" license = { text = "MIT" } From bdafdcd87e637389e65b7921317e3fbdf1ec8b89 Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Thu, 7 May 2026 17:22:56 +0530 Subject: [PATCH 7/9] feat: add document parsing support --- client.py | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/client.py b/client.py index 2daa8d9..8124e55 100644 --- a/client.py +++ b/client.py @@ -12,7 +12,8 @@ def __init__(self, config: Config): headers = { "x-api-key": self.api_key, - "Content-Type": "application/json", + # Content-Type is intentionally omitted here so httpx can set it + # correctly per request (e.g. multipart/form-data for file uploads) } if config.team_id: @@ -197,6 +198,47 @@ async def create_document_robot( raise MaxunError('Failed to create document robot') return body + async def create_document_parse_robot( + self, + file: Union[str, bytes], + output_formats: list, + robot_name: Optional[str] = None, + file_name: Optional[str] = None, + ) -> dict: + """Create a document-parse robot from a PDF file path or bytes.""" + if isinstance(file, str): + file_name = file_name or os.path.basename(file) + with open(file, 'rb') as f: + file_bytes = f.read() + else: + file_bytes = file + file_name = file_name or 'document.pdf' + + valid_formats = {'markdown', 'html', 'links'} + filtered = [f for f in output_formats if f in valid_formats] + if not filtered: + raise MaxunError('At least one valid output format is required (markdown, html, links)') + + data = {} + if robot_name: + data['robotName'] = robot_name + + files_payload = [('file', (file_name, file_bytes, 'application/pdf'))] + for fmt in filtered: + files_payload.append(('outputFormats[]', (None, fmt))) + + response = await self.client.post( + '/robots/document-parse', + files=files_payload, + data=data, + timeout=120, + ) + response.raise_for_status() + body = response.json() + if not body.get('data') and not body.get('robot'): + raise MaxunError('Failed to create document-parse robot') + return body + async def create_crawl_robot(self, url: str, options: dict): return await self._handle( self.client.post("/crawl", json={"url": url, **options}) From 30df5b4252b5ee787cd675268588c9e948c3a6bc Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Fri, 8 May 2026 12:19:19 +0530 Subject: [PATCH 8/9] chore: rename doc methods --- client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/client.py b/client.py index 8124e55..9b686ac 100644 --- a/client.py +++ b/client.py @@ -163,7 +163,7 @@ async def extract_with_llm(self, options: dict): self.client.post("/extract/llm", json=options, timeout=300) ) - async def create_document_robot( + async def create_document_extract_robot( self, file: Union[str, bytes], prompt: str, From d6c9f52ce20fda36ef85aead0c59e828663f1e21 Mon Sep 17 00:00:00 2001 From: Karishma Shukla Date: Mon, 11 May 2026 00:40:04 +0530 Subject: [PATCH 9/9] chore: v0.0.7 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index a9dc6e7..ef8b2dc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "maxun" -version = "0.0.6" +version = "0.0.7" description = "Python SDK for Maxun - web automation and data extraction" requires-python = ">=3.8" license = { text = "MIT" }