Skip to content

Commit 8c12800

Browse files
committed
wip: v0.0.1
0 parents  commit 8c12800

10 files changed

Lines changed: 492 additions & 0 deletions

File tree

__init__.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
"""
2+
Maxun SDK - Unified package for web automation and data extraction
3+
"""
4+
5+
from .extract import Extract
6+
from .scrape import Scrape
7+
from .crawl import Crawl
8+
from .search import Search
9+
10+
# Low-level robot handle and HTTP client
11+
from .robot import Robot
12+
from .client import Client
13+
14+
from .types import *

builders/extract_builder.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
from typing import Dict, Optional
2+
from .workflow_builder import WorkflowBuilder
3+
4+
5+
class ExtractBuilder(WorkflowBuilder):
    """Fluent builder for "extract" robots.

    Awaiting the builder hands it back to the owning extractor, which
    materializes the workflow into a robot on the server.
    """

    def __init__(self, name: str):
        super().__init__(name, "extract")
        # Back-reference to the Extract facade; set via set_extractor().
        self._extractor = None

    def set_extractor(self, extractor):
        """Attach the owning extractor so this builder can be awaited."""
        self._extractor = extractor
        return self

    def capture_text(self, fields: Dict[str, str], name: Optional[str] = None):
        """Add a schema-scrape action capturing the named text *fields*."""
        return self._add_action("scrapeSchema", [fields], name)

    def capture_list(self, config: dict, name: Optional[str] = None):
        """Add a list-scrape action for repeated items.

        *config* requires ``selector``; ``maxItems`` defaults to 100 and
        ``pagination`` (``type`` plus optional ``selector``) is optional.
        """
        list_config = {
            "itemSelector": config["selector"],
            "maxItems": config.get("maxItems", 100),
        }

        pagination = config.get("pagination")
        if pagination:
            list_config["pagination"] = {
                "type": pagination["type"],
                "selector": pagination.get("selector"),
            }

        return self._add_action("scrapeList", [list_config], name)

    def __await__(self):
        """Make the builder awaitable: ``await extractor.create(...)...``."""
        if not self._extractor:
            raise RuntimeError(
                "Builder not properly initialized. Use extractor.create()."
            )
        return self._extractor.build(self).__await__()

builders/workflow_builder.py

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
from typing import Any, Dict, List, Optional
2+
from ..types import RobotType, RobotMode, Format
3+
4+
5+
class WorkflowBuilder:
    """Fluent builder that assembles a Maxun workflow plus its metadata.

    Steps are kept newest-first: each navigation inserts its step at the
    front of the list, and actions append to the current step.
    """

    def __init__(self, name: str, robot_type: RobotType):
        self.name = name
        self.robot_type = robot_type
        self.workflow: List[Dict[str, Any]] = []
        self.meta: Dict[str, Any] = {"name": name, "robotType": robot_type}
        # Step that subsequent actions are appended to.
        self.current_step: Optional[Dict[str, Any]] = None
        self._is_first_navigation = True

    def navigate(self, url: str):
        """Start a new step scoped to *url*.

        The very first navigation also appends a bootstrap ``about:blank``
        step that performs the initial goto and waits for network idle.
        """
        step = {"where": {"url": url}, "what": []}
        self.workflow.insert(0, step)

        if self._is_first_navigation:
            self._is_first_navigation = False
            self.workflow.append(
                {
                    "where": {"url": "about:blank"},
                    "what": [
                        {"action": "goto", "args": [url]},
                        {"action": "waitForLoadState", "args": ["networkidle"]},
                    ],
                }
            )

        self.current_step = step
        return self

    def click(self, selector: str):
        """Click the element matched by *selector*."""
        return self._add_action("click", [selector])

    def type(self, selector: str, text: str, input_type: Optional[str] = None):
        """Type *text* into *selector*, optionally tagging the input type."""
        if input_type:
            return self._add_action("type", [selector, text, input_type])
        return self._add_action("type", [selector, text])

    def wait_for(self, selector: str, timeout: Optional[int] = None):
        """Wait for *selector* to appear (timeout in ms, default 30000)."""
        return self._add_action(
            "waitForSelector",
            [selector, {"timeout": timeout or 30000}],
        )

    def wait(self, milliseconds: int):
        """Pause for a fixed number of milliseconds."""
        return self._add_action("waitForTimeout", [milliseconds])

    def capture_screenshot(self, name: Optional[str] = None, options: Optional[dict] = None):
        """Add a screenshot action; *options* replaces the defaults wholesale."""
        defaults = {
            "type": "png",
            "caret": "hide",
            "scale": "device",
            "timeout": 30000,
            "fullPage": True,
            "animations": "allow",
        }
        self._add_action("screenshot", [options or defaults], name)
        return self

    def scroll(self, direction: str, distance: Optional[int] = None):
        """Scroll in *direction*; *distance* may be None (implementation-defined)."""
        return self._add_action(
            "scroll", [{"direction": direction, "distance": distance}]
        )

    def set_cookies(self, cookies: List[Dict[str, str]]):
        """Attach cookies to the current step's "where" clause.

        NOTE: silently a no-op when no step has been started yet.
        """
        if self.current_step:
            self.current_step["where"]["cookies"] = cookies
        return self

    def mode(self, mode: RobotMode):
        """Record the robot's execution mode in the metadata."""
        self.meta["mode"] = mode
        return self

    def format(self, formats: List[Format]):
        """Record the desired output formats in the metadata."""
        self.meta["formats"] = formats
        return self

    def _add_action(self, action: str, args: list, name: Optional[str] = None):
        """Append an action to the current step, creating one if needed."""
        entry: Dict[str, Any] = {"action": action, "args": args}
        if name:
            entry["name"] = name

        if not self.current_step:
            # No navigation yet: open an unscoped step at the front.
            self.current_step = {"where": {}, "what": [entry]}
            self.workflow.insert(0, self.current_step)
        else:
            self.current_step["what"].append(entry)

        return self

    def get_workflow_array(self):
        """Return the assembled list of workflow steps."""
        return self.workflow

    def get_meta(self):
        """Return the metadata dict (name, robotType, mode, formats, ...)."""
        return self.meta

client.py

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
import httpx
2+
from typing import Any, Dict, Optional
3+
from .types import Config, MaxunError
4+
5+
6+
class Client:
    """Thin async HTTP client for the Maxun SDK API.

    Wraps an ``httpx.AsyncClient`` configured with the API-key (and
    optional team) headers, and normalizes every failure into MaxunError.
    """

    def __init__(self, config: Config):
        self.api_key = config.api_key

        headers = {
            "x-api-key": self.api_key,
            "Content-Type": "application/json",
        }
        if config.team_id:
            headers["x-team-id"] = config.team_id

        # Default targets a local dev server; override via config.base_url.
        self.base_url = config.base_url or "http://localhost:8080/api/sdk"

        self.client = httpx.AsyncClient(
            base_url=self.base_url,
            headers=headers,
            timeout=30.0,
        )

    async def _handle(self, request):
        """Await *request*, unwrap the JSON envelope, and normalize errors.

        Returns the ``data`` field of the response body (None when absent).

        Raises:
            MaxunError: for HTTP error statuses (carrying status_code and
                the decoded body as details) or transport failures.
        """
        try:
            response = await request
            response.raise_for_status()
            data = response.json()
            return data.get("data")
        except httpx.HTTPStatusError as e:
            try:
                payload = e.response.json()
            except Exception:
                payload = None
            # Prefer the server-supplied message; fall back to the HTTP
            # error text when the body is missing, non-JSON, not a dict
            # (the original `payload.get(...)` crashed on list/str bodies),
            # or lacks an "error" key.
            message = payload.get("error") if isinstance(payload, dict) else None
            raise MaxunError(
                message or str(e),
                status_code=e.response.status_code,
                details=payload,
            ) from e
        except httpx.RequestError as e:
            # Transport-level failure: DNS, connect, timeout, etc.
            raise MaxunError("No response from server", details=str(e)) from e

    async def get_robots(self):
        """List all robots visible to this API key."""
        return await self._handle(self.client.get("/robots"))

    async def get_robot(self, robot_id: str):
        """Fetch a single robot; raise a 404 MaxunError when absent."""
        data = await self._handle(self.client.get(f"/robots/{robot_id}"))
        if not data:
            raise MaxunError(f"Robot {robot_id} not found", status_code=404)
        return data

    async def create_robot(self, workflow_file: dict):
        """Create a robot from a workflow file; creation may be slow."""
        return await self._handle(
            self.client.post("/robots", json=workflow_file, timeout=120)
        )

    async def update_robot(self, robot_id: str, updates: dict):
        """Apply partial *updates* to an existing robot."""
        return await self._handle(
            self.client.put(f"/robots/{robot_id}", json=updates)
        )

    async def delete_robot(self, robot_id: str):
        """Delete a robot by id. Returns None."""
        await self._handle(self.client.delete(f"/robots/{robot_id}"))

    async def execute_robot(self, robot_id: str, options: Optional[dict] = None):
        """Run a robot.

        *options* may carry ``params``, ``webhook`` and ``timeout``
        (seconds, default 300 — runs can be long).
        """
        opts = options or {}
        return await self._handle(
            self.client.post(
                f"/robots/{robot_id}/execute",
                json={
                    "params": opts.get("params"),
                    "webhook": opts.get("webhook"),
                },
                timeout=opts.get("timeout", 300),
            )
        )

    async def get_runs(self, robot_id: str):
        """List all runs of a robot."""
        return await self._handle(self.client.get(f"/robots/{robot_id}/runs"))

    async def get_run(self, robot_id: str, run_id: str):
        """Fetch a single run of a robot."""
        return await self._handle(
            self.client.get(f"/robots/{robot_id}/runs/{run_id}")
        )

    async def abort_run(self, robot_id: str, run_id: str):
        """Abort a run in progress. Returns None."""
        await self._handle(
            self.client.post(f"/robots/{robot_id}/runs/{run_id}/abort")
        )

    async def extract_with_llm(self, options: dict):
        """Create a robot via LLM-driven extraction (long timeout)."""
        return await self._handle(
            self.client.post("/extract/llm", json=options, timeout=300)
        )

    async def create_crawl_robot(self, url: str, options: dict):
        """Create a crawl robot rooted at *url*; *options* merged into body."""
        return await self._handle(
            self.client.post("/crawl", json={"url": url, **options})
        )

    async def create_search_robot(self, options: dict):
        """Create a search robot from *options*."""
        return await self._handle(
            self.client.post("/search", json=options)
        )

crawl.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
from typing import Any
2+
from .client import Client
3+
from .types import Config, CrawlConfig
4+
from .robot import Robot
5+
6+
7+
class Crawl:
    """Facade for creating crawl robots through the Maxun API."""

    def __init__(self, config: Config):
        self.client = Client(config)

    async def create(self, name: str, url: str, crawl_config: CrawlConfig) -> Robot:
        """Create a crawl robot rooted at *url* using *crawl_config*.

        Raises:
            ValueError: when *url* or *crawl_config* is missing.
        """
        if not url:
            raise ValueError("URL is required")
        if not crawl_config:
            raise ValueError("Crawl configuration is required")

        payload = {
            "name": name,
            "crawlConfig": crawl_config,
        }
        robot_data = await self.client.create_crawl_robot(url, payload)
        return Robot(self.client, robot_data)

extract.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
import time
2+
import random
3+
import string
4+
from typing import List, Optional
5+
6+
from .client import Client
7+
from .types import Config
8+
from .robot import Robot
9+
from .builders.extract_builder import ExtractBuilder
10+
11+
12+
class Extract:
    """Facade for creating and managing "extract" robots."""

    def __init__(self, config: Config):
        self.client = Client(config)

    def create(self, name: str) -> ExtractBuilder:
        """Return an awaitable builder bound to this extractor."""
        return ExtractBuilder(name).set_extractor(self)

    async def build(self, builder: ExtractBuilder) -> Robot:
        """Materialize *builder*'s workflow into a robot on the server."""
        meta = builder.get_meta()
        # Client-side id: millisecond timestamp plus a random suffix.
        meta["id"] = f"robot_{int(time.time() * 1000)}_{self._random_string()}"

        workflow_file = {
            "meta": meta,
            "workflow": builder.get_workflow_array(),
        }

        robot_data = await self.client.create_robot(workflow_file)
        return Robot(self.client, robot_data)

    async def get_robots(self) -> List[Robot]:
        """List only robots whose recording metadata marks them "extract"."""
        robots = await self.client.get_robots()
        return [
            Robot(self.client, r)
            for r in robots
            if r["recording_meta"]["robotType"] == "extract"
        ]

    async def get_robot(self, robot_id: str) -> Robot:
        """Fetch a single robot by id."""
        return Robot(self.client, await self.client.get_robot(robot_id))

    async def delete_robot(self, robot_id: str) -> None:
        """Delete a robot by id."""
        await self.client.delete_robot(robot_id)

    async def extract(self, options: dict) -> Robot:
        """Create a robot via LLM-driven extraction, then fetch it."""
        created = await self.client.extract_with_llm(options)
        robot = await self.client.get_robot(created["robotId"])
        return Robot(self.client, robot)

    def _random_string(self, length: int = 9) -> str:
        """Return a random lowercase-alphanumeric suffix of *length* chars."""
        alphabet = string.ascii_lowercase + string.digits
        return "".join(random.choices(alphabet, k=length))

robot.py

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
from typing import Optional
2+
from .client import Client
3+
4+
5+
class Robot:
    """Handle to a server-side robot; all operations go through the client."""

    def __init__(self, client: Client, robot_data: dict):
        self.client = client
        # Raw robot record as returned by the API.
        self.robot_data = robot_data

    @property
    def _meta(self) -> dict:
        # "recording_meta" section of the raw record.
        return self.robot_data["recording_meta"]

    @property
    def id(self) -> str:
        """Server-assigned robot id."""
        return self._meta["id"]

    @property
    def name(self) -> str:
        """Human-readable robot name."""
        return self._meta["name"]

    async def run(self, options: Optional[dict] = None):
        """Execute the robot; *options* may carry params/webhook/timeout."""
        return await self.client.execute_robot(self.id, options or {})

    async def get_runs(self):
        """List all runs of this robot."""
        return await self.client.get_runs(self.id)

    async def get_run(self, run_id: str):
        """Fetch a single run by id."""
        return await self.client.get_run(self.id, run_id)

    async def abort(self, run_id: str):
        """Abort a run in progress."""
        await self.client.abort_run(self.id, run_id)

    async def delete(self):
        """Delete this robot on the server."""
        await self.client.delete_robot(self.id)

    async def refresh(self):
        """Re-fetch and replace the cached robot record."""
        self.robot_data = await self.client.get_robot(self.id)

0 commit comments

Comments
 (0)