Documentation Index
Fetch the complete documentation index at: https://piyushvyas.mintlify.app/llms.txt
Use this file to discover all available pages before exploring further.
Installation
pip install browser-agent-protocol
Requires Python 3.10+ and a running BAP server.
Dependencies: websockets >= 12.0, pydantic >= 2.0, aiohttp >= 3.9.0, anyio >= 4.0, httpx >= 0.27.0
Quick Start
Async API (recommended)
import asyncio
from browseragentprotocol import BAPClient, role, text, label
async def main():
async with BAPClient("ws://localhost:9222") as client:
await client.launch(browser="chromium", headless=True)
await client.create_page(url="https://example.com")
await client.click(role("button", "Submit"))
await client.fill(label("Email"), "user@example.com")
screenshot = await client.screenshot()
print(f"Screenshot: {len(screenshot.data)} bytes")
asyncio.run(main())
Sync API (scripts and notebooks)
from browseragentprotocol import BAPClientSync, role
with BAPClientSync("ws://localhost:9222") as client:
client.launch(browser="chromium", headless=True)
client.create_page(url="https://example.com")
client.click(role("button", "Submit"))
screenshot = client.screenshot()
Session Helper
from browseragentprotocol.context import bap_session, role
async with bap_session(
"ws://localhost:9222",
start_url="https://example.com"
) as client:
await client.click(role("button", "Accept"))
content = await client.content()
Semantic Selectors
from browseragentprotocol import role, text, label, css, xpath, test_id, ref
role("button", "Submit") # ARIA role + accessible name
text("Sign in") # Visible text content
label("Email address") # Associated label
test_id("submit-button") # data-testid attribute
ref("@submitBtn") # Stable ref from observe
css(".btn-primary") # CSS selector
xpath("//button[@type='submit']") # XPath expression
API Mapping
The Python SDK mirrors the TypeScript API with snake_case naming:
| TypeScript | Python |
|---|---|
| client.connect() | await client.connect() |
| client.launch({...}) | await client.launch(browser=..., headless=...) |
| client.createPage({url}) | await client.create_page(url=...) |
| client.navigate(url) | await client.navigate(url) |
| client.click(selector) | await client.click(selector) |
| client.fill(selector, value) | await client.fill(selector, value) |
| client.observe({...}) | await client.observe(...) |
| client.act(steps) | await client.act(steps) |
| client.extract({...}) | await client.extract(...) |
| client.screenshot() | await client.screenshot() |
| client.listPages() | await client.list_pages() |
| client.closeBrowser() | await client.close_browser() |
Agent Methods
observe
observation = await client.observe(
include_accessibility=True,
include_interactive_elements=True,
include_screenshot=True,
max_elements=50,
annotate_screenshot=True,
)
for element in observation.interactive_elements:
print(f"{element.ref}: {element.role} - {element.name}")
act
from browseragentprotocol import BAPClient
result = await client.act([
BAPClient.step("action/fill", {"selector": label("Email"), "value": "user@example.com"}),
BAPClient.step("action/fill", {"selector": label("Password"), "value": "secret123"}),
BAPClient.step("action/click", {"selector": role("button", "Sign In")}),
])
print(f"Completed {result.completed}/{result.total} steps")
print(f"Success: {result.success}")
extract
data = await client.extract(
instruction="Extract all product names and prices",
schema={
"type": "array",
"items": {
"type": "object",
"properties": {
"name": {"type": "string"},
"price": {"type": "number"},
},
},
},
)
if data.success:
for product in data.data:
print(f"{product['name']}: ${product['price']}")
Multi-Context Support
context = await client.create_context(
context_id="user-session",
options={"viewport": {"width": 1920, "height": 1080}, "locale": "en-US"},
)
page = await client.create_page(url="https://example.com", context_id=context.context_id)
# Clean up
await client.destroy_context(context.context_id)
Frame Support
frames = await client.list_frames()
for frame in frames.frames:
print(f"{frame.frame_id}: {frame.url}")
await client.switch_frame(selector=css("iframe#payment"))
await client.fill(label("Card number"), "4242424242424242")
await client.main_frame()
Error Handling
from browseragentprotocol import (
BAPError,
BAPTimeoutError,
BAPElementNotFoundError,
BAPApprovalDeniedError,
)
try:
await client.click(role("button", "Missing"))
except BAPTimeoutError as e:
print(f"Timeout: {e.message}")
if e.retryable:
pass # Retry
except BAPElementNotFoundError as e:
print(f"Element not found: {e.details}")
print(f"Recovery hint: {e.recovery_hint}")
except BAPApprovalDeniedError as e:
print(f"Action denied: {e.message}")
except BAPError as e:
print(f"Error {e.code}: {e.message}")
Pydantic Models
All types use Pydantic v2 BaseModel with camelCase aliases for JSON compatibility:
from browseragentprotocol.types import InteractiveElement, AgentObserveResult
# Access fields with snake_case
element.action_hints # Python-style
element.actionHints # Also works (via populate_by_name)
CLI Utility
The Python SDK includes a basic CLI for testing connectivity:
# Test connection
bap connect ws://localhost:9222
# Get server info
bap info ws://localhost:9222 --json