Skip to main content

Installation

pip install browser-agent-protocol
Requires Python 3.10+ and a running BAP server. Dependencies: websockets >= 12.0, pydantic >= 2.0, aiohttp >= 3.9.0, anyio >= 4.0, httpx >= 0.27.0

Quick Start

import asyncio
from browseragentprotocol import BAPClient, role, text, label

async def main():
    async with BAPClient("ws://localhost:9222") as client:
        await client.launch(browser="chromium", headless=True)
        await client.create_page(url="https://example.com")

        await client.click(role("button", "Submit"))
        await client.fill(label("Email"), "user@example.com")

        screenshot = await client.screenshot()
        print(f"Screenshot: {len(screenshot.data)} bytes")

asyncio.run(main())

Sync API (scripts and notebooks)

from browseragentprotocol import BAPClientSync, role

with BAPClientSync("ws://localhost:9222") as client:
    client.launch(browser="chromium", headless=True)
    client.create_page(url="https://example.com")
    client.click(role("button", "Submit"))
    screenshot = client.screenshot()

Session Helper

from browseragentprotocol.context import bap_session, role

async with bap_session(
    "ws://localhost:9222",
    start_url="https://example.com"
) as client:
    await client.click(role("button", "Accept"))
    content = await client.content()

Semantic Selectors

from browseragentprotocol import role, text, label, css, xpath, test_id, ref

role("button", "Submit")           # ARIA role + accessible name
text("Sign in")                    # Visible text content
label("Email address")             # Associated label
test_id("submit-button")           # data-testid attribute
ref("@submitBtn")                  # Stable ref from observe
css(".btn-primary")                # CSS selector
xpath("//button[@type='submit']")  # XPath expression

API Mapping

The Python SDK mirrors the TypeScript API with snake_case naming:
| TypeScript | Python |
| --- | --- |
| `client.connect()` | `await client.connect()` |
| `client.launch({...})` | `await client.launch(browser=..., headless=...)` |
| `client.createPage({url})` | `await client.create_page(url=...)` |
| `client.navigate(url)` | `await client.navigate(url)` |
| `client.click(selector)` | `await client.click(selector)` |
| `client.fill(selector, value)` | `await client.fill(selector, value)` |
| `client.observe({...})` | `await client.observe(...)` |
| `client.act(steps)` | `await client.act(steps)` |
| `client.extract({...})` | `await client.extract(...)` |
| `client.screenshot()` | `await client.screenshot()` |
| `client.listPages()` | `await client.list_pages()` |
| `client.closeBrowser()` | `await client.close_browser()` |

Agent Methods

observe

observation = await client.observe(
    include_accessibility=True,
    include_interactive_elements=True,
    include_screenshot=True,
    max_elements=50,
    annotate_screenshot=True,
)

for element in observation.interactive_elements:
    print(f"{element.ref}: {element.role} - {element.name}")

act

from browseragentprotocol import BAPClient

result = await client.act([
    BAPClient.step("action/fill", {"selector": label("Email"), "value": "user@example.com"}),
    BAPClient.step("action/fill", {"selector": label("Password"), "value": "secret123"}),
    BAPClient.step("action/click", {"selector": role("button", "Sign In")}),
])

print(f"Completed {result.completed}/{result.total} steps")
print(f"Success: {result.success}")

extract

data = await client.extract(
    instruction="Extract all product names and prices",
    schema={
        "type": "array",
        "items": {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "price": {"type": "number"},
            },
        },
    },
)

if data.success:
    for product in data.data:
        print(f"{product['name']}: ${product['price']}")

Multi-Context Support

context = await client.create_context(
    context_id="user-session",
    options={"viewport": {"width": 1920, "height": 1080}, "locale": "en-US"},
)

page = await client.create_page(url="https://example.com", context_id=context.context_id)

# Clean up
await client.destroy_context(context.context_id)

Frame Support

frames = await client.list_frames()
for frame in frames.frames:
    print(f"{frame.frame_id}: {frame.url}")

await client.switch_frame(selector=css("iframe#payment"))
await client.fill(label("Card number"), "4242424242424242")
await client.main_frame()

Error Handling

from browseragentprotocol import (
    BAPError,
    BAPTimeoutError,
    BAPElementNotFoundError,
    BAPApprovalDeniedError,
)

try:
    await client.click(role("button", "Missing"))
except BAPTimeoutError as e:
    print(f"Timeout: {e.message}")
    if e.retryable:
        pass  # Retry
except BAPElementNotFoundError as e:
    print(f"Element not found: {e.details}")
    print(f"Recovery hint: {e.recovery_hint}")
except BAPApprovalDeniedError as e:
    print(f"Action denied: {e.message}")
except BAPError as e:
    print(f"Error {e.code}: {e.message}")

Pydantic Models

All types are Pydantic v2 `BaseModel` subclasses with camelCase aliases for JSON compatibility:
from browseragentprotocol.types import InteractiveElement, AgentObserveResult

# Access fields with snake_case
element.action_hints   # Python-style
element.actionHints    # Also works (via populate_by_name)

CLI Utility

The Python SDK includes a basic CLI for testing connectivity:
# Test connection
bap connect ws://localhost:9222

# Get server info
bap info ws://localhost:9222 --json