Python SDK

The Python SDK mirrors the TypeScript client API with Pythonic conventions. It supports both async and sync usage, making it ideal for notebooks, scripts, and Python-based AI agents.

Installation

pip install browser-agent-protocol

Async API (Quick Start)

import asyncio
from browseragentprotocol import BAPClient, role, text, label

async def main():
    """Run the quick-start flow against a BAP server at ws://localhost:9222.

    Launches headless Chromium, opens https://example.com, clicks a button
    and fills a field located by semantic selectors, then prints the length
    of the screenshot data and of the accessibility tree.
    """
    # The async context manager scopes the client connection to this block.
    async with BAPClient("ws://localhost:9222") as client:
        # Launch browser
        await client.launch(browser="chromium", headless=True)

        # Create page and navigate
        await client.create_page(url="https://example.com")

        # Click using semantic selectors (ARIA role + accessible name)
        await client.click(role("button", "Submit"))

        # Fill the form field labelled "Email"
        await client.fill(label("Email"), "user@example.com")

        # Take screenshot and report the length of its data payload
        screenshot = await client.screenshot()
        print(f"Screenshot: {len(screenshot.data)} bytes")

        # Get accessibility tree and report how many entries it holds
        tree = await client.accessibility()
        print(f"Found {len(tree.tree)} nodes")

# asyncio.run() creates an event loop and runs main() to completion.
asyncio.run(main())

Sync API (Scripts and Notebooks)

from browseragentprotocol import BAPClientSync, role

# Synchronous counterpart of the async example: the same method names,
# called without await inside a regular `with` block.
with BAPClientSync("ws://localhost:9222") as client:
    # Launch browser, then create a page and navigate
    client.launch(browser="chromium", headless=True)
    client.create_page(url="https://example.com")

    # Click using a semantic selector, then take a screenshot
    client.click(role("button", "Submit"))
    screenshot = client.screenshot()

High-Level Session Helper

The bap_session context manager handles browser launch, page creation, and cleanup:
from browseragentprotocol.context import bap_session, role

# NOTE: `async with` / `await` must run inside a coroutine, or in an
# environment that supports top-level await (e.g. a notebook cell).
async with bap_session(
    "ws://localhost:9222",
    start_url="https://example.com"
) as client:
    # No explicit launch()/create_page() calls are made here; the yielded
    # client is used directly on the page opened at start_url.
    await client.click(role("button", "Accept"))
    content = await client.content()

Semantic Selectors

from browseragentprotocol import role, text, label, css, xpath, test_id, ref

# Each helper builds a selector that is passed to client methods such as
# click(), fill(), or switch_frame().

# Recommended: semantic selectors
role("button", "Submit")           # ARIA role + accessible name
text("Sign in")                    # Visible text content
label("Email address")             # Associated label

# Developer-controlled identifiers
test_id("submit-button")           # data-testid attribute

# Stable element references
ref("@submitBtn")                  # Ref from agent/observe

# Fallback: CSS/XPath
css(".btn-primary")                # CSS selector (class "btn-primary")
xpath("//button[@type='submit']")  # XPath expression (button with type="submit")

AI Agent Methods

# Request an observation of the current page; the loop below reads the
# interactive_elements it returns.
# NOTE(review): the include_* / annotate_screenshot flags presumably select
# which sections the observation contains — confirm against the SDK reference.
observation = await client.observe(
    include_accessibility=True,
    include_interactive_elements=True,
    include_screenshot=True,
    max_elements=50,
    annotate_screenshot=True,
)

# Print one line per interactive element: its ref, role, and name.
for element in observation.interactive_elements:
    print(f"{element.ref}: {element.role} - {element.name}")
    # Example output:
    # @e1: button - Submit
    # @e2: textbox - Email
from browseragentprotocol import BAPClient

result = await client.act([
    BAPClient.step("action/fill", {
        "selector": label("Email"),
        "value": "user@example.com"
    }),
    BAPClient.step("action/fill", {
        "selector": label("Password"),
        "value": "secret123"
    }),
    BAPClient.step("action/click", {
        "selector": role("button", "Sign In")
    }),
])

print(f"Completed {result.completed}/{result.total} steps")
data = await client.extract(
    instruction="Extract all product names and prices",
    schema={
        "type": "array",
        "items": {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "price": {"type": "number"},
            },
        },
    },
)

if data.success:
    for product in data.data:
        print(f"{product['name']}: ${product['price']}")

Multi-Context Support

Create isolated browser contexts with separate cookies and storage:
# Create a context with an explicit ID and its own viewport/locale options.
context = await client.create_context(
    context_id="user-session",
    options={
        "viewport": {"width": 1920, "height": 1080},
        "locale": "en-US",
    },
)

# Open a page in that context by passing its context_id.
page = await client.create_page(
    url="https://example.com",
    context_id=context.context_id,
)

# Destroy the context once it is no longer needed.
await client.destroy_context(context.context_id)

Frame Support

# List frames and print each frame's ID and URL
frames = await client.list_frames()
for frame in frames.frames:
    print(f"{frame.frame_id}: {frame.url}")

# Switch to the iframe matched by the CSS selector "iframe#payment"
# (css is importable from browseragentprotocol alongside role/label)
await client.switch_frame(selector=css("iframe#payment"))

# Interact within frame
await client.fill(label("Card number"), "4242424242424242")

# Return to main frame
await client.main_frame()

Error Handling

from browseragentprotocol import (
    BAPError,
    BAPTimeoutError,
    BAPElementNotFoundError,
)

# The two specific exceptions are handled before BAPError, so the generic
# handler only runs when neither of them matches.
# NOTE(review): presumably BAPError is the base class of both — confirm.
try:
    await client.click(role("button", "Missing"))
except BAPTimeoutError as e:
    print(f"Timeout: {e.message}")
    # e.retryable flags whether the operation can be attempted again.
    if e.retryable:
        pass  # Safe to retry
except BAPElementNotFoundError as e:
    print(f"Element not found: {e.details}")
except BAPError as e:
    # Fallback for any other BAP error: report its code and message.
    print(f"Error {e.code}: {e.message}")

Starting the Server

The Python SDK requires a running BAP server:
# Option 1: Via CLI (recommended)
# Install the CLI package globally with npm, then run `bap open`.
npm i -g @browseragentprotocol/cli
bap open

# Option 2: Direct server
# Run the Playwright server package via npx on port 9222 — the same port as
# the ws://localhost:9222 URL used in the Python examples.
npx @browseragentprotocol/server-playwright --port 9222
The Python SDK connects to a BAP server over WebSocket. The server itself is a Node.js process — install it via npm.

Requirements

  • Python >= 3.10
  • A running BAP Playwright server