Skip to main content

Documentation Index

Fetch the complete documentation index at: https://piyushvyas.mintlify.app/llms.txt

Use this file to discover all available pages before exploring further.

The Python SDK mirrors the TypeScript client API with Pythonic conventions. It supports both async and sync usage, making it ideal for notebooks, scripts, and Python-based AI agents.

Installation

pip install browser-agent-protocol

Async API

import asyncio
from browseragentprotocol import BAPClient, role, text, label

async def main():
    async with BAPClient("ws://localhost:9222") as client:
        # Launch browser
        await client.launch(browser="chromium", headless=True)

        # Create page and navigate
        await client.create_page(url="https://example.com")

        # Click using semantic selectors
        await client.click(role("button", "Submit"))

        # Fill form fields
        await client.fill(label("Email"), "user@example.com")

        # Take screenshot
        screenshot = await client.screenshot()
        print(f"Screenshot: {len(screenshot.data)} bytes")

        # Get accessibility tree
        tree = await client.accessibility()
        print(f"Found {len(tree.tree)} nodes")

asyncio.run(main())

Sync API (Scripts and Notebooks)

from browseragentprotocol import BAPClientSync, role

with BAPClientSync("ws://localhost:9222") as client:
    client.launch(browser="chromium", headless=True)
    client.create_page(url="https://example.com")

    client.click(role("button", "Submit"))
    screenshot = client.screenshot()

High-Level Session Helper

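The bap_session context manager handles browser launch, page creation, and cleanup. Because it is entered with async with, run the snippet inside an async function (for example via asyncio.run) or in an environment that supports top-level await, such as a Jupyter notebook: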
from browseragentprotocol.context import bap_session, role

async with bap_session(
    "ws://localhost:9222",
    start_url="https://example.com"
) as client:
    await client.click(role("button", "Accept"))
    content = await client.content()

Semantic Selectors

from browseragentprotocol import role, text, label, css, xpath, test_id, ref

# Recommended: semantic selectors
role("button", "Submit")           # ARIA role + accessible name
text("Sign in")                    # Visible text content
label("Email address")             # Associated label

# Developer-controlled identifiers
test_id("submit-button")           # data-testid attribute

# Stable element references
ref("@submitBtn")                  # Ref from agent/observe

# Fallback: CSS/XPath
css(".btn-primary")
xpath("//button[@type='submit']")

AI Agent Methods

observation = await client.observe(
    include_accessibility=True,
    include_interactive_elements=True,
    include_screenshot=True,
    max_elements=50,
    annotate_screenshot=True,
)

for element in observation.interactive_elements:
    print(f"{element.ref}: {element.role} - {element.name}")
    # @e1: button - Submit
    # @e2: textbox - Email

Use act to run a sequence of steps in a single call:

from browseragentprotocol import BAPClient

result = await client.act([
    BAPClient.step("action/fill", {
        "selector": label("Email"),
        "value": "user@example.com"
    }),
    BAPClient.step("action/fill", {
        "selector": label("Password"),
        "value": "secret123"
    }),
    BAPClient.step("action/click", {
        "selector": role("button", "Sign In")
    }),
])

print(f"Completed {result.completed}/{result.total} steps")

Use extract to pull structured data from the page, with the expected shape described by a schema:

data = await client.extract(
    instruction="Extract all product names and prices",
    schema={
        "type": "array",
        "items": {
            "type": "object",
            "properties": {
                "name": {"type": "string"},
                "price": {"type": "number"},
            },
        },
    },
)

if data.success:
    for product in data.data:
        print(f"{product['name']}: ${product['price']}")

Multi-Context Support

Create isolated browser contexts with separate cookies and storage:
context = await client.create_context(
    context_id="user-session",
    options={
        "viewport": {"width": 1920, "height": 1080},
        "locale": "en-US",
    },
)

page = await client.create_page(
    url="https://example.com",
    context_id=context.context_id,
)

await client.destroy_context(context.context_id)

Frame Support

# List frames
frames = await client.list_frames()
for frame in frames.frames:
    print(f"{frame.frame_id}: {frame.url}")

# Switch to iframe
await client.switch_frame(selector=css("iframe#payment"))

# Interact within frame
await client.fill(label("Card number"), "4242424242424242")

# Return to main frame
await client.main_frame()

Error Handling

from browseragentprotocol import (
    BAPError,
    BAPTimeoutError,
    BAPElementNotFoundError,
)

try:
    await client.click(role("button", "Missing"))
except BAPTimeoutError as e:
    print(f"Timeout: {e.message}")
    if e.retryable:
        pass  # Safe to retry
except BAPElementNotFoundError as e:
    print(f"Element not found: {e.details}")
except BAPError as e:
    print(f"Error {e.code}: {e.message}")

Starting the Server

The Python SDK requires a running BAP server:
# Option 1: Via CLI (recommended)
npm i -g @browseragentprotocol/cli
bap open

# Option 2: Direct server
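npx @browseragentprotocol/server-playwright --port 9222

The Python SDK connects to a BAP server over WebSocket. The server itself is a Node.js process — install it via npm. The examples on this page connect to ws://localhost:9222, which matches the --port 9222 used in Option 2.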

Requirements

  • Python >= 3.10
  • A running BAP Playwright server