Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Continuous integration: formatting check, static type check, and unit tests.
name: CI

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

# Every step below only reads the repository, so narrow the default
# GITHUB_TOKEN scope instead of inheriting the repository-wide default.
permissions:
  contents: read

jobs:
  lint-and-test:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Single entry for now; add versions here to widen the matrix.
        python-version: ["3.12"]

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          pip install -e ".[all]"
          pip install black mypy pytest

      - name: Format check (black)
        run: black --check .

      - name: Type check (mypy)
        run: mypy droidpilot/ tests/ --ignore-missing-imports

      - name: Tests
        run: pytest tests/ -v
30 changes: 23 additions & 7 deletions droidpilot/adb.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,18 @@ def tap(x: int, y: int) -> None:


def swipe(x1: int, y1: int, x2: int, y2: int, duration_ms: int = 300) -> None:
    """Issue one ``shell input swipe`` command from (x1, y1) to (x2, y2).

    Args:
        x1: X coordinate of the starting point.
        y1: Y coordinate of the starting point.
        x2: X coordinate of the end point.
        y2: Y coordinate of the end point.
        duration_ms: Final argument of ``input swipe``; defaults to 300.
    """
    # All values are stringified before being handed to ``_run``, and the
    # command is issued exactly once.
    gesture = [str(value) for value in (x1, y1, x2, y2, duration_ms)]
    _run(["shell", "input", "swipe", *gesture])


def input_text(text: str) -> None:
Expand All @@ -95,12 +106,17 @@ def press_enter() -> None:


def open_app(package_name: str) -> None:
    """Launch *package_name* with a single ``shell monkey`` command.

    The command is restricted to the given package (``-p``) and to the
    ``android.intent.category.LAUNCHER`` category (``-c``); the trailing
    ``"1"`` is monkey's event-count argument.

    Args:
        package_name: Package identifier passed to ``monkey -p``.
    """
    # Issue the command exactly once.
    monkey_args = ["-p", package_name, "-c", "android.intent.category.LAUNCHER", "1"]
    _run(["shell", "monkey", *monkey_args])


def list_packages(name: str = "") -> list[str]:
Expand Down
48 changes: 28 additions & 20 deletions droidpilot/agent.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import json
import logging
import time
from typing import Any

from openai import OpenAI

Expand All @@ -8,6 +10,8 @@
from .actions import TOOLS
from .prompts import SYSTEM_PROMPT

logger = logging.getLogger("droidpilot")

SWIPE_OFFSETS = {
"up": (0, 1, 0, -1),
"down": (0, -1, 0, 1),
Expand Down Expand Up @@ -113,11 +117,11 @@ def run(prompt: str, model: str = "gpt-4o", max_steps: int = 30) -> str:

serial = adb.check_device()
screen_size = adb.get_screen_size()
print(f"Connected to device: {serial}")
print(f"Screen size: {screen_size[0]}x{screen_size[1]}")
print(f"Task: {prompt}\n")
logger.info("Connected to device: %s", serial)
logger.info("Screen size: %sx%s", screen_size[0], screen_size[1])
logger.info("Task: %s", prompt)

messages = [
messages: list[dict[str, Any]] = [
{"role": "system", "content": SYSTEM_PROMPT},
{"role": "user", "content": f"Task: {prompt}"},
]
Expand All @@ -126,16 +130,18 @@ def run(prompt: str, model: str = "gpt-4o", max_steps: int = 30) -> str:
try:
tree_text, ref_map = _get_ui_tree()
except Exception as e:
print(f" [!] Failed to read UI: {e}")
logger.warning("Failed to read UI: %s", e)
time.sleep(2)
continue

messages.append({
"role": "user",
"content": f"Current screen UI tree:\n```\n{tree_text}\n```",
})
messages.append(
{
"role": "user",
"content": f"Current screen UI tree:\n```\n{tree_text}\n```",
}
)

response = client.chat.completions.create(
response = client.chat.completions.create( # type: ignore[call-overload]
model=model,
messages=messages,
tools=TOOLS,
Expand All @@ -146,28 +152,30 @@ def run(prompt: str, model: str = "gpt-4o", max_steps: int = 30) -> str:
messages.append(message.model_dump(exclude_none=True))

if not message.tool_calls:
print(f" [!] No action returned, retrying...")
logger.warning("No action returned, retrying...")
continue

tool_call = message.tool_calls[0]
action_name = tool_call.function.name
action_args = json.loads(tool_call.function.arguments)

print(f" Step {step}: {action_name}({json.dumps(action_args)})")
logger.info("Step %d: %s(%s)", step, action_name, json.dumps(action_args))

if action_name == "done":
summary = action_args.get("summary", "Task completed.")
print(f"\nDone: {summary}")
logger.info("Done: %s", summary)
return summary

result = _execute_action(action_name, action_args, ref_map, screen_size)
print(f" → {result}")

messages.append({
"role": "tool",
"tool_call_id": tool_call.id,
"content": result,
})
logger.debug("→ %s", result)

messages.append(
{
"role": "tool",
"tool_call_id": tool_call.id,
"content": result or "",
}
)

time.sleep(UI_SETTLE_DELAY)

Expand Down
58 changes: 54 additions & 4 deletions droidpilot/main.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import argparse
import logging
import os
import sys

Expand All @@ -7,8 +8,41 @@
from . import adb
from .agent import run

logger = logging.getLogger("droidpilot")

def main():

def _setup_logging(*, verbose: bool = False, quiet: bool = False) -> None:
    """Configure the ``droidpilot`` logger.

    * Default level is INFO.
    * ``--verbose`` lowers it to DEBUG and adds timestamps.
    * ``--quiet`` raises it to WARNING.
    * When stderr is not a TTY (e.g. piped to a file), timestamps are always
      included so log lines can be correlated after the fact.

    The function is safe to call repeatedly: the handler installed by an
    earlier call is replaced rather than stacked, so records are never
    emitted twice. Handlers attached by other code are left untouched.

    Args:
        verbose: Use DEBUG level and the timestamped format.
        quiet: Use WARNING level. Ignored when *verbose* is true.
    """
    # Name used to recognise the handler this function installed earlier.
    handler_name = "droidpilot-cli"

    if verbose:
        level = logging.DEBUG
    elif quiet:
        level = logging.WARNING
    else:
        level = logging.INFO

    is_tty = hasattr(sys.stderr, "isatty") and sys.stderr.isatty()

    if verbose or not is_tty:
        fmt = "%(asctime)s %(levelname)s %(message)s"
    else:
        fmt = "%(message)s"

    log = logging.getLogger("droidpilot")

    # Drop our own handler from a previous call before adding a new one;
    # otherwise every record would be written once per call.
    for stale in [h for h in log.handlers if h.get_name() == handler_name]:
        log.removeHandler(stale)
        stale.close()

    handler = logging.StreamHandler(sys.stderr)
    handler.set_name(handler_name)
    handler.setFormatter(logging.Formatter(fmt))

    log.setLevel(level)
    log.addHandler(handler)


def main() -> None:
load_dotenv()

parser = argparse.ArgumentParser(
Expand All @@ -22,21 +56,37 @@ def main():
)
parser.add_argument("--max-steps", type=int, default=30)

verbosity = parser.add_mutually_exclusive_group()
verbosity.add_argument(
"-v",
"--verbose",
action="store_true",
help="show debug output with timestamps",
)
verbosity.add_argument(
"-q",
"--quiet",
action="store_true",
help="only show warnings and errors",
)

args = parser.parse_args()

_setup_logging(verbose=args.verbose, quiet=args.quiet)

try:
adb.check_adb_installed()
except RuntimeError as e:
print(f"Error: {e}", file=sys.stderr)
logger.error("Error: %s", e)
sys.exit(1)

try:
run(args.prompt, model=args.model, max_steps=args.max_steps)
except RuntimeError as e:
print(f"Error: {e}", file=sys.stderr)
logger.error("Error: %s", e)
sys.exit(1)
except KeyboardInterrupt:
print("\nAborted by user.")
logger.info("Aborted by user.")
sys.exit(0)


Expand Down
2 changes: 1 addition & 1 deletion droidpilot/ui_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def parse(xml_str: str) -> tuple[str, dict[int, tuple[int, int]]]:

def _walk(node: etree._Element, depth: int = 0) -> None:
bounds = _parse_bounds(node.get("bounds", ""))
if not _is_visible(bounds):
if not _is_visible(bounds) or bounds is None:
return

class_name = _short_class_name(node.get("class", ""))
Expand Down
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,5 +9,9 @@ dependencies = [
"python-dotenv>=1.0",
]

[project.optional-dependencies]
anthropic = ["anthropic>=0.30"]
all = ["anthropic>=0.30"]

[project.scripts]
droidpilot = "droidpilot.main:main"
Empty file added tests/__init__.py
Empty file.
53 changes: 53 additions & 0 deletions tests/test_logging.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
"""Tests for droidpilot logging configuration."""

import logging

from droidpilot.main import _setup_logging


def _reset_logger() -> None:
    """Return the ``droidpilot`` logger to an unconfigured state.

    Every attached handler is detached and closed, and the explicit level is
    cleared to ``NOTSET`` (the level a freshly created logger has), so no
    configuration from one test leaks into the next.
    """
    logger = logging.getLogger("droidpilot")
    # Iterate over a copy: removeHandler mutates ``logger.handlers``.
    for handler in list(logger.handlers):
        logger.removeHandler(handler)
        handler.close()
    logger.setLevel(logging.NOTSET)


class TestSetupLogging:
    """Tests for the _setup_logging helper."""

    def setup_method(self) -> None:
        # Start from a clean logger so handler-count assertions do not depend
        # on which tests (or imports) ran earlier.
        _reset_logger()

    def teardown_method(self) -> None:
        _reset_logger()

    def test_default_level_is_info(self) -> None:
        _setup_logging()
        logger = logging.getLogger("droidpilot")
        assert logger.level == logging.INFO

    def test_verbose_sets_debug(self) -> None:
        _setup_logging(verbose=True)
        logger = logging.getLogger("droidpilot")
        assert logger.level == logging.DEBUG

    def test_quiet_sets_warning(self) -> None:
        _setup_logging(quiet=True)
        logger = logging.getLogger("droidpilot")
        assert logger.level == logging.WARNING

    def test_handler_streams_to_stderr(self) -> None:
        import sys

        _setup_logging()
        logger = logging.getLogger("droidpilot")
        assert len(logger.handlers) == 1
        handler = logger.handlers[0]
        assert isinstance(handler, logging.StreamHandler)
        # Check the destination itself, not just the handler type.
        assert handler.stream is sys.stderr

    def test_verbose_format_includes_timestamp(self) -> None:
        _setup_logging(verbose=True)
        logger = logging.getLogger("droidpilot")
        formatter = logger.handlers[0].formatter
        assert formatter is not None
        assert formatter._fmt is not None and "asctime" in formatter._fmt

    def test_agent_logger_is_child(self) -> None:
        """agent.py should log through the same ``droidpilot`` logger.

        The agent module requests the logger by the name ``"droidpilot"``, so
        it is the very logger that ``_setup_logging`` configures rather than a
        child of it.
        """
        from droidpilot.agent import logger as agent_logger

        assert agent_logger.name == "droidpilot"
        assert agent_logger is logging.getLogger("droidpilot")
Loading