tronikos · tronikos · Nov 1, 2025 · Oct 30, 2025 · Nov 1, 2025 · tronikos
diff --git a/.gitignore b/.gitignore
@@ -130,3 +130,6 @@ dmypy.json
 
 # Certificates generated by demo.py
 *.pem
+
+# Voice command recorded by the demo app
+voice_command.wav
diff --git a/README.md b/README.md
@@ -6,6 +6,8 @@ For a list of the most common commands you can send to the Android TV see: [TvKe
 For a full list see [remotemessage.proto](https://github.com/tronikos/androidtvremote2/blob/b4c49ac03043b1b9c40c2f2960e466d5a3b8bd67/src/androidtvremote2/remotemessage.proto#L90).
 In addition to commands you can send URLs to open apps registered to handle them. See [this guide](https://community.home-assistant.io/t/android-tv-remote-app-links-deep-linking-guide/567921) for how to find deep links for apps.
 
+Voice commands can also be sent as PCM 16-bit mono 8 kHz audio data.
+
 ## Credits
 
 - Official [implementation](https://android.googlesource.com/platform/external/google-tv-pairing-protocol/+/refs/heads/master) of the pairing protocol in Java
@@ -52,3 +54,7 @@ python src/demo.py
 python -m pip install build
 python -m build
 ```
+
+The voice demo requires the [PyAudio](https://pypi.org/project/PyAudio/) library.
+Depending on the target platform, [PortAudio](https://www.portaudio.com/) might have to be installed manually;
+see the [PyAudio installation](https://people.csail.mit.edu/hubert/pyaudio/) instructions for more information.
diff --git a/pyproject.toml b/pyproject.toml
@@ -19,6 +19,7 @@ test = [
     "pytest",
 ]
 demo = [
+    "pyaudio",
     "pynput",
     "zeroconf",
 ]
@@ -87,6 +88,10 @@ show_error_codes = true
 warn_incomplete_stub = true
 enable_error_code = ["ignore-without-code", "redundant-self", "truthy-iterable"]
 
+[[tool.mypy.overrides]]
+module = ["pyaudio"]
+ignore_missing_imports = true
+
 [tool.codespell]
 skip = "*.json,*.csv,*.lock,./.git/*,./.venv/*"
 check-filenames = true

diff --git a/src/androidtvremote2/__init__.py b/src/androidtvremote2/__init__.py
@@ -3,12 +3,14 @@
 from .androidtv_remote import AndroidTVRemote
 from .exceptions import CannotConnect, ConnectionClosed, InvalidAuth
 from .model import DeviceInfo, VolumeInfo
+from .voice_stream import VoiceStream
 
 __all__ = [
     "AndroidTVRemote",
     "CannotConnect",
     "ConnectionClosed",
     "DeviceInfo",
     "InvalidAuth",
+    "VoiceStream",
     "VolumeInfo",
 ]
diff --git a/src/androidtvremote2/androidtv_remote.py b/src/androidtvremote2/androidtv_remote.py
@@ -15,8 +15,9 @@
 from .const import LOGGER
 from .exceptions import CannotConnect, ConnectionClosed, InvalidAuth
 from .pairing import PairingProtocol
-from .remote import RemoteProtocol
+from .remote import VOICE_SESSION_TIMEOUT, RemoteProtocol
 from .remotemessage_pb2 import RemoteDirection
+from .voice_stream import VoiceStream
 
 if TYPE_CHECKING:
     from collections.abc import Callable
@@ -45,6 +46,7 @@ def __init__(
         pair_port: int = 6467,
         loop: asyncio.AbstractEventLoop | None = None,
         enable_ime: bool = True,
+        enable_voice: bool = False,
     ) -> None:
         """Initialize.
 
@@ -57,6 +59,7 @@ def __init__(
         :param loop: event loop. Used for connections and futures.
         :param enable_ime: Needed for getting current_app.
                Disable for devices that show 'Use keyboard on mobile device screen'.
+        :param enable_voice: Enable sending voice commands to the device.
         """
         self._client_name = client_name
         self._certfile = certfile
@@ -66,6 +69,7 @@ def __init__(
         self._pair_port = pair_port
         self._loop = loop or asyncio.get_running_loop()
         self._enable_ime = enable_ime
+        self._enable_voice = enable_voice
         self._transport: asyncio.Transport | None = None
         self._remote_message_protocol: RemoteProtocol | None = None
         self._pairing_message_protocol: PairingProtocol | None = None
@@ -125,6 +129,17 @@ def volume_info(self) -> VolumeInfo | None:
             return None
         return self._remote_message_protocol.volume_info
 
+    @property
+    def is_voice_enabled(self) -> bool | None:
+        """Whether voice commands are enabled on the Android TV.
+
+        Reflects the ``enable_voice`` request made at initialization combined with the features
+        supported by the device. ``None`` while there is no remote protocol to ask.
+        """
+        protocol = self._remote_message_protocol
+        # Without a protocol instance the state is unknown, which is reported as None
+        return protocol.is_voice_enabled if protocol else None
+
     def add_is_on_updated_callback(self, callback: Callable[[bool], None]) -> None:
         """Add a callback for when is_on is updated."""
         self._is_on_updated_callbacks.append(callback)
@@ -217,6 +232,7 @@ async def async_connect(self) -> None:
                     self._on_volume_info_updated,
                     self._loop,
                     self._enable_ime,
+                    self._enable_voice,
                 ),
                 self.host,
                 self._api_port,
@@ -400,3 +416,21 @@ def send_launch_app_command(self, app_link_or_app_id: str) -> None:
             raise ConnectionClosed("Called send_launch_app_command after disconnect")
         prefix = "" if urlparse(app_link_or_app_id).scheme else "market://launch?id="
         self._remote_message_protocol.send_launch_app_command(f"{prefix}{app_link_or_app_id}")
+
+    async def start_voice(self, timeout: float = VOICE_SESSION_TIMEOUT) -> VoiceStream:
+        """Start a streaming voice session.
+
+        A ``VoiceStream`` session wrapper is returned if the voice session can be established
+        within the given timeout. The session needs to be closed with ``end()`` (or through the
+        asynchronous context manager) before a new session is started.
+
+        :param timeout: optional timeout in seconds for session readiness. Defaults to 2 seconds.
+        :raises ConnectionClosed: if client is disconnected.
+        :raises asyncio.TimeoutError: if the device does not begin voice in time, or a voice
+                                      session is already in progress.
+        """
+        protocol = self._remote_message_protocol  # pinned: the attribute may change during the await
+        if not protocol:
+            LOGGER.debug("Called start_voice after disconnect")
+            raise ConnectionClosed("Called start_voice after disconnect")
+        return VoiceStream(protocol, await protocol.start_voice(timeout))
diff --git a/src/androidtvremote2/remote.py b/src/androidtvremote2/remote.py
@@ -8,13 +8,14 @@
 
 import asyncio
 from enum import IntFlag
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 
 from google.protobuf import text_format
 from google.protobuf.message import DecodeError
 
 from .base import ProtobufProtocol
 from .const import LOGGER
+from .exceptions import ConnectionClosed
 from .remotemessage_pb2 import (
     RemoteDirection,
     RemoteEditInfo,
@@ -38,13 +39,23 @@
 KEYCODE_PREFIX = "KEYCODE_"
 TEXT_PREFIX = "text:"
 
+# Timeout in seconds to wait for `remote_voice_begin` after sending KEYCODE_SEARCH.
+VOICE_SESSION_TIMEOUT = 2.0
+# Voice data chunk size in bytes for the `remote_voice_payload` message.
+VOICE_CHUNK_SIZE = 20 * 1024
+# Minimum voice data chunk size in bytes. Shield TV did not accept lower chunk sizes.
+VOICE_CHUNK_MIN_SIZE = 8 * 1024
+
 
 class Feature(IntFlag):
     """Supported features."""
 
     PING = 2**0
     KEY = 2**1
     IME = 2**2
+    VOICE = 2**3
+    """Enables remote_voice_begin after sending KEYCODE_SEARCH"""
+    UNKNOWN_1 = 2**4
     POWER = 2**5
     VOLUME = 2**6
     APP_LINK = 2**9
@@ -67,6 +78,7 @@ def __init__(
         on_volume_info_updated: Callable[[VolumeInfo], None],
         loop: asyncio.AbstractEventLoop,
         enable_ime: bool,
+        enable_voice: bool,
     ) -> None:
         """Initialize.
 
@@ -78,14 +90,21 @@ def __init__(
         :param loop: event loop.
         :param enable_ime: Needed for getting current_app.
                Disable for devices that show 'Use keyboard on mobile device screen'.
+        :param enable_voice: Enable sending voice commands to the device.
         """
         super().__init__(on_con_lost)
         self._on_remote_started = on_remote_started
         self._on_is_on_updated = on_is_on_updated
         self._on_current_app_updated = on_current_app_updated
         self._on_volume_info_updated = on_volume_info_updated
         self._active_features = (
-            Feature.PING | Feature.KEY | Feature.POWER | Feature.VOLUME | Feature.APP_LINK | (Feature.IME if enable_ime else 0)
+            Feature.PING
+            | Feature.KEY
+            | Feature.POWER
+            | Feature.VOLUME
+            | Feature.APP_LINK
+            | (Feature.IME if enable_ime else 0)
+            | (Feature.VOICE if enable_voice else 0)
         )
         self.is_on = False
         self.current_app = ""
@@ -96,6 +115,17 @@ def __init__(
         self._loop = loop
         self._idle_disconnect_task: asyncio.Task[None] | None = None
         self._reset_idle_disconnect_task()
+        self._voice_lock = asyncio.Lock()
+        self._on_voice_begin: asyncio.Future[int] | None = None
+
+    @property
+    def is_voice_enabled(self) -> bool:
+        """Whether the voice feature is active.
+
+        True if ``Feature.VOICE`` is set in the active features, which are determined from the
+        features requested at initialization and the features supported by the device.
+        """
+        return bool(self._active_features & Feature.VOICE)
 
     def send_key_command(self, key_code: int | str, direction: int | str = RemoteDirection.SHORT) -> None:
         """Send a key press to Android TV.
@@ -158,7 +188,75 @@ def send_launch_app_command(self, app_link: str) -> None:
         msg.remote_app_link_launch_request.app_link = app_link
         self._send_message(msg)
 
-    def _handle_message(self, raw_msg: bytes) -> None:  # noqa: PLR0912
+    async def start_voice(self, timeout: float = VOICE_SESSION_TIMEOUT) -> int:
+        """Initiate a voice session and return the session id when ready.
+
+        Sends ``KEYCODE_SEARCH`` and waits for ``remote_voice_begin``. Also sends the
+        initial ``remote_voice_begin`` message back to the device, as required by the
+        protocol, so the caller can start streaming audio chunks.
+
+        :param timeout: Optional timeout in seconds for session readiness. Defaults to 2 seconds.
+        :raises ConnectionClosed: If the connection is lost.
+        :raises asyncio.TimeoutError: If the operation times out or another ``start_voice`` call
+                                      is still waiting for the device.
+        :return: The voice session id, which must be used in later calls to ``send_voice_chunk``.
+        """
+        if self.transport is None or self.transport.is_closing():
+            raise ConnectionClosed("Connection has been lost")
+
+        # Fail fast instead of queueing behind a concurrent call that is still waiting
+        if self._voice_lock.locked():
+            raise asyncio.TimeoutError("Voice session already in progress")
+
+        # The context manager releases the lock on every exit path
+        async with self._voice_lock:
+            self._on_voice_begin = self._loop.create_future()
+            try:
+                self.send_key_command(RemoteKeyCode.KEYCODE_SEARCH)
+                session_id = await self._async_wait_for_future_or_con_lost(self._on_voice_begin, timeout)
+                if session_id is None:
+                    raise ConnectionClosed("No voice session available")
+
+                # Send remote_voice_begin back to the device with the session id it announced
+                begin = RemoteMessage()
+                begin.remote_voice_begin.session_id = session_id
+                self._send_message(begin)
+                return int(session_id)
+            except BaseException:  # includes asyncio.CancelledError; always re-raised
+                self._on_voice_begin = None
+                raise
+
+    def send_voice_chunk(self, chunk: bytes, session_id: int) -> None:
+        """Send a chunk of PCM audio for the active voice session.
+
+        Messages are zero-padded to ``VOICE_CHUNK_MIN_SIZE``, including the tail of a split chunk.
+
+        :param chunk: The PCM audio data chunk to be sent.
+        :param session_id: The voice session id.
+        :raises ConnectionClosed: If the connection is lost.
+        """
+        if self.transport is None or self.transport.is_closing():
+            raise ConnectionClosed("Connection has been lost")
+
+        # An empty or short chunk is still sent as one message of the minimum size
+        chunk = chunk.ljust(VOICE_CHUNK_MIN_SIZE, b"\x00")
+        # Limit chunk size, otherwise Android TV will close the connection
+        for i in range(0, len(chunk), VOICE_CHUNK_SIZE):
+            msg = RemoteMessage()
+            msg.remote_voice_payload.session_id = session_id
+            msg.remote_voice_payload.samples = chunk[i : i + VOICE_CHUNK_SIZE].ljust(VOICE_CHUNK_MIN_SIZE, b"\x00")
+            self._send_message(msg, False)  # disable logging of voice data
+
+    def end_voice(self, session_id: int) -> None:
+        """Send ``remote_voice_end`` for the given voice session.
+
+        :param session_id: The voice session id.
+        """
+        msg = RemoteMessage()
+        msg.remote_voice_end.session_id = session_id
+        self._send_message(msg)
+
+    def _handle_message(self, raw_msg: bytes) -> None:  # noqa: PLR0912,PLR0915
         """Handle a message from the server."""
         self._reset_idle_disconnect_task()
         msg = RemoteMessage()
@@ -215,6 +313,11 @@ def _handle_message(self, raw_msg: bytes) -> None:  # noqa: PLR0912
         elif msg.HasField("remote_ping_request"):
             new_msg.remote_ping_response.val1 = msg.remote_ping_request.val1
             log_send = LOG_PING_REQUESTS
+        elif msg.HasField("remote_voice_begin"):
+            if self._on_voice_begin and not self._on_voice_begin.done():
+                self._on_voice_begin.set_result(msg.remote_voice_begin.session_id)
+            else:
+                LOGGER.debug("Ignoring remote_voice_begin: no client request available")
         else:
             LOGGER.debug("Unhandled: %s", text_format.MessageToString(msg, as_one_line=True))
 
@@ -237,3 +340,31 @@ async def _async_idle_disconnect(self) -> None:
             self.transport.close()
         if not self.on_con_lost.done():
             self.on_con_lost.set_result(Exception("Closed idle connection"))
+
+    async def _async_wait_for_future_or_con_lost(self, future: asyncio.Future[Any], timeout: float) -> Any:
+        """Wait for the future to finish, the connection to be lost, or the timeout to expire.
+
+        :param future: The future to wait for. It is cancelled if it did not complete.
+        :param timeout: Timeout in seconds.
+        :return: The result of the future.
+        :raises ConnectionClosed: If the connection is lost or the future has an exception.
+        :raises asyncio.TimeoutError: If timeout is reached before completion.
+        """
+        tasks = {self.on_con_lost, future}
+
+        done, _pending = await asyncio.wait(tasks, timeout=timeout, return_when=asyncio.FIRST_COMPLETED)
+
+        # A completed future wins, even if the connection was lost at the same time
+        if future.done():
+            if future.exception():
+                raise ConnectionClosed("Waiting for future failed") from future.exception()
+            return future.result()
+
+        # The wait is over without a result: cancel the future on timeout and on connection loss
+        future.cancel()
+
+        # Check if timeout occurred (no tasks completed)
+        if not done:
+            LOGGER.debug("Timeout reached after %s seconds", timeout)
+            raise asyncio.TimeoutError(f"Operation timed out after {timeout} seconds")
+        raise ConnectionClosed("Connection has been lost")
diff --git a/src/androidtvremote2/remotemessage.proto b/src/androidtvremote2/remotemessage.proto
@@ -36,15 +36,20 @@ message RemoteStart {
 }
 
 message RemoteVoiceEnd {
-
+  int32 session_id = 1;  // Id of the voice session to end.
 }
 
 message RemoteVoicePayload {
-
+  int32 session_id = 1;
+  // Audio configuration in RemoteVoiceBegin is unknown.
+  // By default the payload is a sequence of 16-bit PCM, 8 kHz, mono samples, sent in messages of at most 20 KB.
+  bytes samples = 2;
 }
 
 message RemoteVoiceBegin {
-
+  int32 session_id = 1;
+  // The package name is sent by the Android device in response to KEYCODE_SEARCH; it is not required when sending audio.
+  string package_name = 2;
 }
 
 message RemoteTextFieldStatus {

diff --git a/src/androidtvremote2/remotemessage_pb2.py b/src/androidtvremote2/remotemessage_pb2.py