feat: add human http agent

2026-05-11 19:53:40 +08:00
parent e46b2b84c5
commit 6014ec0707
5 changed files with 335 additions and 41 deletions
@@ -8,7 +8,7 @@ from typing import IO, Any
 from urllib.error import URLError
 from urllib.request import Request, urlopen

-from texas_holdem.human_io import prompt_action, render_observation
+from texas_holdem.human_io import clear_screen, prompt_action, render_observation
 from texas_holdem.models import Observation, PlayerAction


@@ -17,6 +17,16 @@ class PokerAgent(ABC):
    def decide(self, observation: Observation) -> PlayerAction:
        raise NotImplementedError

+    def on_game_update(self, game_state: dict[str, Any]) -> None:
+        """Optional hook called after every finished hand.
+
+        ``game_state`` is the same dict produced by :meth:`TableGame.to_dict`,
+        carrying the full ``hands`` history (including any showdown hole
+        cards). Default implementation is a no-op so most agents (random,
+        calling, in-process humans) need not care about it.
+        """
+        return None
+

 class RandomAgent(PokerAgent):
    def __init__(self, rng: Random | None = None) -> None:
@@ -45,26 +55,83 @@ class CallingStationAgent(PokerAgent):


 class HttpAgent(PokerAgent):
+    """Remote agent that talks to a base URL exposing ``/act`` and ``/game``.
+
+    The constructor takes a *base* URL (e.g. ``http://host:9001``) and
+    derives the per-purpose endpoints internally. This keeps the wire layout
+    a server-side concern and lets us evolve the protocol (add ``/init``,
+    ``/end``, ...) without touching every game spec.
+    """
+
+    ACT_PATH = "/act"
+    GAME_PATH = "/game"
+
    def __init__(self, endpoint: str, timeout_seconds: float = 10.0) -> None:
-        self.endpoint = endpoint
+        self.base_url = self._normalise_base_url(endpoint)
        self.timeout_seconds = timeout_seconds

+    @staticmethod
+    def _normalise_base_url(raw: str) -> str:
+        """Strip a trailing slash so URL joins do not produce double slashes.
+
+        Centralising this also tolerates the legacy "endpoint already points
+        at /act" mistake by chopping off a redundant ``/act`` suffix.
+        """
+        url = raw.rstrip("/")
+        if url.endswith("/act"):
+            url = url[: -len("/act")]
+        return url
+
+    def _url(self, path: str) -> str:
+        """Compose a full URL by joining the base with a path component."""
+        return f"{self.base_url}{path}"
+
    def decide(self, observation: Observation) -> PlayerAction:
-        body = json.dumps(observation.to_dict()).encode("utf-8")
+        payload = self._post_json(self.ACT_PATH, observation.to_dict())
+        if not isinstance(payload, dict):
+            raise RuntimeError("agent endpoint must return a JSON object")
+        return PlayerAction.from_dict(payload)
+
+    def on_game_update(self, game_state: dict[str, Any]) -> None:
+        """Push the post-hand snapshot to the remote ``/game`` endpoint.
+
+        Any ``RuntimeError`` raised by ``_post_json`` is swallowed here so a
+        single offline client cannot abort the hand loop. Nothing is logged
+        and nothing is re-raised; other exception types still propagate.
+        """
+        try:
+            self._post_json(self.GAME_PATH, game_state)
+        except RuntimeError:
+            # ``/game`` is informational; treat any HTTP error as a benign
+            # drop rather than reraising and aborting the hand loop.
+            return None
+
+    def _post_json(self, path: str, payload: dict[str, Any]) -> Any:
+        """POST ``payload`` as JSON to ``base_url + path`` and return parsed body.
+
+        Shared by ``decide`` and ``on_game_update`` (same timeout, error
+        wrapping, content-type). An empty response body yields ``None``.
+        """
+        body = json.dumps(payload).encode("utf-8")
        request = Request(
-            self.endpoint,
+            self._url(path),
            data=body,
            headers={"Content-Type": "application/json"},
            method="POST",
        )
        try:
            with urlopen(request, timeout=self.timeout_seconds) as response:
-                payload: Any = json.loads(response.read().decode("utf-8"))
-        except (OSError, URLError, json.JSONDecodeError) as exc:
-            raise RuntimeError(f"agent endpoint failed: {self.endpoint}") from exc
-        if not isinstance(payload, dict):
-            raise RuntimeError("agent endpoint must return a JSON object")
-        return PlayerAction.from_dict(payload)
+                raw = response.read().decode("utf-8")
+        except (OSError, URLError) as exc:
+            raise RuntimeError(f"agent endpoint failed: {self._url(path)}") from exc
+        if not raw:
+            return None
+        try:
+            return json.loads(raw)
+        except json.JSONDecodeError as exc:
+            raise RuntimeError(
+                f"agent endpoint returned invalid JSON: {self._url(path)}"
+            ) from exc


 class HumanAgent(PokerAgent):
@@ -74,19 +141,27 @@ class HumanAgent(PokerAgent):
    drives an interactive menu so the operator can only emit legal actions.
    Streams are injected to keep the agent testable and to allow alternate
    consoles in the future (e.g. piping to a debug log).
+
+    By default the terminal is wiped at the start of each ``decide`` call so
+    every turn appears on a clean screen. Set ``keep_history=True`` to
+    preserve the scrollback (useful when piping to a log file).
    """

    def __init__(
        self,
        input_stream: IO[str] | None = None,
        output_stream: IO[str] | None = None,
+        keep_history: bool = False,
    ) -> None:
        self._input = input_stream if input_stream is not None else sys.stdin
        self._output = output_stream if output_stream is not None else sys.stdout
+        self._keep_history = keep_history

    def decide(self, observation: Observation) -> PlayerAction:
        # Convert to dict-form so the rendering/prompting code path is shared
        # with the standalone HTTP human client (see texas_holdem.human_io).
+        if not self._keep_history:
+            clear_screen(self._write)
        obs_dict = observation.to_dict()
        self._write(render_observation(obs_dict))
        chosen = prompt_action(