feat: add --hide-reasoning flag so the AI agent can hide its reasoning output

2026-05-12 15:51:17 +08:00
parent bc372c5ba1
commit e22586aa2f
1 changed files with 169 additions and 3 deletions
@@ -69,6 +69,109 @@ ANSI_RESET = "\x1b[0m"
 # ---------------------------------------------------------------------------


+class _ThinkingIndicator:
+    """Animated "thinking..." marquee for the AI agent console.
+
+    Design rationale:
+    - Encapsulated as its own class so the animation lifecycle (timer
+      thread, frame state, erase sequence) stays out of the console class.
+    - Runs in a daemon background thread driven by ``threading.Event`` so
+      ``stop`` returns promptly even if the current frame is mid-sleep.
+      Each run owns its event, so a ``start`` racing with a ``stop`` can
+      never un-signal a worker that has not yet noticed the stop.
+    - Uses ``\\r`` plus the ANSI erase-to-end-of-line sequence to redraw
+      each frame in place; frames cycle through 0-3 dots every 0.5s.
+    - ``start``/``stop`` are idempotent so the higher-level console can
+      call ``stop`` defensively (e.g. on the fallback path) without
+      tracking whether a marquee is actually running.
+    """
+
+    # Frame interval in seconds; matches the user-visible cadence.
+    _FRAME_INTERVAL = 0.5
+    # 0..3 dots, looping.
+    _FRAMES = ("thinking", "thinking.", "thinking..", "thinking...")
+    # Carriage return followed by the ANSI "erase to end of line"
+    # sequence; writing it before a frame redraws that frame in place.
+    _ERASE_LINE = "\r\x1b[K"
+
+    def __init__(
+        self,
+        write_fn: Callable[[str], None],
+        gray_fn: Callable[[str], str],
+    ) -> None:
+        self._write = write_fn
+        self._gray = gray_fn
+        # Stop signal of the current run; replaced on every ``start``.
+        self._stop_event = threading.Event()
+        self._thread: threading.Thread | None = None
+        # Whether a frame may be on screen, i.e. whether ``stop`` owes an
+        # erase sequence. Only touched while holding ``_lifecycle_lock``.
+        self._active = False
+        # Guard against concurrent start/stop calls from different
+        # threads (e.g. content-delta handler vs. end_llm_stream).
+        self._lifecycle_lock = threading.Lock()
+
+    def start(self) -> None:
+        """Begin the marquee in a background thread.
+
+        Calling ``start`` while already running is a no-op.
+        """
+        with self._lifecycle_lock:
+            if self._thread is not None and self._thread.is_alive():
+                return
+            # Fresh event per run so a racing ``stop`` keeps its own signal.
+            stop_event = self._stop_event = threading.Event()
+            self._active = True
+            self._thread = threading.Thread(
+                target=self._run,
+                args=(stop_event,),
+                name="ai-thinking-indicator",
+                daemon=True,
+            )
+            self._thread.start()
+
+    def stop(self) -> None:
+        """Stop the marquee and erase the current frame from the screen.
+
+        Safe to call when not running.
+        """
+        with self._lifecycle_lock:
+            thread = self._thread
+            if thread is None:
+                return
+            self._stop_event.set()
+            self._thread = None
+            # Claim the erase while holding the lock so a racing ``start``
+            # cannot have its ``_active`` flag reset by this call later.
+            needs_erase, self._active = self._active, False
+        # Join outside the lifecycle lock so other callers are not blocked
+        # while the worker finishes writing its in-flight frame.
+        thread.join()
+        if needs_erase:
+            # Wipe the last frame so real content starts on a clean line.
+            self._write(self._ERASE_LINE)
+
+    def _run(self, stop_event: threading.Event) -> None:
+        """Redraw the next frame every ``_FRAME_INTERVAL`` until stopped."""
+        index = 0
+        while not stop_event.is_set():
+            self._render_frame(self._FRAMES[index % len(self._FRAMES)])
+            index += 1
+            # ``Event.wait`` returns immediately when ``set`` is called,
+            # so ``stop`` is responsive even mid-frame.
+            if stop_event.wait(self._FRAME_INTERVAL):
+                return
+
+    def _render_frame(self, label: str) -> None:
+        """Emit one frame in place using carriage-return + erase-EOL."""
+        self._write(f"{self._ERASE_LINE}{self._gray(label)}")
+
+
+# ---------------------------------------------------------------------------
+# AI agent console
+# ---------------------------------------------------------------------------
+
+
 class AIAgentConsole:
    """Serialised terminal output for the standalone AI agent.

@@ -84,11 +187,30 @@ class AIAgentConsole:
        output_stream: IO[str] | None = None,
        keep_history: bool = False,
        use_color: bool = True,
+        show_reasoning: bool = True,
    ) -> None:
        self._output = output_stream if output_stream is not None else sys.stdout
        self._keep_history = keep_history
        self._use_color = use_color
+        # ``show_reasoning`` controls whether the LLM's chain-of-thought
+        # ("reasoning") deltas are printed to the terminal. The final
+        # answer ("content") is always printed so operators can still see
+        # the action being chosen.
+        self._show_reasoning = show_reasoning
+        # ``_lock`` serialises whole act/game render blocks (coarse grain).
+        # ``_io_lock`` is a finer-grained mutex protecting just the
+        # ``self._output.write`` calls so the thinking-indicator background
+        # thread can interleave safely with the main rendering thread
+        # without being blocked by the coarse lock.
        self._lock = threading.Lock()
+        self._io_lock = threading.Lock()
+        # Animated "thinking..." marquee shown while reasoning output is
+        # suppressed. Created up-front so callers can ``start``/``stop``
+        # idempotently regardless of the show_reasoning flag.
+        self._thinking = _ThinkingIndicator(
+            write_fn=self._write,
+            gray_fn=self._gray,
+        )

    @contextmanager
    def act_log(self, observation: dict[str, Any]) -> Iterator[None]:
@@ -106,13 +228,32 @@ class AIAgentConsole:

    def begin_llm_stream(self) -> None:
+        """Write the stream header; animate while reasoning is hidden."""
        self._write(self._gray("AI MODEL STREAM\n"))
+        # Hidden reasoning deltas are never printed, so start the marquee
+        # now to show liveness until the first content delta arrives.
+        if not self._show_reasoning:
+            self._thinking.start()

    def write_llm_delta(self, kind: str, text: str) -> None:
+        """Print one streamed delta unless it is empty or hidden reasoning."""
        if not text:
            return
+        # "reasoning" deltas are dropped when reasoning output is hidden,
+        # keeping the terminal focused on the model's final answer.
+        if kind == "reasoning" and not self._show_reasoning:
+            return
+        # Whatever else is about to be printed ("content" or any other
+        # kind) must tear down the marquee first so the animation never
+        # collides with the streamed text; ``stop`` is idempotent.
+        if not self._show_reasoning:
+            self._thinking.stop()
        self._write(self._gray(text))

    def end_llm_stream(self) -> None:
+        """Close the LLM stream: stop any marquee, then end the line."""
+        # The request may finish without any content delta (fallback/error).
+        if not self._show_reasoning:
+            self._thinking.stop()
        self._write(self._gray("\n"))

    def announce_action(
@@ -120,11 +261,17 @@ class AIAgentConsole:
        action: dict[str, Any],
        source: str = "model",
    ) -> None:
+        # Defensive stop: error / fallback paths bypass end_llm_stream, so
+        # we ensure the marquee never leaks into action / warning output.
+        self._thinking.stop()
        body = json.dumps(action, ensure_ascii=False)
        self._write(f"\nAI ACTION ({source}) -> {body}\n")
        self._write("~" * 60 + "\n\n")

    def announce_warning(self, message: str) -> None:
+        """Print an ``AI WARNING`` line, stopping the marquee first."""
+        # Warnings can fire before the LLM stream closes (HTTP/JSON errors).
+        self._thinking.stop()
        self._write(f"\nAI WARNING -> {message}\n")

    def _gray(self, text: str) -> str:
@@ -133,8 +280,12 @@ class AIAgentConsole:
        return f"{ANSI_GRAY}{text}{ANSI_RESET}"

    def _write(self, text: str) -> None:
-        self._output.write(text)
-        self._output.flush()
+        """Write ``text`` to the output stream and flush, under ``_io_lock``."""
+        # The marquee's background thread also writes through this method;
+        # one lock per write+flush keeps escape sequences from interleaving.
+        with self._io_lock:
+            self._output.write(text)
+            self._output.flush()


 # ---------------------------------------------------------------------------
@@ -296,8 +447,11 @@ def _format_action_history(history: list[dict[str, Any]]) -> str:
        return "(no actions yet)"
    # The engine never produces unbounded history within a single hand, but
    # we cap defensively so a malformed payload cannot blow up token usage.
+    # The cap is sized to comfortably cover the worst realistic case (a
+    # 12-handed table running ~10 betting rounds within one hand) so the
+    # LLM never sees a silently truncated history at full ring tables.
    rows = []
-    for record in history[-32:]:
+    for record in history[-128:]:
        rows.append(
            f"- [{record.get('street')}] {record.get('player_id')} -> "
            f"{record.get('action')} amount={record.get('amount', 0)}"
@@ -927,6 +1081,16 @@ def main() -> None:
        action="store_true",
        help="Disable ANSI gray coloring for streamed LLM output.",
    )
+    parser.add_argument(
+        "--hide-reasoning",
+        action="store_true",
+        help=(
+            "Hide the LLM's reasoning/chain-of-thought stream from the "
+            "terminal. The model still performs reasoning; only its "
+            "terminal output is suppressed. The final answer (content) "
+            "is still printed so operators can see the chosen action."
+        ),
+    )
    args = parser.parse_args()

    if not args.api_key:
@@ -944,6 +1108,7 @@ def main() -> None:
    console = AIAgentConsole(
        keep_history=args.keep_history,
        use_color=not args.no_color,
+        show_reasoning=not args.hide_reasoning,
    )
    service = AIAgentService(LLMClient(config), prompts, console=console)
    server = create_server(args.host, args.port, service, default_player_id=args.player_id)
@@ -956,6 +1121,7 @@ def main() -> None:
        f"  base_url    : {config.base_url}\n"
        f"  player_id   : {args.player_id}\n"
        f"  stream      : {'on' if config.stream else 'off'}\n"
+        f"  reasoning   : {'hidden (output suppressed)' if args.hide_reasoning else 'visible'}\n"
        f"  clear-screen: {'off (keep history)' if args.keep_history else 'on'}",
        file=sys.stderr,
        flush=True,