feat: add --no-reasoning for ai agent to hide reasoning info
This commit is contained in:
+164
-2
@@ -69,6 +69,109 @@ ANSI_RESET = "\x1b[0m"
|
|||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
class _ThinkingIndicator:
|
||||||
|
"""Animated "thinking..." marquee for the AI agent console.
|
||||||
|
|
||||||
|
Design rationale:
|
||||||
|
- Encapsulated as its own class so the animation lifecycle (timer
|
||||||
|
thread, frame state, screen erase sequence) does not pollute the
|
||||||
|
surrounding console class.
|
||||||
|
- Runs in a daemon background thread driven by ``threading.Event`` so
|
||||||
|
``stop`` returns promptly even if the current frame is mid-sleep.
|
||||||
|
- Uses ANSI ``\\r`` plus a clearing escape sequence to overwrite the
|
||||||
|
previous frame in place, avoiding scrollback noise. The frames
|
||||||
|
cycle through 0/1/2/3 dots every 0.5s as requested.
|
||||||
|
- ``start``/``stop`` are idempotent so the higher-level console can
|
||||||
|
call ``stop`` defensively (e.g. on the fallback path) without
|
||||||
|
tracking whether a marquee is actually running.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Frame interval in seconds; matches the user-visible cadence.
|
||||||
|
_FRAME_INTERVAL = 0.5
|
||||||
|
# 0..3 dots, looping.
|
||||||
|
_FRAMES = ("thinking", "thinking.", "thinking..", "thinking...")
|
||||||
|
# ANSI escape that clears from the cursor to the end of the line; we
|
||||||
|
# combine it with a leading carriage return to redraw the frame in
|
||||||
|
# place.
|
||||||
|
_ERASE_LINE = "\r\x1b[K"
|
||||||
|
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
write_fn: Callable[[str], None],
|
||||||
|
gray_fn: Callable[[str], str],
|
||||||
|
) -> None:
|
||||||
|
self._write = write_fn
|
||||||
|
self._gray = gray_fn
|
||||||
|
self._stop_event = threading.Event()
|
||||||
|
self._thread: threading.Thread | None = None
|
||||||
|
# ``_active`` reflects whether a frame is currently visible on
|
||||||
|
# screen; ``stop`` uses it to decide whether to emit the final
|
||||||
|
# erase sequence.
|
||||||
|
self._active = False
|
||||||
|
# Guard against concurrent start/stop calls from different
|
||||||
|
# threads (e.g. content-delta handler vs. end_llm_stream).
|
||||||
|
self._lifecycle_lock = threading.Lock()
|
||||||
|
|
||||||
|
def start(self) -> None:
|
||||||
|
"""Begin the marquee in a background thread.
|
||||||
|
|
||||||
|
Calling ``start`` while already running is a no-op.
|
||||||
|
"""
|
||||||
|
with self._lifecycle_lock:
|
||||||
|
if self._thread is not None and self._thread.is_alive():
|
||||||
|
return
|
||||||
|
self._stop_event.clear()
|
||||||
|
self._active = True
|
||||||
|
thread = threading.Thread(
|
||||||
|
target=self._run,
|
||||||
|
name="ai-thinking-indicator",
|
||||||
|
daemon=True,
|
||||||
|
)
|
||||||
|
self._thread = thread
|
||||||
|
thread.start()
|
||||||
|
|
||||||
|
def stop(self) -> None:
|
||||||
|
"""Stop the marquee and erase the current frame from the screen.
|
||||||
|
|
||||||
|
Safe to call when not running.
|
||||||
|
"""
|
||||||
|
with self._lifecycle_lock:
|
||||||
|
thread = self._thread
|
||||||
|
if thread is None:
|
||||||
|
return
|
||||||
|
self._stop_event.set()
|
||||||
|
self._thread = None
|
||||||
|
# Wait for the worker outside the lifecycle lock so an in-flight
|
||||||
|
# ``_render_frame`` cannot deadlock against ``start`` from
|
||||||
|
# another thread.
|
||||||
|
thread.join()
|
||||||
|
if self._active:
|
||||||
|
# Wipe the last frame so the model's actual content begins on
|
||||||
|
# a clean line.
|
||||||
|
self._write(self._ERASE_LINE)
|
||||||
|
self._active = False
|
||||||
|
|
||||||
|
def _run(self) -> None:
|
||||||
|
"""Background loop: redraw the next frame every ``_FRAME_INTERVAL``."""
|
||||||
|
index = 0
|
||||||
|
while not self._stop_event.is_set():
|
||||||
|
self._render_frame(self._FRAMES[index % len(self._FRAMES)])
|
||||||
|
index += 1
|
||||||
|
# ``Event.wait`` returns immediately when ``set`` is called,
|
||||||
|
# so ``stop`` is responsive even mid-frame.
|
||||||
|
if self._stop_event.wait(self._FRAME_INTERVAL):
|
||||||
|
return
|
||||||
|
|
||||||
|
def _render_frame(self, label: str) -> None:
|
||||||
|
"""Emit one frame in place using carriage-return + erase-EOL."""
|
||||||
|
self._write(f"{self._ERASE_LINE}{self._gray(label)}")
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# AI agent console
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
class AIAgentConsole:
|
class AIAgentConsole:
|
||||||
"""Serialised terminal output for the standalone AI agent.
|
"""Serialised terminal output for the standalone AI agent.
|
||||||
|
|
||||||
@@ -84,11 +187,30 @@ class AIAgentConsole:
|
|||||||
output_stream: IO[str] | None = None,
|
output_stream: IO[str] | None = None,
|
||||||
keep_history: bool = False,
|
keep_history: bool = False,
|
||||||
use_color: bool = True,
|
use_color: bool = True,
|
||||||
|
show_reasoning: bool = True,
|
||||||
) -> None:
|
) -> None:
|
||||||
self._output = output_stream if output_stream is not None else sys.stdout
|
self._output = output_stream if output_stream is not None else sys.stdout
|
||||||
self._keep_history = keep_history
|
self._keep_history = keep_history
|
||||||
self._use_color = use_color
|
self._use_color = use_color
|
||||||
|
# ``show_reasoning`` controls whether the LLM's chain-of-thought
|
||||||
|
# ("reasoning") deltas are printed to the terminal. The final
|
||||||
|
# answer ("content") is always printed so operators can still see
|
||||||
|
# the action being chosen.
|
||||||
|
self._show_reasoning = show_reasoning
|
||||||
|
# ``_lock`` serialises whole act/game render blocks (coarse grain).
|
||||||
|
# ``_io_lock`` is a finer-grained mutex protecting just the
|
||||||
|
# ``self._output.write`` calls so the thinking-indicator background
|
||||||
|
# thread can interleave safely with the main rendering thread
|
||||||
|
# without being blocked by the coarse lock.
|
||||||
self._lock = threading.Lock()
|
self._lock = threading.Lock()
|
||||||
|
self._io_lock = threading.Lock()
|
||||||
|
# Animated "thinking..." marquee shown while reasoning output is
|
||||||
|
# suppressed. Created up-front so callers can ``start``/``stop``
|
||||||
|
# idempotently regardless of the show_reasoning flag.
|
||||||
|
self._thinking = _ThinkingIndicator(
|
||||||
|
write_fn=self._write,
|
||||||
|
gray_fn=self._gray,
|
||||||
|
)
|
||||||
|
|
||||||
@contextmanager
|
@contextmanager
|
||||||
def act_log(self, observation: dict[str, Any]) -> Iterator[None]:
|
def act_log(self, observation: dict[str, Any]) -> Iterator[None]:
|
||||||
@@ -106,13 +228,32 @@ class AIAgentConsole:
|
|||||||
|
|
||||||
def begin_llm_stream(self) -> None:
|
def begin_llm_stream(self) -> None:
|
||||||
self._write(self._gray("AI MODEL STREAM\n"))
|
self._write(self._gray("AI MODEL STREAM\n"))
|
||||||
|
# When reasoning output is hidden, immediately start the marquee
|
||||||
|
# so the user sees liveness while the model is "thinking" before
|
||||||
|
# any content delta arrives.
|
||||||
|
if not self._show_reasoning:
|
||||||
|
self._thinking.start()
|
||||||
|
|
||||||
def write_llm_delta(self, kind: str, text: str) -> None:
|
def write_llm_delta(self, kind: str, text: str) -> None:
|
||||||
if not text:
|
if not text:
|
||||||
return
|
return
|
||||||
|
# Skip "reasoning" deltas entirely when reasoning output is hidden;
|
||||||
|
# this keeps the terminal focused on the final answer for users
|
||||||
|
# who do not care about chain-of-thought traces.
|
||||||
|
if kind == "reasoning" and not self._show_reasoning:
|
||||||
|
return
|
||||||
|
# First non-reasoning delta means the model has started speaking
|
||||||
|
# the actual answer; tear down the marquee before printing so the
|
||||||
|
# animation does not collide with the content stream.
|
||||||
|
if kind == "content" and not self._show_reasoning:
|
||||||
|
self._thinking.stop()
|
||||||
self._write(self._gray(text))
|
self._write(self._gray(text))
|
||||||
|
|
||||||
def end_llm_stream(self) -> None:
|
def end_llm_stream(self) -> None:
|
||||||
|
# Defensive stop in case the request finished without ever
|
||||||
|
# producing a content delta (e.g. fallback path / error).
|
||||||
|
if not self._show_reasoning:
|
||||||
|
self._thinking.stop()
|
||||||
self._write(self._gray("\n"))
|
self._write(self._gray("\n"))
|
||||||
|
|
||||||
def announce_action(
|
def announce_action(
|
||||||
@@ -120,11 +261,17 @@ class AIAgentConsole:
|
|||||||
action: dict[str, Any],
|
action: dict[str, Any],
|
||||||
source: str = "model",
|
source: str = "model",
|
||||||
) -> None:
|
) -> None:
|
||||||
|
# Defensive stop: error / fallback paths bypass end_llm_stream, so
|
||||||
|
# we ensure the marquee never leaks into action / warning output.
|
||||||
|
self._thinking.stop()
|
||||||
body = json.dumps(action, ensure_ascii=False)
|
body = json.dumps(action, ensure_ascii=False)
|
||||||
self._write(f"\nAI ACTION ({source}) -> {body}\n")
|
self._write(f"\nAI ACTION ({source}) -> {body}\n")
|
||||||
self._write("~" * 60 + "\n\n")
|
self._write("~" * 60 + "\n\n")
|
||||||
|
|
||||||
def announce_warning(self, message: str) -> None:
|
def announce_warning(self, message: str) -> None:
|
||||||
|
# Same defensive stop as ``announce_action`` - warnings can fire
|
||||||
|
# before the LLM stream closes (HTTP error, JSON parse error...).
|
||||||
|
self._thinking.stop()
|
||||||
self._write(f"\nAI WARNING -> {message}\n")
|
self._write(f"\nAI WARNING -> {message}\n")
|
||||||
|
|
||||||
def _gray(self, text: str) -> str:
|
def _gray(self, text: str) -> str:
|
||||||
@@ -133,8 +280,12 @@ class AIAgentConsole:
|
|||||||
return f"{ANSI_GRAY}{text}{ANSI_RESET}"
|
return f"{ANSI_GRAY}{text}{ANSI_RESET}"
|
||||||
|
|
||||||
def _write(self, text: str) -> None:
|
def _write(self, text: str) -> None:
|
||||||
self._output.write(text)
|
# The thinking-indicator background thread writes from a different
|
||||||
self._output.flush()
|
# thread than the main /act handler; the fine-grained ``_io_lock``
|
||||||
|
# avoids tearing of escape sequences and keeps stdout consistent.
|
||||||
|
with self._io_lock:
|
||||||
|
self._output.write(text)
|
||||||
|
self._output.flush()
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
@@ -927,6 +1078,15 @@ def main() -> None:
|
|||||||
action="store_true",
|
action="store_true",
|
||||||
help="Disable ANSI gray coloring for streamed LLM output.",
|
help="Disable ANSI gray coloring for streamed LLM output.",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--no-reasoning",
|
||||||
|
action="store_true",
|
||||||
|
help=(
|
||||||
|
"Hide the LLM's reasoning/chain-of-thought stream from the "
|
||||||
|
"terminal. The final answer (content) is still printed so "
|
||||||
|
"operators can see the chosen action."
|
||||||
|
),
|
||||||
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
if not args.api_key:
|
if not args.api_key:
|
||||||
@@ -944,6 +1104,7 @@ def main() -> None:
|
|||||||
console = AIAgentConsole(
|
console = AIAgentConsole(
|
||||||
keep_history=args.keep_history,
|
keep_history=args.keep_history,
|
||||||
use_color=not args.no_color,
|
use_color=not args.no_color,
|
||||||
|
show_reasoning=not args.no_reasoning,
|
||||||
)
|
)
|
||||||
service = AIAgentService(LLMClient(config), prompts, console=console)
|
service = AIAgentService(LLMClient(config), prompts, console=console)
|
||||||
server = create_server(args.host, args.port, service, default_player_id=args.player_id)
|
server = create_server(args.host, args.port, service, default_player_id=args.player_id)
|
||||||
@@ -956,6 +1117,7 @@ def main() -> None:
|
|||||||
f" base_url : {config.base_url}\n"
|
f" base_url : {config.base_url}\n"
|
||||||
f" player_id : {args.player_id}\n"
|
f" player_id : {args.player_id}\n"
|
||||||
f" stream : {'on' if config.stream else 'off'}\n"
|
f" stream : {'on' if config.stream else 'off'}\n"
|
||||||
|
f" reasoning : {'off (hidden)' if args.no_reasoning else 'on'}\n"
|
||||||
f" clear-screen: {'off (keep history)' if args.keep_history else 'on'}",
|
f" clear-screen: {'off (keep history)' if args.keep_history else 'on'}",
|
||||||
file=sys.stderr,
|
file=sys.stderr,
|
||||||
flush=True,
|
flush=True,
|
||||||
|
|||||||
Reference in New Issue
Block a user