feat: add ai agent http agent
This commit is contained in:
@@ -10,6 +10,7 @@
|
||||
- 观察信息包含玩家筹码、公共牌、当前玩家手牌、底池、历史动作、可用动作和跟注/加注边界。
|
||||
- 支持盲注、四条街下注、弃牌、过牌、跟注、下注、加注、全下、边池和摊牌结算。
|
||||
- 支持本地 Agent 和 HTTP Agent。
|
||||
- 支持 Human Agent 和 OpenAI-compatible AI Agent 的终端过程输出。
|
||||
|
||||
## 运行服务
|
||||
|
||||
@@ -60,7 +61,7 @@ curl http://127.0.0.1:8000/games/demo
|
||||
"name": "LLM Agent",
|
||||
"agent": {
|
||||
"type": "http",
|
||||
"endpoint": "http://127.0.0.1:9001/act",
|
||||
"endpoint": "http://127.0.0.1:9101",
|
||||
"timeout_seconds": 10
|
||||
}
|
||||
}
|
||||
@@ -83,8 +84,31 @@ curl http://127.0.0.1:8000/games/demo
|
||||
|
||||
`bet` 和 `raise` 的 `amount` 表示当前下注轮中该玩家希望达到的总下注额,也就是观察中 `amount_mode: "street_total"` 的含义。
|
||||
|
||||
## AI Agent
|
||||
|
||||
启动一个可接入 OpenAI-compatible Chat Completions API 的 AI Agent:
|
||||
|
||||
```bash
|
||||
python -m texas_holdem.ai_client \
|
||||
--host 127.0.0.1 \
|
||||
--port 9101 \
|
||||
--base-url https://api.openai.com/v1 \
|
||||
--api-key "$OPENAI_API_KEY" \
|
||||
--model gpt-4o-mini \
|
||||
--keep-history
|
||||
```
|
||||
|
||||
AI Agent 会在终端输出:
|
||||
|
||||
- 收到的 `/game` 游戏快照;
|
||||
- 收到的 `/act` 行动请求;
|
||||
- 大模型流式返回内容,默认灰色显示;
|
||||
- 最终解析出的 action,或失败时的 fallback action。
|
||||
|
||||
默认每次 `/act` 会清屏,和 Human Agent 一致;加 `--keep-history` 后保留历史滚动输出。可用 `--no-stream` 关闭流式请求,用 `--no-color` 关闭灰色 ANSI 输出。
|
||||
|
||||
## 测试
|
||||
|
||||
```bash
|
||||
python -m unittest discover -s tests -v
|
||||
python -m unittest discover -v
|
||||
```
|
||||
|
||||
@@ -8,6 +8,7 @@ dependencies = []
|
||||
[project.scripts]
|
||||
texas-holdem-server = "texas_holdem.server:main"
|
||||
texas-holdem-human = "texas_holdem.human_client:main"
|
||||
texas-holdem-ai = "texas_holdem.ai_client:main"
|
||||
|
||||
[tool.pytest.ini_options]
|
||||
testpaths = ["tests"]
|
||||
|
||||
@@ -0,0 +1,150 @@
|
||||
import io
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from texas_holdem.ai_client import (
|
||||
AIAgentConsole,
|
||||
AIAgentService,
|
||||
LLMClient,
|
||||
LLMConfig,
|
||||
PromptLibrary,
|
||||
_iter_sse_payloads,
|
||||
)
|
||||
|
||||
|
||||
class FakeLLM(LLMClient):
    """Canned-reply stand-in for ``LLMClient`` that records every prompt.

    No network traffic happens: ``chat`` stores the messages it was given,
    replays one reasoning delta plus the fixed reply through ``on_delta``
    (when supplied) and returns the fixed reply.
    """

    def __init__(self, reply: str) -> None:
        config = LLMConfig(
            base_url="http://example.test/v1",
            api_key="test",
            model="fake",
        )
        super().__init__(config)
        self.reply = reply
        # One entry per ``chat`` call, in call order.
        self.calls: list[list[dict[str, Any]]] = []

    def chat(self, messages, on_delta=None):  # type: ignore[no-untyped-def]
        self.calls.append(messages)
        if not on_delta:
            return self.reply
        for kind, text in (
            ("reasoning", "counting outs... "),
            ("content", self.reply),
        ):
            on_delta(kind, text)
        return self.reply
|
||||
|
||||
|
||||
def prompt_library(path: Path) -> PromptLibrary:
    """Write minimal prompt templates into ``path`` and wrap them in a library.

    Three files are created (``system.md``, ``game_start.md`` and
    ``observation.md``), each containing only the placeholders the tests
    assert on.
    """
    templates = {
        "system.md": "system",
        "game_start.md": (
            "GAME {game_id} {hand_number} {status} {players_block} {history_block}"
        ),
        "observation.md": (
            "ACT {hand_number} {street} {player_id} {legal_actions_block}"
        ),
    }
    for filename, text in templates.items():
        (path / filename).write_text(text, encoding="utf-8")
    return PromptLibrary(path)
|
||||
|
||||
|
||||
def game_state() -> dict[str, Any]:
    """Return a fresh single-player ``/game`` snapshot used as a test fixture."""
    ai_seat = {
        "player_id": "ai",
        "name": "AI",
        "seat": 0,
        "stack": 100,
        "in_hand": True,
    }
    snapshot: dict[str, Any] = {
        "game_id": "g1",
        "status": "running",
        "hand_number": 1,
        "small_blind": 5,
        "big_blind": 10,
        "button_seat": 0,
        "starting_stack": 100,
    }
    snapshot["players"] = [ai_seat]
    snapshot["hands"] = []
    return snapshot
|
||||
|
||||
|
||||
def observation() -> dict[str, Any]:
    """Return a preflop ``/act`` observation fixture for player ``ai``.

    The player rows are taken from :func:`game_state` so both fixtures
    describe the same table.
    """
    raise_option = {
        "action": "raise",
        "min_amount": 20,
        "max_amount": 100,
        "amount_mode": "street_total",
    }
    legal_actions = [
        {"action": "fold", "amount": 0},
        {"action": "call", "amount": 10},
        raise_option,
    ]
    return {
        "game_id": "g1",
        "hand_number": 1,
        "street": "preflop",
        "player_id": "ai",
        "seat": 0,
        "button_seat": 0,
        "small_blind": 5,
        "big_blind": 10,
        "board": [],
        "hole_cards": ["As", "Ah"],
        "players": game_state()["players"],
        "pot": 15,
        "to_call": 10,
        "min_raise_to": 20,
        "legal_actions": legal_actions,
        "action_history": [],
    }
|
||||
|
||||
|
||||
class LineResponse:
    """Minimal iterable standing in for an HTTP response body.

    Iterating yields the stored byte lines in order, which is all the SSE
    parser under test requires from a response object.
    """

    def __init__(self, lines: list[bytes]) -> None:
        self.lines = lines

    def __iter__(self):  # type: ignore[no-untyped-def]
        yield from self.lines
|
||||
|
||||
|
||||
class AIClientTests(unittest.TestCase):
    """Behavioural checks for the SSE parser and the agent's console logging."""

    def test_iter_sse_payloads_handles_done_and_crlf(self) -> None:
        # Mix CRLF and LF terminators; both must delimit events.
        wire_lines = [
            b"data: {\"a\": 1}\r\n",
            b"\r\n",
            b"data: [DONE]\n",
            b"\n",
        ]
        payloads = list(_iter_sse_payloads(LineResponse(wire_lines)))

        self.assertEqual(payloads, ['{"a": 1}', "[DONE]"])

    def test_service_logs_game_act_stream_and_action(self) -> None:
        with tempfile.TemporaryDirectory() as temp_dir:
            output = io.StringIO()
            service = AIAgentService(
                FakeLLM('{"action": "call", "amount": 10}'),
                prompt_library(Path(temp_dir)),
                console=AIAgentConsole(
                    output_stream=output,
                    keep_history=True,
                    use_color=False,
                ),
            )

            service.handle_game(game_state(), player_id="ai")
            action = service.handle_act(observation())

            text = output.getvalue()
            self.assertEqual(action, {"action": "call", "amount": 10})
            expected_fragments = (
                "GAME UPDATE",
                "Game g1 | Hand #1 | Street: preflop",
                "AI MODEL STREAM",
                "counting outs...",
                'AI ACTION (model) -> {"action": "call", "amount": 10}',
            )
            for fragment in expected_fragments:
                self.assertIn(fragment, text)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -0,0 +1,972 @@
|
||||
"""Standalone HTTP AI poker agent backed by an OpenAI-compatible LLM.
|
||||
|
||||
Run as a process exposing two endpoints that the Texas Hold'em service
|
||||
calls:
|
||||
|
||||
* ``POST /game`` - delivered at the start of every new hand. We use it as
|
||||
the boundary that opens a fresh chat session for that hand and seeds it
|
||||
with a human-readable rendering of the table snapshot (history of past
|
||||
hands included).
|
||||
* ``POST /act`` - the per-decision request. We render the observation
|
||||
with a templated user prompt, ask the configured LLM, parse the JSON
|
||||
reply and return it to the server.
|
||||
|
||||
Run::
|
||||
|
||||
python -m texas_holdem.ai_client \\
|
||||
--host 127.0.0.1 --port 9101 \\
|
||||
--base-url https://api.openai.com/v1 \\
|
||||
--api-key $OPENAI_API_KEY \\
|
||||
--model gpt-4o-mini
|
||||
|
||||
Hook it up by passing the *base* URL when creating the game::
|
||||
|
||||
{"id": "ai", "name": "AI",
|
||||
"agent": {"type": "http", "endpoint": "http://127.0.0.1:9101"}}
|
||||
|
||||
Design notes:
|
||||
- Prompts live in ``texas_holdem/prompts/*.md`` so non-engineers can edit
|
||||
them without touching code; we read them once at boot via
|
||||
:class:`PromptLibrary`.
|
||||
- :class:`LLMSession` owns the chat history for a single (game_id, hand)
|
||||
scope and is reset by every ``/game`` push, matching the user's mental
|
||||
model that one hand == one session.
|
||||
- :class:`SessionRegistry` keeps a tiny LRU per ``(game_id, player_id)``
|
||||
so the same process can serve multiple parallel games / seats.
|
||||
- The LLM client speaks OpenAI's ``/v1/chat/completions`` schema, which
|
||||
is what virtually every OpenAI-compatible provider implements.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import threading
|
||||
from collections import OrderedDict
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import dataclass, field
|
||||
from http import HTTPStatus
|
||||
from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
|
||||
from pathlib import Path
|
||||
from typing import IO, Any, Callable, Iterator
|
||||
from urllib.error import HTTPError, URLError
|
||||
from urllib.request import Request, urlopen
|
||||
|
||||
from texas_holdem.human_io import clear_screen, render_game_state, render_observation
|
||||
|
||||
# Default location of the prompt templates. Living next to this module lets
|
||||
# operators edit them in place without re-installing the package.
|
||||
PROMPTS_DIR = Path(__file__).resolve().parent / "prompts"
|
||||
ANSI_GRAY = "\x1b[90m"
|
||||
ANSI_RESET = "\x1b[0m"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Terminal diagnostics
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class AIAgentConsole:
|
||||
"""Serialised terminal output for the standalone AI agent.
|
||||
|
||||
The behaviour mirrors :class:`HumanClientConsole`: by default each
|
||||
decision clears the terminal first, while ``keep_history=True`` leaves
|
||||
previous game/act logs in scrollback. The LLM stream is printed in gray
|
||||
so model output stays visually separate from game state and final
|
||||
actions.
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
output_stream: IO[str] | None = None,
|
||||
keep_history: bool = False,
|
||||
use_color: bool = True,
|
||||
) -> None:
|
||||
self._output = output_stream if output_stream is not None else sys.stdout
|
||||
self._keep_history = keep_history
|
||||
self._use_color = use_color
|
||||
self._lock = threading.Lock()
|
||||
|
||||
@contextmanager
|
||||
def act_log(self, observation: dict[str, Any]) -> Iterator[None]:
|
||||
"""Render one received ``/act`` payload and hold the console lock."""
|
||||
with self._lock:
|
||||
if not self._keep_history:
|
||||
clear_screen(self._write)
|
||||
self._write(render_observation(observation))
|
||||
yield
|
||||
|
||||
def announce_game(self, game_state: dict[str, Any]) -> None:
|
||||
"""Render one received ``/game`` payload."""
|
||||
with self._lock:
|
||||
self._write(render_game_state(game_state))
|
||||
|
||||
def begin_llm_stream(self) -> None:
|
||||
self._write(self._gray("AI MODEL STREAM\n"))
|
||||
|
||||
def write_llm_delta(self, kind: str, text: str) -> None:
|
||||
if not text:
|
||||
return
|
||||
self._write(self._gray(text))
|
||||
|
||||
def end_llm_stream(self) -> None:
|
||||
self._write(self._gray("\n"))
|
||||
|
||||
def announce_action(
|
||||
self,
|
||||
action: dict[str, Any],
|
||||
source: str = "model",
|
||||
) -> None:
|
||||
body = json.dumps(action, ensure_ascii=False)
|
||||
self._write(f"\nAI ACTION ({source}) -> {body}\n")
|
||||
self._write("~" * 60 + "\n\n")
|
||||
|
||||
def announce_warning(self, message: str) -> None:
|
||||
self._write(f"\nAI WARNING -> {message}\n")
|
||||
|
||||
def _gray(self, text: str) -> str:
|
||||
if not self._use_color:
|
||||
return text
|
||||
return f"{ANSI_GRAY}{text}{ANSI_RESET}"
|
||||
|
||||
def _write(self, text: str) -> None:
|
||||
self._output.write(text)
|
||||
self._output.flush()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Prompt rendering
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class PromptLibrary:
    """Reads ``<name>.md`` prompt templates from a directory and renders them.

    Templates are plain ``str.format`` strings. Keeping the rendering step
    in one place means the wording shown to the LLM lives in the markdown
    files rather than being assembled piecemeal in agent code.
    """

    def __init__(self, directory: Path = PROMPTS_DIR) -> None:
        # Template text keyed by name. Each file is read at most once per
        # library instance, so later requests do no disk IO.
        self._cache: dict[str, str] = {}
        self.directory = directory

    def load(self, name: str) -> str:
        """Return the raw text of template ``name``, reading it on first use.

        Raises:
            FileNotFoundError: if ``<directory>/<name>.md`` does not exist.
        """
        try:
            return self._cache[name]
        except KeyError:
            pass
        path = self.directory / f"{name}.md"
        if not path.exists():
            raise FileNotFoundError(f"missing prompt template: {path}")
        text = path.read_text(encoding="utf-8")
        self._cache[name] = text
        return text

    def render(self, name: str, **fields: Any) -> str:
        """Return template ``name`` with ``fields`` substituted via ``format()``."""
        return self.load(name).format(**fields)
|
||||
|
||||
|
||||
def render_game_start_prompt(library: PromptLibrary, game_state: dict[str, Any]) -> str:
    """Render the ``game_start`` template that opens a new hand's session.

    Scalar fields are copied straight from ``game_state``; the player table
    and the digest of earlier hands are pre-formatted by helpers so the
    template only has to place them.
    """
    scalar_keys = (
        "game_id",
        "hand_number",
        "status",
        "small_blind",
        "big_blind",
        "button_seat",
        "starting_stack",
    )
    hands = game_state.get("hands") or []
    players = game_state.get("players") or []

    fields: dict[str, Any] = {key: game_state.get(key) for key in scalar_keys}
    fields["players_block"] = _format_players_block(players)
    fields["hand_count"] = len(hands)
    fields["history_block"] = _format_history_block(hands)
    return library.render("game_start", **fields)
|
||||
|
||||
|
||||
def render_observation_prompt(
    library: PromptLibrary, observation: dict[str, Any]
) -> str:
    """Render the ``observation`` template for one decision request."""
    passthrough_keys = (
        "hand_number",
        "street",
        "player_id",
        "seat",
        "button_seat",
        "pot",
        "to_call",
        "min_raise_to",
    )
    legal_actions = list(observation.get("legal_actions") or [])
    me = _find_self_player(observation)

    fields: dict[str, Any] = {key: observation.get(key) for key in passthrough_keys}
    # Prefer the display name from the player's own row; fall back to the id
    # when no row matched.
    fields["player_name"] = me.get("name") if me else observation.get("player_id")
    fields["amount_mode"] = observation.get("amount_mode") or "street_total"
    fields["hole_cards"] = _format_card_list(observation.get("hole_cards"))
    fields["board"] = _format_card_list(observation.get("board"))
    fields["players_block"] = _format_players_block(observation.get("players") or [])
    fields["action_history_block"] = _format_action_history(
        observation.get("action_history") or []
    )
    fields["legal_actions_block"] = _format_legal_actions(legal_actions)
    return library.render("observation", **fields)
|
||||
|
||||
|
||||
def _find_self_player(observation: dict[str, Any]) -> dict[str, Any] | None:
|
||||
"""Locate the acting player's row inside the observation snapshot."""
|
||||
pid = observation.get("player_id")
|
||||
for player in observation.get("players") or []:
|
||||
if player.get("player_id") == pid:
|
||||
return player
|
||||
return None
|
||||
|
||||
|
||||
def _format_card_list(cards: Any) -> str:
    """Join card labels with spaces, or return ``"(none)"`` when there are none."""
    return " ".join(map(str, cards)) if cards else "(none)"
|
||||
|
||||
|
||||
def _format_players_block(players: list[dict[str, Any]]) -> str:
    """Render one ``- seat N: ...`` line per player for the prompt templates.

    The status column lists ``folded``, ``all_in`` and ``out`` (in that
    order) for whichever apply, or ``active`` when none do.
    """
    if not players:
        return "(no players)"

    def describe(player: dict[str, Any]) -> str:
        checks = (
            ("folded", player.get("folded")),
            ("all_in", player.get("all_in")),
            ("out", not player.get("in_hand")),
        )
        status = ",".join(label for label, applies in checks if applies) or "active"
        return (
            f"- seat {player.get('seat')}: id={player.get('player_id')}, "
            f"name={player.get('name')}, stack={player.get('stack')}, "
            f"street_bet={player.get('street_bet', 0)}, "
            f"total_bet={player.get('total_bet', 0)}, status={status}"
        )

    return "\n".join(describe(player) for player in players)
|
||||
|
||||
|
||||
def _format_history_block(hands: list[dict[str, Any]]) -> str:
    """Summarise finished hands for the ``game_start`` prompt.

    Each hand becomes a header line (number, button seat, board) followed
    by one line per pot award and one line per shown-down hand.
    """
    if not hands:
        return "(no hands played yet)"

    def digest(hand: dict[str, Any]) -> str:
        award_lines = []
        for award in hand.get("awards") or []:
            winners = ",".join(award.get("winners") or [])
            rank = (award.get("hand_value") or {}).get("name", "-")
            award_lines.append(f" pot {award.get('amount')}: -> {winners} ({rank})")

        shown = hand.get("showdown_hands") or {}
        showdown_lines = [
            f" showdown {pid}: {' '.join(cards)}" for pid, cards in shown.items()
        ]

        header = (
            f"- Hand #{hand.get('hand_number')} "
            f"(button seat {hand.get('button_seat')}), "
            f"board: {_format_card_list(hand.get('board'))}"
        )
        return "\n".join([header, *award_lines, *showdown_lines])

    return "\n".join(digest(hand) for hand in hands)
|
||||
|
||||
|
||||
def _format_action_history(history: list[dict[str, Any]]) -> str:
    """Render the action log, one line per record, keeping only the last 32.

    The cap is defensive: it bounds prompt size even if an oversized
    history is received.
    """
    if not history:
        return "(no actions yet)"
    recent = history[-32:]
    return "\n".join(
        f"- [{entry.get('street')}] {entry.get('player_id')} -> "
        f"{entry.get('action')} amount={entry.get('amount', 0)}"
        for entry in recent
    )
|
||||
|
||||
|
||||
def _format_legal_actions(legal: list[dict[str, Any]]) -> str:
    """Render legal actions as a 1-based numbered list.

    ``bet`` and ``raise`` show their allowed ``[min_amount, max_amount]``
    range; every other action shows its fixed ``amount``.
    """
    if not legal:
        return "(no legal actions)"

    ranged = {"bet", "raise"}
    lines: list[str] = []
    for number, option in enumerate(legal, start=1):
        fixed_amount = option.get("amount")
        if option.get("action") in ranged:
            line = (
                f"{number}. {option['action']}: street_total in "
                f"[{option.get('min_amount')}, {option.get('max_amount')}]"
            )
        else:
            line = f"{number}. {option['action']} (amount={fixed_amount})"
        lines.append(line)
    return "\n".join(lines)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# OpenAI-compatible chat client
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class LLMConfig:
    """Connection and sampling settings for an OpenAI-compatible provider.

    Bundling these in a dataclass keeps constructor wiring short and lets
    new settings be added without editing every call site.
    """

    base_url: str
    api_key: str
    model: str
    timeout_seconds: float = 60.0
    temperature: float = 0.4
    stream: bool = True

    def chat_completions_url(self) -> str:
        """Return the chat-completions endpoint derived from ``base_url``.

        Trailing slashes are dropped. A ``base_url`` that already ends in
        ``/chat/completions`` is returned as-is, so passing the full
        endpoint by mistake still works.
        """
        suffix = "/chat/completions"
        trimmed = self.base_url.rstrip("/")
        return trimmed if trimmed.endswith(suffix) else trimmed + suffix
|
||||
|
||||
|
||||
class LLMClient:
    """Thin wrapper around the OpenAI-compatible Chat Completions API.

    Built on ``urllib`` so the project stays free of third-party
    dependencies; replacing the transport later only touches this class.

    Every failure a caller is expected to recover from (HTTP error status,
    transport error, undecodable or unexpectedly shaped response) is raised
    as :class:`RuntimeError`.
    """

    def __init__(self, config: LLMConfig) -> None:
        self.config = config

    def chat(
        self,
        messages: list[dict[str, Any]],
        on_delta: Callable[[str, str], None] | None = None,
    ) -> str:
        """Send a chat completion request and return the assistant text.

        Args:
            messages: Chat messages in OpenAI ``role``/``content`` form.
            on_delta: Optional callback invoked as ``on_delta(kind, text)``
                with ``kind`` being ``"reasoning"`` or ``"content"``.

        Raises:
            RuntimeError: if the request fails or the reply is malformed.
        """
        if self.config.stream:
            return self._chat_stream(messages, on_delta)
        return self._chat_once(messages, on_delta)

    def _request(self, body: dict[str, Any]) -> Request:
        """Build the authenticated JSON ``POST`` request carrying ``body``."""
        encoded = json.dumps(body).encode("utf-8")
        return Request(
            self.config.chat_completions_url(),
            data=encoded,
            headers={
                "Content-Type": "application/json",
                "Authorization": f"Bearer {self.config.api_key}",
            },
            method="POST",
        )

    def _completion_request(
        self, messages: list[dict[str, Any]], *, stream: bool
    ) -> Request:
        """Build the chat-completions request shared by both transport modes."""
        return self._request(
            {
                "model": self.config.model,
                "messages": messages,
                "temperature": self.config.temperature,
                "stream": stream,
            }
        )

    def _transport_error(self, exc: OSError) -> RuntimeError:
        """Translate a urllib/socket failure into the ``RuntimeError`` to raise."""
        url = self.config.chat_completions_url()
        if isinstance(exc, HTTPError):
            detail = exc.read().decode("utf-8", errors="replace")
            return RuntimeError(f"LLM HTTP {exc.code} from {url}: {detail}")
        # Include the underlying reason (timeout, DNS, refused connection...)
        # so the operator can tell failures apart from the warning line.
        return RuntimeError(f"LLM request failed: {url} ({exc})")

    def _chat_once(
        self,
        messages: list[dict[str, Any]],
        on_delta: Callable[[str, str], None] | None = None,
    ) -> str:
        """Perform a non-streaming completion and return the message content."""
        request = self._completion_request(messages, stream=False)
        try:
            with urlopen(request, timeout=self.config.timeout_seconds) as resp:
                raw = resp.read()
        except OSError as exc:  # HTTPError and URLError are OSError subclasses
            raise self._transport_error(exc) from exc

        try:
            payload = json.loads(raw.decode("utf-8"))
        except ValueError as exc:
            # JSONDecodeError and UnicodeDecodeError are both ValueError. A
            # non-JSON body (e.g. a proxy's HTML error page served with 200)
            # must surface as RuntimeError like every other LLM failure.
            raise RuntimeError(
                f"LLM returned a non-JSON response: {raw[:200]!r}"
            ) from exc

        try:
            message = payload["choices"][0]["message"]
            reasoning = _reasoning_text(message)
            content = _message_text(message.get("content"))
        except (KeyError, IndexError, TypeError, AttributeError) as exc:
            raise RuntimeError(f"LLM returned unexpected payload: {payload}") from exc

        # Callbacks run outside the try block so an error raised by the
        # callback is not misreported as a malformed payload.
        if on_delta and reasoning:
            on_delta("reasoning", reasoning)
        if on_delta and content:
            on_delta("content", content)
        return content

    def _chat_stream(
        self,
        messages: list[dict[str, Any]],
        on_delta: Callable[[str, str], None] | None = None,
    ) -> str:
        """Perform a streaming completion and return the concatenated content.

        Reasoning deltas are forwarded to ``on_delta`` only; content deltas
        are forwarded and also accumulated into the return value.
        """
        request = self._completion_request(messages, stream=True)
        parts: list[str] = []
        try:
            with urlopen(request, timeout=self.config.timeout_seconds) as resp:
                for event in _iter_sse_payloads(resp):
                    if event == "[DONE]":
                        break
                    try:
                        payload = json.loads(event)
                    except json.JSONDecodeError as exc:
                        raise RuntimeError(
                            f"LLM returned invalid stream event: {event!r}"
                        ) from exc
                    delta = _stream_delta(payload)
                    reasoning = _reasoning_text(delta)
                    content = _message_text(delta.get("content"))
                    if reasoning and on_delta:
                        on_delta("reasoning", reasoning)
                    if content:
                        parts.append(content)
                        if on_delta:
                            on_delta("content", content)
        except OSError as exc:  # also covers errors raised while reading the stream
            raise self._transport_error(exc) from exc
        return "".join(parts)
|
||||
|
||||
|
||||
def _message_text(value: Any) -> str:
    """Coerce a message field to text: ``None`` -> ``""``, non-strings via ``str()``."""
    if value is None:
        return ""
    return value if isinstance(value, str) else str(value)
|
||||
|
||||
|
||||
def _reasoning_text(message_or_delta: dict[str, Any]) -> str:
    """Return the first non-empty reasoning field of a message or delta.

    Checks ``reasoning_content``, ``reasoning`` and ``reasoning_text`` in
    that order; returns ``""`` when none carries text.
    """
    candidate_keys = ("reasoning_content", "reasoning", "reasoning_text")
    texts = (_message_text(message_or_delta.get(key)) for key in candidate_keys)
    return next((text for text in texts if text), "")
|
||||
|
||||
|
||||
def _stream_delta(payload: dict[str, Any]) -> dict[str, Any]:
    """Return the ``delta`` dict of the first choice in a stream chunk.

    Returns ``{}`` when the chunk carries no delta. That includes chunks
    whose ``choices`` list is empty, which OpenAI-compatible providers emit
    for usage-only or metadata-only events; treating them as errors would
    abort an otherwise healthy stream.

    Raises:
        RuntimeError: if ``choices`` is missing or not list-shaped, the
            first choice is not a mapping, or ``delta`` is not a dict.
    """
    try:
        choices = payload["choices"]
    except (KeyError, TypeError) as exc:
        raise RuntimeError(f"LLM returned unexpected stream payload: {payload}") from exc

    if isinstance(choices, list) and not choices:
        return {}

    try:
        delta = choices[0].get("delta") or {}
    except (KeyError, IndexError, TypeError, AttributeError) as exc:
        raise RuntimeError(f"LLM returned unexpected stream payload: {payload}") from exc
    if not isinstance(delta, dict):
        raise RuntimeError(f"LLM returned unexpected stream delta: {payload}")
    return delta
|
||||
|
||||
|
||||
def _iter_sse_payloads(response: Any) -> Iterator[str]:
    """Yield the ``data:`` payload of each event in an SSE byte-line stream.

    ``response`` is iterated line by line (as bytes). Consecutive ``data:``
    lines are joined with ``"\\n"``; a blank line ends the event. Lines that
    do not start with ``data:`` are ignored, and a trailing event without a
    closing blank line is still yielded.
    """
    prefix = "data:"
    pending: list[str] = []
    for chunk in response:
        text = chunk.decode("utf-8", errors="replace").rstrip("\r\n")
        if text:
            if text.startswith(prefix):
                pending.append(text[len(prefix):].lstrip())
            continue
        # Blank line: event boundary.
        if pending:
            yield "\n".join(pending)
            pending = []
    if pending:
        yield "\n".join(pending)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Session lifecycle
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@dataclass
class LLMSession:
    """Chat history scoped to one hand of one game for one player.

    The system prompt never changes for the session object, while the
    user/assistant exchange is replaced whenever :meth:`reset_with_game` is
    called (i.e. when a ``/game`` push announces a new hand). Keeping the
    assistant turns lets the model follow its own earlier reasoning within
    the hand without the table snapshot being re-sent.
    """

    system_prompt: str
    game_id: str
    player_id: str
    messages: list[dict[str, Any]] = field(default_factory=list)
    hand_number: int = 0

    def reset_with_game(self, hand_number: int, game_user_message: str) -> None:
        """Discard the current exchange and seed a new one for ``hand_number``."""
        opening = [
            {"role": "system", "content": self.system_prompt},
            {"role": "user", "content": game_user_message},
        ]
        self.messages = opening
        self.hand_number = hand_number

    def append_user(self, content: str) -> None:
        """Append a user turn to the history."""
        self.messages.append({"role": "user", "content": content})

    def append_assistant(self, content: str) -> None:
        """Append an assistant turn to the history."""
        self.messages.append({"role": "assistant", "content": content})

    def chat_messages(self) -> list[dict[str, Any]]:
        """Return a copy of the history that is safe to send to the LLM.

        If nothing has been recorded yet (for example ``/act`` arrived
        before ``/game``), the result still contains the system prompt so
        the request is well-formed.
        """
        if self.messages:
            return list(self.messages)
        return [{"role": "system", "content": self.system_prompt}]
|
||||
|
||||
|
||||
class SessionRegistry:
    """Bounded, least-recently-used store of sessions keyed by ``(game_id, player_id)``.

    Each parallel game or seat served by this process gets its own chat
    history. Once more than ``max_sessions`` entries exist, the least
    recently requested ones are discarded.
    """

    def __init__(self, system_prompt: str, max_sessions: int = 64) -> None:
        self._lock = threading.Lock()
        self._max = max_sessions
        self._system_prompt = system_prompt
        # Ordered oldest-first; a lookup moves its entry to the end.
        self._sessions: OrderedDict[tuple[str, str], LLMSession] = OrderedDict()

    def get_or_create(self, game_id: str, player_id: str) -> LLMSession:
        """Return the session for the key, creating it (and evicting) if needed."""
        key = (game_id, player_id)
        with self._lock:
            if key in self._sessions:
                self._sessions.move_to_end(key)
                return self._sessions[key]

            created = LLMSession(
                system_prompt=self._system_prompt,
                game_id=game_id,
                player_id=player_id,
            )
            self._sessions[key] = created
            # Evict from the oldest end until the cap is respected again.
            while len(self._sessions) > self._max:
                self._sessions.popitem(last=False)
            return created
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Action parsing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
_JSON_OBJECT_RE = re.compile(r"\{[\s\S]*\}")
|
||||
|
||||
|
||||
def parse_action_reply(reply: str) -> dict[str, Any]:
    """Extract the action JSON from a possibly chatty LLM response.

    LLMs sometimes wrap the JSON in markdown fences or surround it with
    prose. The reply is scanned left to right and the first ``{``
    position from which a complete JSON object decodes is used. Text before
    or after that object, including stray braces and further objects, is
    ignored. Range validation is deliberately not done here; per the
    original design note, the engine sanitises illegal values downstream.

    Args:
        reply: Raw assistant text.

    Returns:
        ``{"action": str, "amount": int}``. A missing or empty action
        becomes ``"fold"`` and a missing or empty amount becomes ``0``.

    Raises:
        ValueError: if the reply is empty, contains no decodable JSON
            object, or carries an amount that cannot be read as a number.
    """
    if not isinstance(reply, str) or not reply.strip():
        raise ValueError("empty LLM reply")

    start = reply.find("{")
    if start == -1:
        raise ValueError(f"no JSON object found in LLM reply: {reply!r}")

    decoder = json.JSONDecoder()
    payload: dict[str, Any] | None = None
    while start != -1:
        try:
            # raw_decode stops at the end of the first complete value, so
            # trailing chatter or a second object cannot break parsing.
            candidate, _end = decoder.raw_decode(reply, start)
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            payload = candidate
            break
        start = reply.find("{", start + 1)
    if payload is None:
        raise ValueError(f"invalid JSON in LLM reply: {reply!r}")

    raw_amount = payload.get("amount") or 0
    try:
        amount = int(raw_amount)
    except (TypeError, ValueError, OverflowError):
        try:
            # Accept numeric strings such as "25.0"; anything else is
            # reported as ValueError, the one type callers handle.
            amount = int(float(raw_amount))
        except (TypeError, ValueError, OverflowError) as exc:
            raise ValueError(f"invalid amount in LLM reply: {reply!r}") from exc

    return {
        "action": str(payload.get("action") or "fold"),
        "amount": amount,
    }
|
||||
|
||||
|
||||
def fallback_action(observation: dict[str, Any]) -> dict[str, Any]:
    """Choose a conservative legal action for when the LLM call fails.

    Preference order is ``check``, then ``call`` (at the advertised
    amount), then ``fold``. If none of those is offered, the first legal
    action is echoed with its ``amount`` (or 0); with no legal actions at
    all the result is a fold.
    """
    options = observation.get("legal_actions") or []
    offered = {option.get("action"): option for option in options}

    for name in ("check", "call", "fold"):
        if name not in offered:
            continue
        amount = int(offered[name].get("amount") or 0) if name == "call" else 0
        return {"action": name, "amount": amount}

    if not options:
        return {"action": "fold", "amount": 0}

    # Last resort: none of the preferred actions is available.
    head = options[0]
    return {
        "action": str(head.get("action")),
        "amount": int(head.get("amount") or 0),
    }
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# HTTP service
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class AIAgentService:
    """Glue the LLM client, prompt library and session registry together.

    Exposed as a single object so that the HTTP handler stays thin and
    purely concerned with request parsing and response framing.
    """

    def __init__(
        self,
        llm: LLMClient,
        prompts: PromptLibrary,
        registry: SessionRegistry | None = None,
        console: AIAgentConsole | None = None,
    ) -> None:
        """Wire the collaborators together.

        Args:
            llm: Client used to request chat completions.
            prompts: Library the system prompt and templates are loaded from.
            registry: Optional pre-built session registry. When omitted, a
                new one is created and seeded with the system prompt.
            console: Optional terminal reporter; ``None`` disables output.
        """
        self.llm = llm
        self.prompts = prompts
        self.console = console
        # The system prompt is read exactly once, at construction time, so
        # every session created by the default registry gets the same text
        # even if the operator edits the markdown file mid-game
        # (intentional: restart the agent to pick up changes).
        system_prompt = self.prompts.load("system")
        # Compare against None explicitly: a caller-supplied registry that
        # happens to be falsy (e.g. an empty container type) must be kept.
        if registry is None:
            registry = SessionRegistry(system_prompt=system_prompt)
        self.registry = registry

    def handle_game(self, game_state: dict[str, Any], player_id: str) -> None:
        """Open or refresh the per-hand session.

        NOTE(review): the session here is keyed by the ``player_id`` argument
        (header / CLI default), whereas ``/act`` keys it by the observation's
        own ``player_id``. If the two differ, the game-start message lands in
        a different session than the one used for decisions - confirm that
        deployments keep them aligned.
        """
        if self.console:
            self.console.announce_game(game_state)
        game_id = str(game_state.get("game_id") or "")
        hand_number = int(game_state.get("hand_number") or 0)
        session = self.registry.get_or_create(game_id, player_id)
        session.reset_with_game(
            hand_number=hand_number,
            game_user_message=render_game_start_prompt(self.prompts, game_state),
        )

    def handle_act(self, observation: dict[str, Any]) -> dict[str, Any]:
        """Render the prompt, call the LLM, parse the reply.

        Returns the action dict to send back to the engine; on any LLM or
        parsing failure the result of ``fallback_action`` is returned instead.
        """
        if self.console:
            # The console's ``act_log`` context manager wraps the whole
            # decision so its output is grouped per request.
            with self.console.act_log(observation):
                return self._handle_act_locked(observation)
        return self._handle_act_locked(observation)

    def _handle_act_locked(self, observation: dict[str, Any]) -> dict[str, Any]:
        """Run one decision turn against the session for this observation."""
        game_id = str(observation.get("game_id") or "")
        player_id = str(observation.get("player_id") or "")
        session = self.registry.get_or_create(game_id, player_id)
        # Ensure we never silently drop the system prompt even if /game
        # arrives after /act (e.g. process started mid-hand on a restart).
        if not session.messages:
            session.messages = [
                {"role": "system", "content": session.system_prompt}
            ]
        session.append_user(render_observation_prompt(self.prompts, observation))

        try:
            assistant_text = self._stream_chat(session)
        except RuntimeError as exc:
            # On any LLM error, use the safe fallback. We also drop the
            # last user message to avoid contaminating the next turn with
            # a request that produced no assistant reply.
            session.messages.pop()
            return self._fallback(observation, exc)

        session.append_assistant(assistant_text)
        try:
            action = parse_action_reply(assistant_text)
        except ValueError as exc:
            # Reply was unparseable; keep it in the history so the LLM can
            # see what it did wrong on the next turn (no extra prompt
            # needed - the next observation will fill that role) and
            # answer with a safe action this turn.
            return self._fallback(observation, exc)
        if self.console:
            self.console.announce_action(action, source="model")
        return action

    def _stream_chat(self, session: Any) -> str:
        """Call the LLM, always closing the console stream afterwards."""
        if not self.console:
            return self.llm.chat(session.chat_messages(), on_delta=None)
        self.console.begin_llm_stream()
        try:
            return self.llm.chat(
                session.chat_messages(),
                on_delta=self.console.write_llm_delta,
            )
        finally:
            # Runs on failure too, so a stream that was opened is never left
            # dangling before the warning / fallback lines are printed.
            self.console.end_llm_stream()

    def _fallback(
        self, observation: dict[str, Any], exc: Exception
    ) -> dict[str, Any]:
        """Pick the safe fallback action and report why it was needed."""
        action = fallback_action(observation)
        if self.console:
            self.console.announce_warning(str(exc))
            self.console.announce_action(action, source="fallback")
        return action
|
||||
|
||||
|
||||
def _bind_player_id(handler: BaseHTTPRequestHandler) -> str:
    """Work out which seat an incoming request is for.

    A non-empty ``X-Player-Id`` request header wins, which lets multi-seat
    deployments keep their sessions apart. Otherwise the server's
    ``default_player_id`` attribute is used, falling back to ``"ai"`` when
    the server does not define one.
    """
    return handler.headers.get("X-Player-Id") or getattr(
        handler.server, "default_player_id", "ai"
    )
|
||||
|
||||
|
||||
class AIRequestHandler(BaseHTTPRequestHandler):
    """HTTP entry point for the AI agent.

    Routes:
    - ``GET /health`` - liveness probe.
    - ``POST /game`` - new hand boundary; opens a fresh session.
    - ``POST /act`` - returns the AI-decided action.
    """

    server_version = "TexasHoldemAIAgent/0.1"
    service: AIAgentService  # injected by ``create_server``

    def do_GET(self) -> None:
        """Answer the liveness probe; every other path is a 404."""
        if self.path == "/health":
            self._json({"ok": True})
            return
        self._json({"error": "not found"}, HTTPStatus.NOT_FOUND)

    def do_POST(self) -> None:
        """Dispatch ``/game`` and ``/act``.

        Unknown paths get 404, an unreadable body gets 400, and any
        exception raised by the route handler is reported as 500.
        """
        routes = {
            "/game": self._handle_game,
            "/act": self._handle_act,
        }
        handler = routes.get(self.path)
        if handler is None:
            self._json({"error": "not found"}, HTTPStatus.NOT_FOUND)
            return

        try:
            payload = self._read_json()
        except ValueError as exc:
            self._json({"error": str(exc)}, HTTPStatus.BAD_REQUEST)
            return

        try:
            handler(payload)
        except Exception as exc:  # pragma: no cover - last-resort guard
            self._json({"error": str(exc)}, HTTPStatus.INTERNAL_SERVER_ERROR)

    def _handle_game(self, payload: dict[str, Any]) -> None:
        """Forward a game snapshot to the service and reply 204."""
        self.service.handle_game(payload, player_id=_bind_player_id(self))
        self._empty(HTTPStatus.NO_CONTENT)

    def _handle_act(self, payload: dict[str, Any]) -> None:
        """Ask the service for an action and return it as JSON."""
        action = self.service.handle_act(payload)
        self._json(action)

    def log_message(self, format: str, *args: Any) -> None:  # noqa: A002
        """Discard the base class's per-request log lines."""
        return

    def _read_json(self) -> dict[str, Any]:
        """Read and decode the request body as a JSON object.

        Raises:
            ValueError: if ``Content-Length`` is missing, non-numeric or not
                positive, or if the body is not UTF-8 / valid JSON / a JSON
                object. ``do_POST`` maps this to a 400 response.
        """
        try:
            length = int(self.headers.get("Content-Length", "0"))
        except (TypeError, ValueError) as exc:
            # Give the client a clear message instead of the raw int() error.
            raise ValueError("Content-Length must be an integer") from exc
        if length <= 0:
            raise ValueError("request body is required")
        try:
            payload = json.loads(self.rfile.read(length).decode("utf-8"))
        except UnicodeDecodeError as exc:
            raise ValueError("request body must be UTF-8 encoded") from exc
        except json.JSONDecodeError as exc:
            raise ValueError("request body must be valid JSON") from exc
        if not isinstance(payload, dict):
            raise ValueError("request body must be a JSON object")
        return payload

    def _json(
        self, payload: dict[str, Any], status: HTTPStatus = HTTPStatus.OK
    ) -> None:
        """Send ``payload`` as an ASCII-escaped JSON body with ``status``."""
        body = json.dumps(payload, ensure_ascii=True).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def _empty(self, status: HTTPStatus) -> None:
        """Send a body-less response with ``status``."""
        self.send_response(status)
        # A server must not send Content-Length on 1xx or 204 responses
        # (RFC 9110 section 8.6); every other status advertises length 0.
        if not (status == HTTPStatus.NO_CONTENT or 100 <= status < 200):
            self.send_header("Content-Length", "0")
        self.end_headers()
|
||||
|
||||
|
||||
def create_server(
    host: str,
    port: int,
    service: AIAgentService,
    default_player_id: str = "ai",
) -> ThreadingHTTPServer:
    """Build and configure the HTTP server.

    Args:
        host: Interface to bind.
        port: TCP port to bind.
        service: Service instance every request of this server is routed to.
        default_player_id: Seat id used when a request carries no
            ``X-Player-Id`` header.

    Returns:
        A bound ``ThreadingHTTPServer``; the caller starts and closes it.

    The ``service`` is attached to a handler subclass created for this
    server, and ``default_player_id`` to the server object, so all worker
    threads of one server share a single service (and therefore a single
    session registry) without affecting any other server built in the same
    process.
    """
    # Binding on a per-server subclass instead of on ``AIRequestHandler``
    # itself keeps a later ``create_server`` call from replacing the service
    # of servers that are already running.
    bound_handler = type(
        "BoundAIRequestHandler",
        (AIRequestHandler,),
        {"service": service},
    )
    server = ThreadingHTTPServer((host, port), bound_handler)
    server.default_player_id = default_player_id  # type: ignore[attr-defined]
    return server
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# CLI entry point
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _build_arg_parser() -> argparse.ArgumentParser:
    """Declare the command-line options of the AI agent process."""
    parser = argparse.ArgumentParser(
        description=(
            "Run an OpenAI-compatible AI poker agent that exposes "
            "POST /act and POST /game."
        )
    )
    parser.add_argument("--host", default="127.0.0.1")
    parser.add_argument("--port", default=9101, type=int)
    parser.add_argument(
        "--base-url",
        default=os.environ.get("OPENAI_BASE_URL", "https://api.openai.com/v1"),
        help="OpenAI-compatible base URL (default: $OPENAI_BASE_URL or "
        "https://api.openai.com/v1).",
    )
    parser.add_argument(
        "--api-key",
        default=os.environ.get("OPENAI_API_KEY", ""),
        help="API key (default: $OPENAI_API_KEY).",
    )
    parser.add_argument(
        "--model",
        default=os.environ.get("OPENAI_MODEL", "gpt-4o-mini"),
        help="Model identifier (default: $OPENAI_MODEL or gpt-4o-mini).",
    )
    parser.add_argument(
        "--timeout",
        default=60.0,
        type=float,
        help="LLM request timeout in seconds.",
    )
    parser.add_argument(
        "--temperature",
        default=0.4,
        type=float,
        help="LLM sampling temperature.",
    )
    parser.add_argument(
        "--player-id",
        default="ai",
        help=(
            "Default player_id used to key sessions. Override per-request "
            "via the X-Player-Id header for multi-seat setups."
        ),
    )
    parser.add_argument(
        "--prompts-dir",
        default=str(PROMPTS_DIR),
        help="Directory containing the prompt markdown templates.",
    )
    parser.add_argument(
        "--keep-history",
        action="store_true",
        help=(
            "Keep previous terminal output when a new /act request arrives "
            "instead of clearing the screen."
        ),
    )
    parser.add_argument(
        "--no-stream",
        action="store_true",
        help="Disable streaming Chat Completions requests.",
    )
    parser.add_argument(
        "--no-color",
        action="store_true",
        help="Disable ANSI gray coloring for streamed LLM output.",
    )
    return parser


def main(argv: list[str] | None = None) -> None:
    """Parse the command line, build the agent and serve until interrupted.

    Args:
        argv: Argument list to parse; ``None`` (the default, used by the
            console-script entry point) makes argparse read ``sys.argv``.

    Exits through ``parser.error`` when no API key is available from
    ``--api-key`` or ``$OPENAI_API_KEY``.
    """
    parser = _build_arg_parser()
    args = parser.parse_args(argv)

    if not args.api_key:
        parser.error("--api-key (or OPENAI_API_KEY) is required")

    config = LLMConfig(
        base_url=args.base_url,
        api_key=args.api_key,
        model=args.model,
        timeout_seconds=args.timeout,
        temperature=args.temperature,
        stream=not args.no_stream,
    )
    prompts = PromptLibrary(directory=Path(args.prompts_dir))
    console = AIAgentConsole(
        keep_history=args.keep_history,
        use_color=not args.no_color,
    )
    service = AIAgentService(LLMClient(config), prompts, console=console)
    server = create_server(
        args.host, args.port, service, default_player_id=args.player_id
    )

    # The startup banner goes to stderr; only the lines that interpolate
    # values carry the f-prefix.
    print(
        f"AI HTTP agent listening on http://{args.host}:{args.port}\n"
        " POST /act - decision request\n"
        " POST /game - new-hand snapshot (opens a fresh session)\n"
        f" model : {config.model}\n"
        f" base_url : {config.base_url}\n"
        f" player_id : {args.player_id}\n"
        f" stream : {'on' if config.stream else 'off'}\n"
        f" clear-screen: {'off (keep history)' if args.keep_history else 'on'}",
        file=sys.stderr,
        flush=True,
    )
    try:
        server.serve_forever()
    except KeyboardInterrupt:
        # Ctrl-C is the normal way to stop the agent; exit quietly.
        pass
    finally:
        server.server_close()


if __name__ == "__main__":
    main()
|
||||
@@ -79,6 +79,12 @@ class TableGame:
|
||||
self._advance_button()
|
||||
assert self.button_index is not None
|
||||
|
||||
# Notify every agent that a new hand is starting. Pushing here (as
|
||||
# opposed to after ``_award_pots``) lets HTTP agents seed a fresh
|
||||
# session with the latest table state and per-hand history before
|
||||
# any decision is asked of them.
|
||||
self._broadcast_game_update()
|
||||
|
||||
self._deal_hole_cards(deck)
|
||||
small_blind_index, big_blind_index = self._blind_indexes()
|
||||
self._post_blind(small_blind_index, "small_blind", self.small_blind)
|
||||
@@ -115,9 +121,6 @@ class TableGame:
|
||||
finished_at=time(),
|
||||
)
|
||||
self.hand_summaries.append(summary)
|
||||
# Notify every agent so HTTP-backed clients can render the just
|
||||
# finished hand. Failures here must never abort the table.
|
||||
self._broadcast_game_update()
|
||||
return summary
|
||||
|
||||
def run_hands(self, max_hands: int, until_one_left: bool = False) -> list[HandSummary]:
|
||||
|
||||
@@ -0,0 +1,22 @@
|
||||
# GAME_START
|
||||
|
||||
A new hand of Texas Hold'em is about to begin. Use this snapshot as the
|
||||
fresh context for every decision in the upcoming hand. Hole cards and
|
||||
betting state from prior hands are NOT carried over.
|
||||
|
||||
## Table
|
||||
|
||||
- Game id: {game_id}
|
||||
- Hand number: {hand_number}
|
||||
- Status: {status}
|
||||
- Blinds: small={small_blind}, big={big_blind}
|
||||
- Button seat: {button_seat}
|
||||
- Starting stack: {starting_stack}
|
||||
|
||||
## Players (current stacks)
|
||||
|
||||
{players_block}
|
||||
|
||||
## Hands played so far ({hand_count})
|
||||
|
||||
{history_block}
|
||||
@@ -0,0 +1,36 @@
|
||||
# OBSERVATION (your turn to act)
|
||||
|
||||
It is your turn. Read the state below and respond with a single JSON
|
||||
object: `{{"action": "<name>", "amount": <int>}}`. Pick only from the
|
||||
listed legal actions.
|
||||
|
||||
## Hand state
|
||||
|
||||
- Hand number: {hand_number}
|
||||
- Street: {street}
|
||||
- You are: player_id={player_id}, name={player_name}, seat={seat}
|
||||
- Button seat: {button_seat}
|
||||
- Pot size: {pot}
|
||||
- To call: {to_call}
|
||||
- Min raise to: {min_raise_to}
|
||||
- Amount semantics for bet/raise: {amount_mode} (the integer is the
|
||||
target total street bet, NOT the delta on top of your current bet)
|
||||
|
||||
## Cards
|
||||
|
||||
- Your hole cards: {hole_cards}
|
||||
- Community board: {board}
|
||||
|
||||
## Players at the table
|
||||
|
||||
{players_block}
|
||||
|
||||
## Action history (this hand)
|
||||
|
||||
{action_history_block}
|
||||
|
||||
## Legal actions
|
||||
|
||||
{legal_actions_block}
|
||||
|
||||
Respond NOW with one JSON line and nothing else.
|
||||
@@ -0,0 +1,63 @@
|
||||
# Role
|
||||
|
||||
You are an expert No-Limit Texas Hold'em poker player participating in a
|
||||
multi-agent table game. You play one fixed seat for the entire match.
|
||||
|
||||
You will receive:
|
||||
|
||||
1. A "GAME_START" message at the beginning of every new hand, containing the
|
||||
full table snapshot (players, stacks, finished hands so far).
|
||||
2. One "OBSERVATION" message per decision point, describing the current
|
||||
street, your hole cards, the public board, the action history of this
|
||||
hand, the legal actions available to you, and the amount semantics.
|
||||
|
||||
# Rules of Texas Hold'em (concise reference)
|
||||
|
||||
- Each player is dealt two private hole cards.
|
||||
- Five community cards are dealt across three streets: flop (3), turn (1),
|
||||
river (1).
|
||||
- Betting rounds occur preflop, flop, turn, river. The best 5-card hand
|
||||
built from any combination of hole + board cards wins the pot.
|
||||
- Hand ranking (high to low): straight flush, four of a kind, full house,
|
||||
flush, straight, three of a kind, two pair, one pair, high card.
|
||||
- "Position" matters: acting later in a street is an advantage.
|
||||
|
||||
# Action protocol
|
||||
|
||||
For every decision request you MUST output a single JSON object and nothing
|
||||
else. The schema is:
|
||||
|
||||
```json
|
||||
{"action": "<one of the legal action names>", "amount": <integer>}
|
||||
```
|
||||
|
||||
- `amount` MUST be an integer (chips, no decimals).
|
||||
- For `bet` and `raise`, `amount` is interpreted as **the target total bet
|
||||
on the current street** (`amount_mode = "street_total"` in the
|
||||
observation), and MUST satisfy
|
||||
`min_amount <= amount <= max_amount` from the matching legal action.
|
||||
- For `fold`, `check`, `call`, `all_in`, set `amount` to the value provided
|
||||
by the matching legal action (typically 0 for fold/check, the call cost
|
||||
for call, and the player's remaining stack for all_in).
|
||||
- You MUST pick an action whose name appears in `legal_actions`. Anything
|
||||
else risks being coerced to fold by the engine.
|
||||
|
||||
# Strategic guidance
|
||||
|
||||
- Open value-leaning preflop ranges in late position; tighten in early
|
||||
position.
|
||||
- Continuation-bet on favourable boards; balance with checks on dynamic
|
||||
boards where your range is capped.
|
||||
- Adjust to opponents' tendencies inferred from the hand history (passive
|
||||
callers, aggressive 3-bettors, etc.).
|
||||
- Manage stack-to-pot ratio: avoid bloating pots with marginal made hands;
|
||||
apply pressure with strong draws when fold equity is meaningful.
|
||||
- Never tilt: each decision is independent; ignore prior bad beats when
|
||||
computing pot odds and equity.
|
||||
|
||||
# Output discipline
|
||||
|
||||
- Return ONLY the JSON object on a single line. No explanations, no markdown
|
||||
fencing, no leading text.
|
||||
- If unsure, prefer the safest legal action (`check` if available, else
|
||||
`call` if cheap, else `fold`).
|
||||
Reference in New Issue
Block a user