gglib_core/ports/llm_completion.rs
1//! Port definition for streaming LLM chat completions.
2//!
3//! This module defines the infrastructure contract that the agent loop uses to
4//! drive an LLM. The port is intentionally narrow: it speaks **domain types**
5//! ([`AgentMessage`], [`ToolDefinition`], [`LlmStreamEvent`]) and hides all
6//! vendor wire-format details (`OpenAI` JSON schemas, SSE framing, HTTP headers,
7//! etc.) behind the trait boundary.
8//!
9//! # Adapter responsibility
10//!
11//! A concrete implementation (e.g. in `gglib-axum` or `gglib-proxy`) is
12//! responsible for:
13//!
14//! 1. Translating `&[AgentMessage]` into the vendor's `messages` array,
15//! serialising `ToolCall::arguments` (`serde_json::Value`) into the JSON
16//! string form that OpenAI-compatible APIs require.
17//! 2. Translating `&[ToolDefinition]` into the vendor's `tools` array.
18//! 3. Parsing the streaming SSE response into a sequence of [`LlmStreamEvent`]
19//! values, accumulating incremental tool-call deltas where necessary.
20//! 4. When `response_format` is `Some`, injecting the appropriate
21//! `response_format` / `grammar` field into the vendor request body.
22//!
23//! The agent loop never sees HTTP, never sees `reqwest`, and never contains a
24//! single OpenAI-specific field name.
25
26use std::pin::Pin;
27
28use anyhow::Result;
29use async_trait::async_trait;
30use futures_core::Stream;
31
32use crate::domain::agent::{AgentMessage, LlmStreamEvent, ToolDefinition};
33
34// =============================================================================
35// ResponseFormat — output constraint hint
36// =============================================================================
37
38/// Constrains the output format of a [`LlmCompletionPort::chat_stream`] call.
39///
40/// Pass `Some(&format)` when the caller requires structured output (e.g. for
41/// plan generation in the orchestrator). Adapters that target llama-server
42/// translate these variants as follows:
43///
44/// | Variant | Wire field |
45/// |---------|------------|
46/// | `JsonSchema` | `response_format: { type: "json_schema", json_schema: { schema, strict } }` |
47/// | `Grammar` | `grammar: "<gbnf string>"` (llama.cpp extension) |
48///
49/// Normal agent-loop calls pass `None`, which leaves the model free-form.
50#[derive(Debug, Clone)]
51pub enum ResponseFormat {
52 /// Constrain the output to a JSON object matching the given JSON Schema.
53 ///
54 /// `strict: true` instructs the model to refuse outputs that do not
55 /// conform to the schema. Use `strict: false` for best-effort guidance
56 /// when strict validation would be overly rigid.
57 JsonSchema {
58 /// A valid JSON Schema object (Draft-07 or later).
59 schema: serde_json::Value,
60 /// Whether the model should refuse outputs that violate the schema.
61 strict: bool,
62 },
63 /// Constrain output using a GBNF grammar string (llama.cpp extension).
64 ///
65 /// GBNF grammars are more expressive than JSON Schema for some use cases
66 /// (e.g. constraining enum-only outputs without a schema round-trip).
67 Grammar {
68 /// A valid GBNF grammar string understood by llama-server.
69 gbnf: String,
70 },
71}
72
73// =============================================================================
74// LlmCompletionPort
75// =============================================================================
76
77/// Port that the agent loop uses to drive a streaming LLM.
78///
79/// Implementations translate domain messages + tool definitions into
80/// vendor-specific HTTP requests and stream back [`LlmStreamEvent`] values.
81///
82/// # Contract
83///
84/// - The returned stream **must** end with exactly one [`LlmStreamEvent::Done`]
85/// item, even when the finish reason is abnormal (e.g. `"length"`).
86/// - Text and tool-call delta events may interleave freely before `Done`.
87/// - An `Err` item in the stream signals an unrecoverable infrastructure error;
88/// the agent loop will surface it as [`super::agent::AgentError::Internal`].
89#[async_trait]
90pub trait LlmCompletionPort: Send + Sync {
91 /// Begin a chat-completion request and return a live event stream.
92 ///
93 /// # Parameters
94 ///
95 /// - `messages` — conversation history in domain form.
96 /// - `tools` — tool schemas to advertise to the model.
97 /// - `response_format` — optional output constraint. Pass `None` for
98 /// free-form generation (the default for all existing callers).
99 ///
100 /// # Returns
101 ///
102 /// A pinned, heap-allocated, `Send`-able stream of [`LlmStreamEvent`].
103 /// The caller drives the stream by polling it; each item is either a
104 /// successfully parsed event or an infrastructure error.
105 async fn chat_stream(
106 &self,
107 messages: &[AgentMessage],
108 tools: &[ToolDefinition],
109 response_format: Option<&ResponseFormat>,
110 ) -> Result<Pin<Box<dyn Stream<Item = Result<LlmStreamEvent>> + Send>>>;
111}