gglib_core/ports/
llm_completion.rs

1//! Port definition for streaming LLM chat completions.
2//!
3//! This module defines the infrastructure contract that the agent loop uses to
4//! drive an LLM.  The port is intentionally narrow: it speaks **domain types**
5//! ([`AgentMessage`], [`ToolDefinition`], [`LlmStreamEvent`]) and hides all
6//! vendor wire-format details (`OpenAI` JSON schemas, SSE framing, HTTP headers,
7//! etc.) behind the trait boundary.
8//!
9//! # Adapter responsibility
10//!
11//! A concrete implementation (e.g. in `gglib-axum` or `gglib-proxy`) is
12//! responsible for:
13//!
14//! 1. Translating `&[AgentMessage]` into the vendor's `messages` array,
15//!    serialising `ToolCall::arguments` (`serde_json::Value`) into the JSON
16//!    string form that OpenAI-compatible APIs require.
17//! 2. Translating `&[ToolDefinition]` into the vendor's `tools` array.
18//! 3. Parsing the streaming SSE response into a sequence of [`LlmStreamEvent`]
19//!    values, accumulating incremental tool-call deltas where necessary.
20//! 4. When `response_format` is `Some`, injecting the appropriate
21//!    `response_format` / `grammar` field into the vendor request body.
22//!
23//! The agent loop never sees HTTP, never sees `reqwest`, and never contains a
24//! single OpenAI-specific field name.
25
26use std::pin::Pin;
27
28use anyhow::Result;
29use async_trait::async_trait;
30use futures_core::Stream;
31
32use crate::domain::agent::{AgentMessage, LlmStreamEvent, ToolDefinition};
33
34// =============================================================================
35// ResponseFormat — output constraint hint
36// =============================================================================
37
38/// Constrains the output format of a [`LlmCompletionPort::chat_stream`] call.
39///
40/// Pass `Some(&format)` when the caller requires structured output (e.g. for
41/// plan generation in the orchestrator).  Adapters that target llama-server
42/// translate these variants as follows:
43///
44/// | Variant | Wire field |
45/// |---------|------------|
46/// | `JsonSchema` | `response_format: { type: "json_schema", json_schema: { schema, strict } }` |
47/// | `Grammar` | `grammar: "<gbnf string>"` (llama.cpp extension) |
48///
49/// Normal agent-loop calls pass `None`, which leaves the model free-form.
50#[derive(Debug, Clone)]
51pub enum ResponseFormat {
52    /// Constrain the output to a JSON object matching the given JSON Schema.
53    ///
54    /// `strict: true` instructs the model to refuse outputs that do not
55    /// conform to the schema.  Use `strict: false` for best-effort guidance
56    /// when strict validation would be overly rigid.
57    JsonSchema {
58        /// A valid JSON Schema object (Draft-07 or later).
59        schema: serde_json::Value,
60        /// Whether the model should refuse outputs that violate the schema.
61        strict: bool,
62    },
63    /// Constrain output using a GBNF grammar string (llama.cpp extension).
64    ///
65    /// GBNF grammars are more expressive than JSON Schema for some use cases
66    /// (e.g. constraining enum-only outputs without a schema round-trip).
67    Grammar {
68        /// A valid GBNF grammar string understood by llama-server.
69        gbnf: String,
70    },
71}
72
73// =============================================================================
74// LlmCompletionPort
75// =============================================================================
76
77/// Port that the agent loop uses to drive a streaming LLM.
78///
79/// Implementations translate domain messages + tool definitions into
80/// vendor-specific HTTP requests and stream back [`LlmStreamEvent`] values.
81///
82/// # Contract
83///
84/// - The returned stream **must** end with exactly one [`LlmStreamEvent::Done`]
85///   item, even when the finish reason is abnormal (e.g. `"length"`).
86/// - Text and tool-call delta events may interleave freely before `Done`.
87/// - An `Err` item in the stream signals an unrecoverable infrastructure error;
88///   the agent loop will surface it as [`super::agent::AgentError::Internal`].
89#[async_trait]
90pub trait LlmCompletionPort: Send + Sync {
91    /// Begin a chat-completion request and return a live event stream.
92    ///
93    /// # Parameters
94    ///
95    /// - `messages` — conversation history in domain form.
96    /// - `tools` — tool schemas to advertise to the model.
97    /// - `response_format` — optional output constraint.  Pass `None` for
98    ///   free-form generation (the default for all existing callers).
99    ///
100    /// # Returns
101    ///
102    /// A pinned, heap-allocated, `Send`-able stream of [`LlmStreamEvent`].
103    /// The caller drives the stream by polling it; each item is either a
104    /// successfully parsed event or an infrastructure error.
105    async fn chat_stream(
106        &self,
107        messages: &[AgentMessage],
108        tools: &[ToolDefinition],
109        response_format: Option<&ResponseFormat>,
110    ) -> Result<Pin<Box<dyn Stream<Item = Result<LlmStreamEvent>> + Send>>>;
111}
gglib_core/ports/llm_completion.rs

gglib_core/ports/
llm_completion.rs