gglib_core/normalize/
parser.rs

1//! The [`ToolCallParser`] trait and its companion [`ParserOutput`].
2//!
3//! A parser consumes raw text or reasoning chunks from an LLM stream and
4//! produces a normalized [`ParserOutput`] containing:
5//!
6//! - `forward_text` — bytes that should appear in the downstream
7//!   `LlmStreamEvent::TextDelta`.
8//! - `forward_reasoning` — bytes that should appear in the downstream
9//!   `LlmStreamEvent::ReasoningDelta`.
10//! - `tool_calls` — fully-assembled tool calls extracted from dialect
11//!   markup (e.g. Qwen XML).
12//! - `errors` — non-fatal normalization failures.
13//!
14//! Parsers are stream-stateful: the caller (the `NormalizingStream` adapter)
15//! constructs one parser per stream and drives it with chunk-by-chunk input.
16//! Parsers must therefore be chunk-safe — they buffer ambiguous trailing
17//! bytes internally and flush them on the next call or at [`ToolCallParser::finish`].
18//!
19//! ## Adding a new parser
20//!
21//! 1. Implement [`ToolCallParser`] in a new file under
22//!    [`super::parsers`].
23//! 2. Add a single match arm to [`super::registry::get_parser`] keyed on a
24//!    new `format:*` tag (see [`super::tags`]).
25//!
26//! No other crate participates in the dispatch decision.
27
28use super::error::NormalizationError;
29use crate::domain::agent::ToolCall;
30
31/// Result of feeding one chunk of input to a parser.
32///
33/// All four fields are independent: a single chunk can produce text bytes,
34/// reasoning bytes, completed tool calls, and errors all at once.  Empty
35/// vectors / strings are the common case and indicate "nothing to flush".
36#[derive(Debug, Default, Clone)]
37pub struct ParserOutput {
38    /// Bytes to emit on the downstream text channel.
39    pub forward_text: String,
40    /// Bytes to emit on the downstream reasoning channel.
41    pub forward_reasoning: String,
42    /// Tool calls fully assembled by this chunk.  Each item is ready to be
43    /// emitted as a single, complete `LlmStreamEvent::ToolCallDelta`.
44    pub tool_calls: Vec<ToolCall>,
45    /// Non-fatal normalization issues detected by this chunk.
46    pub errors: Vec<NormalizationError>,
47}
48
49impl ParserOutput {
50    /// Convenience constructor for a passthrough text chunk.
51    #[must_use]
52    pub fn text(s: impl Into<String>) -> Self {
53        Self {
54            forward_text: s.into(),
55            ..Self::default()
56        }
57    }
58
59    /// Convenience constructor for a passthrough reasoning chunk.
60    #[must_use]
61    pub fn reasoning(s: impl Into<String>) -> Self {
62        Self {
63            forward_reasoning: s.into(),
64            ..Self::default()
65        }
66    }
67
68    /// `true` when this output carries no bytes, no tool calls, and no errors.
69    #[must_use]
70    pub const fn is_empty(&self) -> bool {
71        self.forward_text.is_empty()
72            && self.forward_reasoning.is_empty()
73            && self.tool_calls.is_empty()
74            && self.errors.is_empty()
75    }
76}
77
78/// Stream-stateful parser that normalizes a single LLM dialect into
79/// canonical [`ParserOutput`] fragments.
80///
81/// Implementations MUST:
82///
83/// - Be chunk-safe: dialect markers may straddle chunk boundaries, so the
84///   parser must internally buffer ambiguous trailing bytes and flush them
85///   on a later call or at [`Self::finish`].
86/// - Be deterministic: feeding the same byte sequence in any chunking yields
87///   the same total output (modulo when individual bytes flush).
88/// - Never lose input bytes: every byte of input is either forwarded
89///   verbatim, consumed as part of a recognised marker, or surfaced via a
90///   [`NormalizationError`].
91///
92/// Implementations are NOT required to be `Send` here — the
93/// `NormalizingStream` adapter erases the parser through `Box<dyn …>` and
94/// adds the `Send` bound at that boundary.
95pub trait ToolCallParser: Send {
96    /// Feed a chunk that arrived on the upstream text channel.
97    fn push_text(&mut self, chunk: &str) -> ParserOutput;
98
99    /// Feed a chunk that arrived on the upstream reasoning channel.
100    fn push_reasoning(&mut self, chunk: &str) -> ParserOutput;
101
102    /// Flush any buffered partial state at end-of-stream.
103    ///
104    /// Called exactly once per stream, after the last `push_*` call and
105    /// before the surrounding `Done` event is forwarded downstream.
106    /// Implementations should emit any held-back bytes as text and surface
107    /// any unfinished marker state as an [`NormalizationError`].
108    fn finish(&mut self) -> ParserOutput;
109}