gglib_core/normalize/parser.rs
1//! The [`ToolCallParser`] trait and its companion [`ParserOutput`].
2//!
3//! A parser consumes raw text or reasoning chunks from an LLM stream and
4//! produces a normalized [`ParserOutput`] containing:
5//!
6//! - `forward_text` — bytes that should appear in the downstream
7//! `LlmStreamEvent::TextDelta`.
8//! - `forward_reasoning` — bytes that should appear in the downstream
9//! `LlmStreamEvent::ReasoningDelta`.
10//! - `tool_calls` — fully-assembled tool calls extracted from dialect
11//! markup (e.g. Qwen XML).
12//! - `errors` — non-fatal normalization failures.
13//!
14//! Parsers are stream-stateful: the caller (the `NormalizingStream` adapter)
15//! constructs one parser per stream and drives it with chunk-by-chunk input.
16//! Parsers must therefore be chunk-safe — they buffer ambiguous trailing
17//! bytes internally and flush them on the next call or at [`ToolCallParser::finish`].
18//!
19//! ## Adding a new parser
20//!
21//! 1. Implement [`ToolCallParser`] in a new file under
22//! [`super::parsers`].
23//! 2. Add a single match arm to [`super::registry::get_parser`] keyed on a
24//! new `format:*` tag (see [`super::tags`]).
25//!
26//! No other crate participates in the dispatch decision.
27
28use super::error::NormalizationError;
29use crate::domain::agent::ToolCall;
30
31/// Result of feeding one chunk of input to a parser.
32///
33/// All four fields are independent: a single chunk can produce text bytes,
34/// reasoning bytes, completed tool calls, and errors all at once. Empty
35/// vectors / strings are the common case and indicate "nothing to flush".
36#[derive(Debug, Default, Clone)]
37pub struct ParserOutput {
38 /// Bytes to emit on the downstream text channel.
39 pub forward_text: String,
40 /// Bytes to emit on the downstream reasoning channel.
41 pub forward_reasoning: String,
42 /// Tool calls fully assembled by this chunk. Each item is ready to be
43 /// emitted as a single, complete `LlmStreamEvent::ToolCallDelta`.
44 pub tool_calls: Vec<ToolCall>,
45 /// Non-fatal normalization issues detected by this chunk.
46 pub errors: Vec<NormalizationError>,
47}
48
49impl ParserOutput {
50 /// Convenience constructor for a passthrough text chunk.
51 #[must_use]
52 pub fn text(s: impl Into<String>) -> Self {
53 Self {
54 forward_text: s.into(),
55 ..Self::default()
56 }
57 }
58
59 /// Convenience constructor for a passthrough reasoning chunk.
60 #[must_use]
61 pub fn reasoning(s: impl Into<String>) -> Self {
62 Self {
63 forward_reasoning: s.into(),
64 ..Self::default()
65 }
66 }
67
68 /// `true` when this output carries no bytes, no tool calls, and no errors.
69 #[must_use]
70 pub const fn is_empty(&self) -> bool {
71 self.forward_text.is_empty()
72 && self.forward_reasoning.is_empty()
73 && self.tool_calls.is_empty()
74 && self.errors.is_empty()
75 }
76}
77
78/// Stream-stateful parser that normalizes a single LLM dialect into
79/// canonical [`ParserOutput`] fragments.
80///
81/// Implementations MUST:
82///
83/// - Be chunk-safe: dialect markers may straddle chunk boundaries, so the
84/// parser must internally buffer ambiguous trailing bytes and flush them
85/// on a later call or at [`Self::finish`].
86/// - Be deterministic: feeding the same byte sequence in any chunking yields
87/// the same total output (modulo when individual bytes flush).
88/// - Never lose input bytes: every byte of input is either forwarded
89/// verbatim, consumed as part of a recognised marker, or surfaced via a
90/// [`NormalizationError`].
91///
92/// Implementations are NOT required to be `Send` here — the
93/// `NormalizingStream` adapter erases the parser through `Box<dyn …>` and
94/// adds the `Send` bound at that boundary.
95pub trait ToolCallParser: Send {
96 /// Feed a chunk that arrived on the upstream text channel.
97 fn push_text(&mut self, chunk: &str) -> ParserOutput;
98
99 /// Feed a chunk that arrived on the upstream reasoning channel.
100 fn push_reasoning(&mut self, chunk: &str) -> ParserOutput;
101
102 /// Flush any buffered partial state at end-of-stream.
103 ///
104 /// Called exactly once per stream, after the last `push_*` call and
105 /// before the surrounding `Done` event is forwarded downstream.
106 /// Implementations should emit any held-back bytes as text and surface
107 /// any unfinished marker state as an [`NormalizationError`].
108 fn finish(&mut self) -> ParserOutput;
109}