gglib_core/normalize/
registry.rs

1//! Tag-driven parser dispatch.
2//!
3//! [`get_parser`] is the **single source of truth** for which parser handles
4//! which dialect.  Adding a new parser is exactly two file touches:
5//!
6//! 1. Drop a new module under [`super::parsers`].
7//! 2. Add **one** match arm here.
8//!
9//! No other crate looks at `format:*` tags — they call `get_parser` and use
10//! the returned trait object.  This keeps the dialect surface area tightly
11//! contained and prevents drift between callers.
12
13use super::parser::ToolCallParser;
14use super::parsers::{qwen_xml::QwenXmlParser, standard::StandardJsonParser};
15use super::tags;
16
17/// Pick a parser for a model based on its `tags` list.
18///
19/// Tags are scanned in the listed order and the first recognised
20/// `format:*` tag wins.  Models with no recognised tag — the common case —
21/// receive the identity-passthrough [`StandardJsonParser`].
22///
23/// The returned trait object is `Send` because [`ToolCallParser`] requires
24/// `Send`; this lets `NormalizingStream` live on a tokio task without
25/// adding a separate bound.
26#[must_use]
27pub fn get_parser(model_tags: &[String]) -> Box<dyn ToolCallParser> {
28    for t in model_tags {
29        // Future parsers slot in here, one arm each.  Keep this `match`
30        // even with a single arm so adding a new dialect is purely
31        // additive — no structural rewrite required.
32        #[allow(clippy::single_match)]
33        match t.as_str() {
34            tags::FORMAT_QWEN_XML => return Box::new(QwenXmlParser::new()),
35            _ => {}
36        }
37    }
38    Box::new(StandardJsonParser::new())
39}
40
#[cfg(test)]
mod tests {
    use super::*;

    /// One complete tool call in the `<tool_call>`-wrapped form used by the
    /// Qwen dialect tests below.
    const QWEN_CALL: &str = r#"<tool_call>{"name":"x","arguments":{}}</tool_call>"#;

    #[test]
    fn empty_tags_yield_standard_parser() {
        // No tags at all: plain text must come back untouched.
        let no_tags: &[String] = &[];
        let mut parser = get_parser(no_tags);

        let pushed = parser.push_text("hello");

        assert_eq!(pushed.forward_text, "hello");
    }

    #[test]
    fn qwen_tag_yields_qwen_parser() {
        let model_tags = [tags::FORMAT_QWEN_XML.to_owned()];
        let mut parser = get_parser(&model_tags);

        let pushed = parser.push_text(QWEN_CALL);
        let finished = parser.finish();

        // The call is emitted by `push_text`; nothing is left for `finish`.
        assert_eq!(pushed.tool_calls.len(), 1);
        assert!(
            finished.tool_calls.is_empty(),
            "tool calls flush in push, not finish"
        );
    }

    #[test]
    fn unknown_tag_falls_back_to_standard() {
        let model_tags = ["format:does-not-exist".to_owned()];
        let mut parser = get_parser(&model_tags);

        // With an unrecognised tag the markup is forwarded verbatim.
        let raw = "<tool_call>passthrough</tool_call>";
        let pushed = parser.push_text(raw);

        assert_eq!(pushed.forward_text, "<tool_call>passthrough</tool_call>");
    }

    #[test]
    fn first_recognised_tag_wins() {
        // An unknown tag ahead of the Qwen tag must simply be skipped.
        let model_tags = [
            "format:does-not-exist".to_owned(),
            tags::FORMAT_QWEN_XML.to_owned(),
        ];
        let mut parser = get_parser(&model_tags);

        let pushed = parser.push_text(QWEN_CALL);

        assert_eq!(pushed.forward_text, "");
        assert_eq!(pushed.tool_calls.len(), 1);
    }
}