gglib_core/domain/thinking/
normalize.rs

1//! Tag normalisation for variant thinking-tag formats.
2//!
3//! Converts all supported tag families to canonical `<think>` / `</think>`.
4
5/// Normalize variant thinking-tag formats to the canonical `<think>` / `</think>`.
6///
7/// This function handles complete text (not streaming chunks).  For streaming
8/// use cases, prefer [`super::ThinkingAccumulator`].
9///
10/// # Supported conversions
11///
12/// - `<seed:think>` → `<think>`, `</seed:think>` → `</think>`
13/// - `<|START_THINKING|>` → `<think>`, `<|END_THINKING|>` → `</think>`
14/// - `<reasoning>` → `<think>`, `</reasoning>` → `</think>`
15pub fn normalize_thinking_tags(text: &str) -> String {
16    if text.is_empty() {
17        return String::new();
18    }
19
20    let mut out = text.to_string();
21
22    // Order matters: longest/most-specific first to avoid partial matches.
23    // <|START_THINKING|> / <|END_THINKING|>
24    out = replace_case_insensitive(&out, "<|START_THINKING|>", "<think>");
25    out = replace_case_insensitive(&out, "<|END_THINKING|>", "</think>");
26
27    // <seed:think> / </seed:think>
28    out = replace_case_insensitive(&out, "<seed:think>", "<think>");
29    out = replace_case_insensitive(&out, "</seed:think>", "</think>");
30
31    // <reasoning> / </reasoning>
32    out = replace_case_insensitive(&out, "<reasoning>", "<think>");
33    out = replace_case_insensitive(&out, "</reasoning>", "</think>");
34
35    out
36}
37
/// ASCII-case-insensitive search-and-replace (no regex dependency).
///
/// Returns a copy of `haystack` in which every non-overlapping occurrence of
/// `needle` — compared while ignoring the case of ASCII letters only — is
/// replaced by `replacement`.  Text outside the matches is copied through
/// byte-for-byte, including its original letter case.
///
/// Only ASCII letters are case-folded.  Full Unicode lowercasing can change
/// the byte length of a string (for example `İ` U+0130 grows and the Kelvin
/// sign U+212A shrinks), which would make offsets found in the folded copy
/// invalid for slicing the original text.  ASCII folding preserves byte
/// length and char boundaries, so offsets are interchangeable between the
/// two strings.
///
/// An empty `needle` matches nothing: `haystack` is returned unchanged.
fn replace_case_insensitive(haystack: &str, needle: &str, replacement: &str) -> String {
    // An empty pattern matches at every position and would never advance the
    // cursor; treat it as "nothing to replace".
    if needle.is_empty() {
        return haystack.to_owned();
    }

    let needle_folded = needle.to_ascii_lowercase();
    let hay_folded = haystack.to_ascii_lowercase();
    let mut result = String::with_capacity(haystack.len());
    // Byte offset in `haystack` of the first byte not yet copied to `result`.
    let mut copied_up_to = 0;

    // `match_indices` yields non-overlapping matches left to right.  The
    // offsets are valid in `haystack` too because ASCII folding keeps every
    // byte position intact.
    for (match_start, _) in hay_folded.match_indices(needle_folded.as_str()) {
        result.push_str(&haystack[copied_up_to..match_start]);
        result.push_str(replacement);
        copied_up_to = match_start + needle.len();
    }
    result.push_str(&haystack[copied_up_to..]);
    result
}
54
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that normalising `input` yields exactly `expected`.
    ///
    /// `#[track_caller]` keeps failure locations pointing at the calling test.
    #[track_caller]
    fn assert_normalizes(input: &str, expected: &str) {
        assert_eq!(normalize_thinking_tags(input), expected);
    }

    /// Empty input produces empty output.
    #[test]
    fn empty_string_unchanged() {
        assert_normalizes("", "");
    }

    /// Canonical tags pass through untouched.
    #[test]
    fn standard_think_unchanged() {
        let canonical = "<think>Some thinking</think>Response";
        assert_normalizes(canonical, canonical);
    }

    /// `<seed:think>` pairs are rewritten.
    #[test]
    fn seed_think_normalized() {
        assert_normalizes(
            "<seed:think>Seed thinking</seed:think>Response",
            "<think>Seed thinking</think>Response",
        );
    }

    /// `<|START_THINKING|>` / `<|END_THINKING|>` pairs are rewritten.
    #[test]
    fn start_thinking_normalized() {
        assert_normalizes(
            "<|START_THINKING|>Command R thinking<|END_THINKING|>Response",
            "<think>Command R thinking</think>Response",
        );
    }

    /// `<reasoning>` pairs are rewritten.
    #[test]
    fn reasoning_normalized() {
        assert_normalizes(
            "<reasoning>Deep reasoning</reasoning>Response",
            "<think>Deep reasoning</think>Response",
        );
    }

    /// Tag matching ignores letter case.
    #[test]
    fn case_insensitive() {
        assert_normalizes("<Reasoning>Mixed</Reasoning>", "<think>Mixed</think>");
        assert_normalizes("<SEED:THINK>Upper</SEED:THINK>", "<think>Upper</think>");
    }

    /// Different tag families in one input are all rewritten.
    #[test]
    fn multiple_normalizations() {
        assert_normalizes(
            "<reasoning>First</reasoning> and <seed:think>Second</seed:think>",
            "<think>First</think> and <think>Second</think>",
        );
    }
}