gglib_core/domain/thinking/
parse.rs

1//! Complete-message parsing and embedding of thinking content.
2//!
3//! These functions work on full (non-streaming) messages.  For streaming,
4//! see [`super::ThinkingAccumulator`].
5
6use super::normalize::normalize_thinking_tags;
7use super::types::ParsedThinkingContent;
8
9/// Parse thinking content from a complete message.
10///
11/// The thinking block must appear at the very start of the text.
12/// Tags are normalised before matching, so all four formats are accepted.
13///
14/// ```
15/// use gglib_core::domain::thinking::parse_thinking_content;
16///
17/// let r = parse_thinking_content("<think>step 1</think>\nHello!");
18/// assert_eq!(r.thinking.as_deref(), Some("step 1"));
19/// assert_eq!(r.content, "Hello!");
20/// ```
21pub fn parse_thinking_content(text: &str) -> ParsedThinkingContent {
22    let empty = ParsedThinkingContent {
23        thinking: None,
24        content: String::new(),
25        duration_seconds: None,
26    };
27
28    if text.is_empty() {
29        return empty;
30    }
31
32    let normalized = normalize_thinking_tags(text);
33
34    // Match: ^<think(\s+duration="FLOAT")?\s*>(BODY)</think>\s*
35    if let Some(after_tag) = strip_prefix_ci(&normalized, "<think") {
36        // Parse optional attributes until '>'
37        let after_tag = after_tag.trim_start();
38        let (duration, rest) = parse_open_tag_attrs(after_tag);
39
40        // `rest` starts right after the '>'
41        if let Some(end_pos) = find_ci(rest, "</think>") {
42            let thinking_raw = &rest[..end_pos];
43            let thinking = thinking_raw.trim();
44            let after_close = &rest[end_pos + "</think>".len()..];
45            let content = after_close.trim_start().to_string();
46
47            return ParsedThinkingContent {
48                thinking: if thinking.is_empty() {
49                    None
50                } else {
51                    Some(thinking.to_string())
52                },
53                content,
54                duration_seconds: duration,
55            };
56        }
57    }
58
59    // No match — return original text as content.
60    ParsedThinkingContent {
61        thinking: None,
62        content: text.to_string(),
63        duration_seconds: None,
64    }
65}
66
/// Prefix `content` with a canonical `<think>` block carrying `thinking`.
///
/// When `thinking` is `None` or empty, `content` is returned on its own.
/// Otherwise the output is `<think>THINKING</think>`, a newline, then
/// `content`.  A supplied `duration_seconds` is written as a `duration`
/// attribute on the opening tag, formatted with one decimal place.
///
/// Intended as the inverse of [`parse_thinking_content`].
///
/// ```
/// use gglib_core::domain::thinking::embed_thinking_content;
///
/// let msg = embed_thinking_content(Some("step 1"), "Answer", Some(3.5));
/// assert_eq!(msg, "<think duration=\"3.5\">step 1</think>\nAnswer");
/// ```
pub fn embed_thinking_content(
    thinking: Option<&str>,
    content: &str,
    duration_seconds: Option<f64>,
) -> String {
    // An empty thinking string is treated exactly like an absent one.
    let reasoning = match thinking.filter(|t| !t.is_empty()) {
        Some(reasoning) => reasoning,
        None => return content.to_string(),
    };

    match duration_seconds {
        Some(secs) => format!("<think duration=\"{secs:.1}\">{reasoning}</think>\n{content}"),
        None => format!("<think>{reasoning}</think>\n{content}"),
    }
}
91
/// Lightweight check for whether text begins with a thinking tag.
///
/// Leading whitespace is skipped, then the start of the text is compared
/// against the known opening-tag prefixes (`<think`, `<reasoning`,
/// `<seed:think`, `<|start_thinking|`), ignoring ASCII case.
///
/// Only the first few bytes are inspected; the message is never copied or
/// lowercased as a whole.  Comparison is ASCII-only on purpose: the tag names
/// are ASCII, and Unicode case folding would let look-alike characters (for
/// example U+212A KELVIN SIGN, which lowercases to `k`) pass as a tag.
pub fn has_thinking_content(text: &str) -> bool {
    const OPENERS: [&str; 4] = ["<think", "<reasoning", "<seed:think", "<|start_thinking|"];

    // Work on bytes so the fixed-length prefix slice can never split a
    // multi-byte character.
    let head = text.trim_start().as_bytes();

    OPENERS.iter().any(|opener| {
        head.len() >= opener.len()
            && head[..opener.len()].eq_ignore_ascii_case(opener.as_bytes())
    })
}
100
/// Format a duration for human display.
///
/// Durations under one minute are shown in seconds with one decimal place
/// (`"5.5s"`).  Longer durations are rounded to the nearest whole second and
/// shown as minutes and seconds (`"1m 30s"`).
///
/// The total is rounded *before* it is split into minutes and seconds, so a
/// value such as `119.6` is rendered as `"2m 0s"` rather than `"1m 60s"`.
///
/// ```
/// use gglib_core::domain::thinking::format_thinking_duration;
///
/// assert_eq!(format_thinking_duration(5.5), "5.5s");
/// assert_eq!(format_thinking_duration(90.0), "1m 30s");
/// assert_eq!(format_thinking_duration(119.6), "2m 0s");
/// ```
// The cast is intentional: on this path the value is at least 60 (or NaN),
// and float-to-integer `as` casts saturate and map NaN to 0.
#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
pub fn format_thinking_duration(seconds: f64) -> String {
    if seconds < 60.0 {
        return format!("{seconds:.1}s");
    }

    // Round the whole duration first so that the seconds component can never
    // reach 60; any carry moves into the minutes.
    let total = seconds.round() as u64;
    format!("{}m {}s", total / 60, total % 60)
}
119
120// ---------------------------------------------------------------------------
121// Private helpers
122// ---------------------------------------------------------------------------
123
/// ASCII case-insensitive prefix strip.
///
/// If `text` starts with `prefix` (ignoring ASCII case), returns the remainder
/// of `text` with its original casing; otherwise returns `None`.
///
/// The comparison is done on the original string rather than on a lowercased
/// copy.  Unicode lowercasing can change a string's byte length, so byte
/// offsets taken from a lowercased copy are not valid in the original and can
/// land inside a multi-byte character.  The checked `get` calls below return
/// `None` instead of panicking when `prefix.len()` is out of range or not on a
/// character boundary of `text`.
///
/// Non-ASCII characters must match exactly; the tag names this module passes
/// in are ASCII.
fn strip_prefix_ci<'a>(text: &'a str, prefix: &str) -> Option<&'a str> {
    let head = text.get(..prefix.len())?;
    if head.eq_ignore_ascii_case(prefix) {
        text.get(prefix.len()..)
    } else {
        None
    }
}
134
/// ASCII case-insensitive find.
///
/// Returns the byte offset **in `haystack`** of the first occurrence of
/// `needle`, ignoring ASCII case, or `None` if there is none.  An empty
/// `needle` matches at offset 0.
///
/// The search runs over the original bytes instead of a lowercased copy.
/// Unicode lowercasing can change byte lengths (for example `İ` grows from two
/// bytes to three), so an offset found in a lowercased copy may not point at
/// the match in the original and may not even be a character boundary.
/// Because non-ASCII bytes must match exactly, an offset returned here is
/// always a character boundary of `haystack`.
fn find_ci(haystack: &str, needle: &str) -> Option<usize> {
    // `windows(0)` panics, so the empty needle is handled up front.
    if needle.is_empty() {
        return Some(0);
    }

    let pattern = needle.as_bytes();
    haystack
        .as_bytes()
        .windows(pattern.len())
        .position(|window| window.eq_ignore_ascii_case(pattern))
}
141
142/// Parse the attribute section of an opening `<think ...>` tag.
143/// Input should be the text after `<think` and before/including `>`.
144/// Returns `(duration_option, rest_after_closing_bracket)`.
145fn parse_open_tag_attrs(s: &str) -> (Option<f64>, &str) {
146    s.find('>').map_or((None, s), |gt| {
147        let attrs = &s[..gt];
148        let rest = &s[gt + 1..];
149        (parse_duration_attr(attrs), rest)
150    })
151}
152
/// Extract the `duration` attribute value from a tag attribute string.
///
/// Looks for `duration="` (ignoring ASCII case) and parses the text up to the
/// next `"` as an `f64`.  Returns `None` when the attribute is missing, the
/// closing quote is missing, or the value does not parse as a float.
///
/// The attribute is located in an ASCII-lowercased copy.  ASCII lowercasing
/// never changes byte lengths, so offsets found in the copy are valid in
/// `attrs`; full Unicode lowercasing does not give that guarantee and could
/// make the value slice start at the wrong byte.
fn parse_duration_attr(attrs: &str) -> Option<f64> {
    const KEY: &str = "duration=\"";

    let lower = attrs.to_ascii_lowercase();
    let value_start = lower.find(KEY)? + KEY.len();
    let value_len = lower[value_start..].find('"')?;

    // Slice the original so the value is parsed exactly as it was written.
    attrs[value_start..value_start + value_len].parse::<f64>().ok()
}
163}