gglib_core/normalize/
history.rs

1//! Cross-turn "thinking debt" removal for chat history.
2//!
3//! Small reasoning models (e.g. Qwen3.5-4B) have a strong tendency to
4//! pattern-match their own previous `<think>` traces in the conversation
5//! history and produce an unbounded thinking stream that never closes —
6//! the model sees prior reasoning trails and tries to extend them. The
7//! reference fix, mirroring `OpenAI`'s native behavior, is to drop reasoning
8//! artifacts from prior assistant turns before the model sees them.
9//!
10//! This module is the **single source of truth** for that scrub. Every
11//! surface that builds a chat-completion request body must pipe its
12//! `messages` array through [`strip_thinking_debt`] so the proxy, the
13//! in-process agent loop (CLI / Tauri), and any future direct-mode
14//! consumer all benefit equally.
15//!
16//! The transform is:
17//!
18//! * Unconditional — there is no per-model gate. Non-reasoning messages
19//!   simply have nothing to strip and pass through untouched.
20//! * Defensive — only assistant messages are touched; user, system, tool,
21//!   and developer messages are never modified.
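//! * Conservative on shape — the `reasoning_content` key is removed
//!   outright. String `content` has every closed `<think>...</think>` block
//!   excised, and a `content` string that was rewritten this way is also
//!   trimmed of leading and trailing whitespace. Non-string `content`
//!   (multi-part array form) is left alone.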
25//! * Forward-safe on unclosed tags — an unclosed `<think>` from the most
26//!   recent turn is preserved verbatim; the upstream is responsible for
27//!   closing it.
28
29use serde_json::Value;
30
31/// Strip reasoning artifacts from prior assistant messages in `messages`.
32///
33/// Returns the number of assistant entries that were modified. A return
34/// value of `0` means the caller can safely skip any re-serialization
35/// step.
36///
37/// See the module docs for the exact rules.
38pub fn strip_thinking_debt(messages: &mut [Value]) -> usize {
39    let mut touched = 0usize;
40    for msg in messages.iter_mut() {
41        let Some(obj) = msg.as_object_mut() else {
42            continue;
43        };
44        let is_assistant = obj
45            .get("role")
46            .and_then(|r| r.as_str())
47            .is_some_and(|r| r == "assistant");
48        if !is_assistant {
49            continue;
50        }
51
52        let removed_reasoning = obj.remove("reasoning_content").is_some();
53        let stripped_inline = if let Some(Value::String(s)) = obj.get_mut("content") {
54            strip_think_blocks(s).is_some_and(|new_s| {
55                *s = new_s;
56                true
57            })
58        } else {
59            false
60        };
61
62        if removed_reasoning || stripped_inline {
63            touched += 1;
64        }
65    }
66    touched
67}
68
/// Excise each closed `<think>...</think>` span from `s`.
///
/// Tag matching is case-sensitive and non-nesting: an opening `<think>` is
/// closed by the first `</think>` found after it. Scanning stops at the first
/// `<think>` that has no closing tag; that tag and the text following it are
/// carried over into the result.
///
/// Returns `None` when no closed block was found, i.e. `s` needs no rewrite.
/// Otherwise returns `Some` with the remaining text, trimmed of leading and
/// trailing whitespace.
fn strip_think_blocks(s: &str) -> Option<String> {
    const OPEN: &str = "<think>";
    const CLOSE: &str = "</think>";

    // Bail out before allocating unless the first `<think>` is actually closed.
    let (head, after_open) = s.split_once(OPEN)?;
    let (_, mut tail) = after_open.split_once(CLOSE)?;

    let mut kept = String::with_capacity(s.len());
    kept.push_str(head);

    // Consume any further closed blocks; `tail` always holds the unscanned text.
    while let Some((before, after_open)) = tail.split_once(OPEN) {
        let Some((_, after_close)) = after_open.split_once(CLOSE) else {
            // Unclosed `<think>`: leave it inside `tail` and stop scanning.
            break;
        };
        kept.push_str(before);
        tail = after_close;
    }

    kept.push_str(tail);
    Some(kept.trim().to_owned())
}
103
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Unwrap a JSON array value into its elements; panics on any other value.
    fn msgs(v: Value) -> Vec<Value> {
        match v {
            Value::Array(entries) => entries,
            other => panic!("expected array, got {other:?}"),
        }
    }

    /// Run the scrub over `history` and return the touched count together
    /// with the resulting messages.
    fn scrub(history: Value) -> (usize, Vec<Value>) {
        let mut entries = msgs(history);
        let touched = strip_thinking_debt(&mut entries);
        (touched, entries)
    }

    #[test]
    fn strip_removes_reasoning_content_from_assistant_message() {
        let (touched, after) = scrub(json!([
            {"role": "user", "content": "hi"},
            {"role": "assistant", "content": "hello", "reasoning_content": "long ramble..."}
        ]));
        assert_eq!(touched, 1);
        assert_eq!(after[0]["content"], "hi");
        assert_eq!(after[1]["content"], "hello");
        assert!(after[1].get("reasoning_content").is_none());
    }

    #[test]
    fn strip_removes_inline_think_blocks_from_assistant_content() {
        let (touched, after) = scrub(json!([
            {"role": "assistant", "content": "<think>secret\nplan</think>The answer is 42."}
        ]));
        assert_eq!(touched, 1);
        assert_eq!(after[0]["content"], "The answer is 42.");
    }

    #[test]
    fn strip_handles_multiple_think_blocks() {
        let (_, after) = scrub(json!([
            {"role": "assistant", "content": "<think>a</think>between<think>b</think>after"}
        ]));
        assert_eq!(after[0]["content"], "betweenafter");
    }

    #[test]
    fn strip_leaves_unclosed_think_intact() {
        let (touched, after) = scrub(json!([
            {"role": "assistant", "content": "<think>still going..."}
        ]));
        assert_eq!(touched, 0);
        assert_eq!(after[0]["content"], "<think>still going...");
    }

    #[test]
    fn strip_does_not_touch_user_or_system_or_tool_messages() {
        // Non-assistant roles must come back exactly as they went in, even
        // when they carry think tags or a reasoning_content key.
        let before = json!([
            {"role": "system", "content": "<think>policy</think>be helpful", "reasoning_content": "x"},
            {"role": "user", "content": "<think>ignore</think>question", "reasoning_content": "y"},
            {"role": "tool", "content": "<think>tool</think>result", "tool_call_id": "c1", "reasoning_content": "z"}
        ]);
        let (touched, after) = scrub(before.clone());
        assert_eq!(touched, 0);
        assert_eq!(Value::Array(after), before);
    }

    #[test]
    fn strip_handles_empty_messages_array() {
        let (touched, after) = scrub(json!([]));
        assert_eq!(touched, 0);
        assert!(after.is_empty());
    }

    #[test]
    fn strip_skips_when_nothing_to_remove() {
        let before = json!([
            {"role": "assistant", "content": "plain answer"}
        ]);
        let (touched, after) = scrub(before.clone());
        assert_eq!(touched, 0);
        assert_eq!(Value::Array(after), before);
    }

    #[test]
    fn strip_preserves_non_string_content() {
        // Multi-part (array) content is not inspected; only the
        // reasoning_content key is dropped.
        let (touched, after) = scrub(json!([
            {
                "role": "assistant",
                "content": [{"type": "text", "text": "<think>x</think>hi"}],
                "reasoning_content": "r"
            }
        ]));
        assert_eq!(touched, 1);
        assert!(after[0].get("reasoning_content").is_none());
        assert_eq!(after[0]["content"][0]["text"], "<think>x</think>hi");
    }

    #[test]
    fn strip_skips_non_object_messages() {
        // A stray non-object entry must be skipped rather than cause a panic.
        let (touched, after) = scrub(json!([
            "garbage",
            {"role": "assistant", "reasoning_content": "drop me"}
        ]));
        assert_eq!(touched, 1);
        assert!(after[1].get("reasoning_content").is_none());
    }

    #[test]
    fn strip_handles_assistant_without_role_string() {
        // A numeric role is not the string "assistant", so the entry is skipped.
        let (touched, after) = scrub(json!([
            {"role": 7, "content": "<think>x</think>y", "reasoning_content": "r"}
        ]));
        assert_eq!(touched, 0);
        assert_eq!(after[0]["reasoning_content"], "r");
    }

    #[test]
    fn strip_handles_assistant_with_only_inline_think() {
        // No reasoning_content key at all; the inline block alone counts.
        let (touched, after) = scrub(json!([
            {"role": "assistant", "content": "<think>a</think>b"}
        ]));
        assert_eq!(touched, 1);
        assert_eq!(after[0]["content"], "b");
    }
}
gglib_core/normalize/history.rs

gglib_core/normalize/
history.rs