// gglib_core/domain/gguf.rs

//! GGUF domain types.
//!
//! This module contains the domain-facing types for GGUF file metadata
//! and model capabilities. Parsing logic lives in `gglib-gguf`.
5
use std::collections::{BTreeSet, HashMap};
use std::fmt;
8
// =============================================================================
// Capabilities (Structured, forward-compatible)
// =============================================================================
12
13bitflags::bitflags! {
14    /// Known model capabilities detected from GGUF metadata.
15    ///
16    /// Uses bitflags for compile-time safety on stable capabilities.
17    /// Unknown/experimental capabilities go in `GgufCapabilities::extensions`.
18    #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
19    pub struct CapabilityFlags: u32 {
20        /// Model supports reasoning/thinking (e.g., DeepSeek R1, QwQ).
21        const REASONING = 0b0000_0001;
22        /// Model supports tool/function calling (e.g., Hermes, Functionary).
23        const TOOL_CALLING = 0b0000_0010;
24        /// Model supports vision/image input.
25        const VISION = 0b0000_0100;
26        /// Model supports code generation.
27        const CODE = 0b0000_1000;
28        /// Model is a mixture-of-experts architecture.
29        const MOE = 0b0001_0000;
30        /// Model contains embedded MTP (Multi-Token Prediction) draft heads.
31        ///
32        /// Detected via the `{arch}.nextn_predict_layers > 0` GGUF metadata key.
33        /// Enables `--spec-type draft-mtp` speculative decoding in llama-server.
34        const MTP = 0b0010_0000;
35    }
36}
37
38/// Model capabilities detected from GGUF metadata.
39///
40/// Combines stable known capabilities (bitflags) with forward-compatible
41/// extension strings for new/experimental capabilities.
42#[derive(Debug, Clone, Default, PartialEq, Eq)]
43pub struct GgufCapabilities {
44    /// Known stable capabilities (compile-time checked).
45    pub flags: CapabilityFlags,
46    /// Unknown/experimental capabilities (forward-compatible).
47    pub extensions: BTreeSet<String>,
48}
49
50impl GgufCapabilities {
51    /// Create empty capabilities.
52    #[must_use]
53    pub const fn empty() -> Self {
54        Self {
55            flags: CapabilityFlags::empty(),
56            extensions: BTreeSet::new(),
57        }
58    }
59
60    /// Check if reasoning is supported.
61    #[must_use]
62    pub const fn has_reasoning(&self) -> bool {
63        self.flags.contains(CapabilityFlags::REASONING)
64    }
65
66    /// Check if tool calling is supported.
67    #[must_use]
68    pub const fn has_tool_calling(&self) -> bool {
69        self.flags.contains(CapabilityFlags::TOOL_CALLING)
70    }
71
72    /// Check if vision is supported.
73    #[must_use]
74    pub const fn has_vision(&self) -> bool {
75        self.flags.contains(CapabilityFlags::VISION)
76    }
77
78    /// Check if MTP (Multi-Token Prediction) draft heads are present.
79    #[must_use]
80    pub const fn has_mtp(&self) -> bool {
81        self.flags.contains(CapabilityFlags::MTP)
82    }
83
84    /// Convert capabilities to tag strings for model metadata.
85    ///
86    /// Returns tags like "reasoning", "agent" (for tool calling), etc.
87    #[must_use]
88    pub fn to_tags(&self) -> Vec<String> {
89        let mut tags = Vec::new();
90
91        if self.has_reasoning() {
92            tags.push("reasoning".to_string());
93        }
94        if self.has_tool_calling() {
95            // "agent" tag triggers --jinja auto-enable
96            tags.push("agent".to_string());
97        }
98        if self.has_vision() {
99            tags.push("vision".to_string());
100        }
101        if self.flags.contains(CapabilityFlags::CODE) {
102            tags.push("code".to_string());
103        }
104        if self.flags.contains(CapabilityFlags::MOE) {
105            tags.push("moe".to_string());
106        }
107        if self.has_mtp() {
108            // "mtp" tag triggers --spec-type draft-mtp auto-enable
109            tags.push("mtp".to_string());
110        }
111
112        // Add extension tags
113        for ext in &self.extensions {
114            if !tags.contains(ext) {
115                tags.push(ext.clone());
116            }
117        }
118
119        tags
120    }
121}
122
// =============================================================================
// Metadata value types
// =============================================================================
126
/// GGUF metadata value types.
///
/// Represents all possible value types that can appear in GGUF metadata.
#[derive(Debug, Clone)]
pub enum GgufValue {
    /// Unsigned 8-bit integer.
    U8(u8),
    /// Signed 8-bit integer.
    I8(i8),
    /// Unsigned 16-bit integer.
    U16(u16),
    /// Signed 16-bit integer.
    I16(i16),
    /// Unsigned 32-bit integer.
    U32(u32),
    /// Signed 32-bit integer.
    I32(i32),
    /// 32-bit float.
    F32(f32),
    /// Boolean.
    Bool(bool),
    /// Owned string.
    String(String),
    /// Array of values (elements may themselves be arrays).
    Array(Vec<Self>),
    /// Unsigned 64-bit integer.
    U64(u64),
    /// Signed 64-bit integer.
    I64(i64),
    /// 64-bit float.
    F64(f64),
}

impl fmt::Display for GgufValue {
    /// Format the value for human-readable output.
    ///
    /// Scalars and strings use their own `Display` output. Arrays with at
    /// most ten elements are rendered as `[a, b, c]`; longer arrays are
    /// summarised as `[Array with N elements]`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Limit array output to prevent massive tokenizer vocab dumps.
        const MAX_DISPLAYED_ARRAY_LEN: usize = 10;

        match self {
            Self::U8(v) => write!(f, "{v}"),
            Self::I8(v) => write!(f, "{v}"),
            Self::U16(v) => write!(f, "{v}"),
            Self::I16(v) => write!(f, "{v}"),
            Self::U32(v) => write!(f, "{v}"),
            Self::I32(v) => write!(f, "{v}"),
            Self::F32(v) => write!(f, "{v}"),
            Self::Bool(v) => write!(f, "{v}"),
            Self::String(v) => write!(f, "{v}"),
            Self::U64(v) => write!(f, "{v}"),
            Self::I64(v) => write!(f, "{v}"),
            Self::F64(v) => write!(f, "{v}"),
            Self::Array(arr) if arr.len() > MAX_DISPLAYED_ARRAY_LEN => {
                write!(f, "[Array with {} elements]", arr.len())
            }
            Self::Array(arr) => {
                // Write elements straight into the formatter instead of
                // collecting intermediate strings and joining them.
                f.write_str("[")?;
                for (index, item) in arr.iter().enumerate() {
                    if index > 0 {
                        f.write_str(", ")?;
                    }
                    write!(f, "{item}")?;
                }
                f.write_str("]")
            }
        }
    }
}

impl GgufValue {
    /// Try to convert the value to a u64.
    ///
    /// Unsigned integers always convert. Signed integers convert only when
    /// they are non-negative. Floats, booleans, strings and arrays yield
    /// `None`.
    #[must_use]
    // Every `as u64` below is guarded by `*v >= 0`, so no sign is lost.
    #[allow(clippy::cast_sign_loss)]
    pub fn as_u64(&self) -> Option<u64> {
        match self {
            Self::U8(v) => Some(u64::from(*v)),
            Self::U16(v) => Some(u64::from(*v)),
            Self::U32(v) => Some(u64::from(*v)),
            Self::U64(v) => Some(*v),
            Self::I8(v) if *v >= 0 => Some(*v as u64),
            Self::I16(v) if *v >= 0 => Some(*v as u64),
            Self::I32(v) if *v >= 0 => Some(*v as u64),
            Self::I64(v) if *v >= 0 => Some(*v as u64),
            // Negative signed integers and all non-integer variants. Listed
            // explicitly so a new variant forces a decision here.
            Self::I8(_)
            | Self::I16(_)
            | Self::I32(_)
            | Self::I64(_)
            | Self::F32(_)
            | Self::F64(_)
            | Self::Bool(_)
            | Self::String(_)
            | Self::Array(_) => None,
        }
    }

    /// Try to convert the value to a f64.
    ///
    /// Every numeric variant converts. `U64` and `I64` values too large to
    /// be represented exactly in an `f64` are rounded. Booleans, strings and
    /// arrays yield `None`.
    #[must_use]
    // 64-bit integers have no lossless `From` conversion to f64; rounding of
    // very large magnitudes is accepted here.
    #[allow(clippy::cast_precision_loss)]
    pub fn as_f64(&self) -> Option<f64> {
        match self {
            Self::F32(v) => Some(f64::from(*v)),
            Self::F64(v) => Some(*v),
            Self::U8(v) => Some(f64::from(*v)),
            Self::U16(v) => Some(f64::from(*v)),
            Self::U32(v) => Some(f64::from(*v)),
            Self::U64(v) => Some(*v as f64),
            Self::I8(v) => Some(f64::from(*v)),
            Self::I16(v) => Some(f64::from(*v)),
            Self::I32(v) => Some(f64::from(*v)),
            Self::I64(v) => Some(*v as f64),
            Self::Bool(_) | Self::String(_) | Self::Array(_) => None,
        }
    }

    /// Try to get the value as a string reference.
    ///
    /// Returns `Some` only for the `String` variant; nothing is converted.
    #[must_use]
    pub fn as_str(&self) -> Option<&str> {
        match self {
            Self::String(s) => Some(s),
            _ => None,
        }
    }
}
230
// =============================================================================
// Metadata
// =============================================================================
234
/// Parsed metadata from a GGUF file.
///
/// This is the domain-facing type used by services and ports.
/// Parsing logic that produces this type lives in `gglib-gguf`.
///
/// Every field is optional; the derived `Default` value has all options set
/// to `None` and an empty `metadata` map.
#[derive(Debug, Clone, Default)]
pub struct GgufMetadata {
    /// Model name from general.name metadata or filename.
    pub name: Option<String>,
    /// Model architecture (e.g., "llama", "mistral").
    pub architecture: Option<String>,
    /// Quantization type (e.g., "`Q4_K_M`", "`Q8_0`").
    pub quantization: Option<String>,
    /// Number of parameters in billions.
    pub param_count_b: Option<f64>,
    /// Maximum context length.
    pub context_length: Option<u64>,
    /// Number of experts (for `MoE` models).
    pub expert_count: Option<u32>,
    /// Number of experts used during inference (for `MoE` models).
    pub expert_used_count: Option<u32>,
    /// Number of shared experts (for `MoE` models).
    pub expert_shared_count: Option<u32>,
    /// Additional key-value metadata from the file (string representation).
    pub metadata: HashMap<String, String>,
}
260
261/// Raw metadata from GGUF parsing (before string conversion).
262///
263/// Used internally by parsers; services typically use `GgufMetadata`.
264pub type RawMetadata = HashMap<String, GgufValue>;
265
// =============================================================================
// Detection results (for detailed analysis)
// =============================================================================
269
/// Result of reasoning capability detection.
///
/// The derived `Default` value reports no support: `supports_reasoning` is
/// `false`, `confidence` is `0.0`, and no patterns or format are recorded.
#[derive(Debug, Clone, Default)]
pub struct ReasoningDetection {
    /// Whether the model appears to support reasoning/thinking.
    pub supports_reasoning: bool,
    /// Confidence level of the detection (0.0 to 1.0).
    pub confidence: f32,
    /// The specific pattern(s) that matched.
    pub matched_patterns: Vec<String>,
    /// Suggested reasoning format for llama-server.
    pub suggested_format: Option<String>,
}
282
/// Result of tool calling capability detection.
///
/// The derived `Default` value reports no support: `supports_tool_calling`
/// is `false`, `confidence` is `0.0`, and no patterns or format are recorded.
#[derive(Debug, Clone, Default)]
pub struct ToolCallingDetection {
    /// Whether the model appears to support tool/function calling.
    pub supports_tool_calling: bool,
    /// Confidence level of the detection (0.0 to 1.0).
    pub confidence: f32,
    /// The specific pattern(s) that matched.
    pub matched_patterns: Vec<String>,
    /// Detected tool calling format (e.g., "hermes", "llama3", "mistral").
    pub detected_format: Option<String>,
}
295
// =============================================================================
// Tests
// =============================================================================
299
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_capabilities_empty() {
        let caps = GgufCapabilities::empty();
        assert!(!caps.has_reasoning());
        assert!(!caps.has_tool_calling());
        assert!(!caps.has_vision());
        assert!(!caps.has_mtp());
        assert!(caps.to_tags().is_empty());
    }

    #[test]
    fn test_capabilities_flags() {
        let caps = GgufCapabilities {
            flags: CapabilityFlags::REASONING | CapabilityFlags::TOOL_CALLING,
            extensions: BTreeSet::new(),
        };
        assert!(caps.has_reasoning());
        assert!(caps.has_tool_calling());

        let tags = caps.to_tags();
        assert!(tags.contains(&"reasoning".to_string()));
        assert!(tags.contains(&"agent".to_string()));
    }

    #[test]
    fn test_capabilities_tag_order() {
        // With every flag set, tags come out in the documented fixed order.
        let caps = GgufCapabilities {
            flags: CapabilityFlags::all(),
            extensions: BTreeSet::new(),
        };
        assert_eq!(
            caps.to_tags(),
            vec!["reasoning", "agent", "vision", "code", "moe", "mtp"]
        );
    }

    #[test]
    fn test_capabilities_extensions() {
        let mut extensions = BTreeSet::new();
        extensions.insert("experimental-feature".to_string());

        let caps = GgufCapabilities {
            flags: CapabilityFlags::empty(),
            extensions,
        };

        let tags = caps.to_tags();
        assert!(tags.contains(&"experimental-feature".to_string()));
    }

    #[test]
    fn test_capabilities_extensions_deduplicated() {
        // An extension that repeats a flag-derived tag must not appear twice.
        let mut extensions = BTreeSet::new();
        extensions.insert("reasoning".to_string());
        extensions.insert("custom".to_string());

        let caps = GgufCapabilities {
            flags: CapabilityFlags::REASONING,
            extensions,
        };

        assert_eq!(caps.to_tags(), vec!["reasoning", "custom"]);
    }

    #[test]
    fn test_gguf_value_as_u64() {
        assert_eq!(GgufValue::U32(4096).as_u64(), Some(4096));
        assert_eq!(GgufValue::I32(-1).as_u64(), None);
        assert_eq!(GgufValue::String("hello".to_string()).as_u64(), None);
        assert_eq!(GgufValue::I32(100).as_u64(), Some(100));
        assert_eq!(GgufValue::I64(i64::MIN).as_u64(), None);
        assert_eq!(GgufValue::I8(0).as_u64(), Some(0));
    }

    #[test]
    fn test_gguf_value_as_f64() {
        assert!((GgufValue::F32(7.5).as_f64().unwrap() - 7.5).abs() < f64::EPSILON);
        assert!((GgufValue::U64(1000).as_f64().unwrap() - 1000.0).abs() < f64::EPSILON);
        assert_eq!(GgufValue::Bool(true).as_f64(), None);
    }

    #[test]
    fn test_gguf_value_as_str() {
        assert_eq!(GgufValue::String("abc".to_string()).as_str(), Some("abc"));
        assert_eq!(GgufValue::U8(1).as_str(), None);
    }

    #[test]
    fn test_gguf_value_display() {
        assert_eq!(GgufValue::U32(42).to_string(), "42");
        assert_eq!(GgufValue::String("test".to_string()).to_string(), "test");

        let large_array = GgufValue::Array(vec![GgufValue::U8(0); 100]);
        assert!(large_array.to_string().contains("100 elements"));
    }

    #[test]
    fn test_gguf_value_display_small_arrays() {
        assert_eq!(GgufValue::Array(vec![]).to_string(), "[]");
        assert_eq!(
            GgufValue::Array(vec![GgufValue::U8(1), GgufValue::U8(2)]).to_string(),
            "[1, 2]"
        );
        assert_eq!(
            GgufValue::Array(vec![GgufValue::Array(vec![GgufValue::U8(1)])]).to_string(),
            "[[1]]"
        );

        // Ten elements are still printed in full; eleven are summarised.
        let ten = GgufValue::Array(vec![GgufValue::U8(0); 10]);
        assert_eq!(ten.to_string(), "[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]");
        let eleven = GgufValue::Array(vec![GgufValue::U8(0); 11]);
        assert_eq!(eleven.to_string(), "[Array with 11 elements]");
    }
}