gglib_core/utils/
shard_filename.rs

//! Utilities for normalizing sharded filenames to a stable base name.
2
/// Strip a trailing shard suffix from a filename to get the stable base name.
///
/// Every shard of one group maps to the same base name:
/// - `model-00001-of-00005.gguf` → `model.gguf`
/// - `llama-3-8b-q4_k_m-00003-of-00008.gguf` → `llama-3-8b-q4_k_m.gguf`
///
/// The suffix `-<digits>-of-<digits>` is removed only when it sits immediately
/// before the final extension and both digit runs are non-empty ASCII digits.
/// Anything else is returned unchanged, including names without a `.` and
/// malformed suffixes such as `model--of-.gguf`.
pub fn base_shard_filename(name: &str) -> String {
    // A shard counter must contain at least one digit. `all` on an empty
    // iterator is vacuously true, so emptiness has to be rejected explicitly.
    fn is_digit_run(s: &str) -> bool {
        !s.is_empty() && s.bytes().all(|b| b.is_ascii_digit())
    }

    // Split at the last '.'; `ext` keeps the leading dot.
    let Some(dot) = name.rfind('.') else {
        return name.to_string();
    };
    let (stem, ext) = name.split_at(dot);

    // Walk the stem from the right: <prefix> - <index> - of - <total>.
    // Limiting to four fields leaves any further dashes inside `prefix`.
    let mut fields = stem.rsplitn(4, '-');
    match (fields.next(), fields.next(), fields.next(), fields.next()) {
        (Some(total), Some("of"), Some(index), Some(prefix))
            if is_digit_run(total) && is_digit_run(index) =>
        {
            format!("{prefix}{ext}")
        }
        _ => name.to_string(),
    }
}
40
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs every `(input, expected)` pair through `base_shard_filename`,
    /// in order, stopping at the first mismatch.
    #[test]
    fn test_base_shard_filename() {
        let cases: &[(&str, &str)] = &[
            // Sharded names collapse to the shared base name.
            ("model-00001-of-00005.gguf", "model.gguf"),
            (
                "llama-3-8b-q4_k_m-00003-of-00008.gguf",
                "llama-3-8b-q4_k_m.gguf",
            ),
            ("model-00010-of-00100.gguf", "model.gguf"),
            // Names without a shard suffix pass through untouched.
            ("model.gguf", "model.gguf"),
            ("llama-3-8b-q4_k_m.gguf", "llama-3-8b-q4_k_m.gguf"),
            // Edge cases: no extension, trailing digits only, "of" without digits.
            ("noextension", "noextension"),
            ("has-numbers-123.gguf", "has-numbers-123.gguf"),
            ("model-of-something.gguf", "model-of-something.gguf"),
            // An earlier "-of-" in the name must not confuse the trailing match.
            ("model-of-doom-00001-of-00005.gguf", "model-of-doom.gguf"),
            // No digits around "of": nothing is stripped.
            ("prefix-of-test.gguf", "prefix-of-test.gguf"),
            // Shard-looking suffix but no dot: returned unchanged.
            ("model-00001-of-00005", "model-00001-of-00005"),
        ];

        for &(input, expected) in cases {
            assert_eq!(base_shard_filename(input), expected);
        }
    }
}