gglib_core/utils/shard_filename.rs
1//! Utilities for normalizing sharded filenames to a stable base name.
2
/// Strip a trailing shard suffix from a filename to get its stable base name.
///
/// This ensures all shards in a group compute the same identity:
/// - `model-00001-of-00005.gguf` → `model.gguf`
/// - `llama-3-8b-q4_k_m-00003-of-00008.gguf` → `llama-3-8b-q4_k_m.gguf`
///
/// The pattern `-<digits>-of-<digits>` is stripped only when it appears
/// immediately before the final file extension, and each `<digits>` group
/// must contain at least one ASCII digit. Names that have no `.` or that do
/// not end in such a suffix are returned unchanged.
pub fn base_shard_filename(name: &str) -> String {
    /// True when `s` is a non-empty run of ASCII digits.
    ///
    /// The emptiness check matters: `all(..)` on an empty string is vacuously
    /// true, which would let names like `model--of-.gguf` pass as shards.
    fn is_digits(s: &str) -> bool {
        !s.is_empty() && s.bytes().all(|b| b.is_ascii_digit())
    }

    // Split at the last '.'; a name without an extension is never a shard.
    let Some(dot) = name.rfind('.') else {
        return name.to_string();
    };
    let (stem, ext) = name.split_at(dot); // ext includes '.'

    // Peel the stem from the right: <prefix> - <index> - "of" - <total>.
    // Any step that fails to match yields `None` and the name is kept as-is.
    let prefix = stem
        .rsplit_once('-')
        .filter(|&(_, total)| is_digits(total))
        .and_then(|(rest, _)| rest.strip_suffix("-of"))
        .and_then(|rest| rest.rsplit_once('-'))
        .filter(|&(_, index)| is_digits(index))
        .map(|(prefix, _)| prefix);

    match prefix {
        Some(prefix) => format!("{prefix}{ext}"),
        None => name.to_string(),
    }
}
40
#[cfg(test)]
mod tests {
    use super::*;

    /// Table-driven check of `base_shard_filename`: each entry pairs an input
    /// filename with the base name it is expected to normalize to.
    #[test]
    fn test_base_shard_filename() {
        let cases = [
            // Sharded filenames: the trailing `-N-of-M` is removed.
            ("model-00001-of-00005.gguf", "model.gguf"),
            (
                "llama-3-8b-q4_k_m-00003-of-00008.gguf",
                "llama-3-8b-q4_k_m.gguf",
            ),
            ("model-00010-of-00100.gguf", "model.gguf"),
            // Non-sharded filenames pass through unchanged.
            ("model.gguf", "model.gguf"),
            ("llama-3-8b-q4_k_m.gguf", "llama-3-8b-q4_k_m.gguf"),
            // Edge cases: no extension, trailing digits only, "-of-" without digits.
            ("noextension", "noextension"),
            ("has-numbers-123.gguf", "has-numbers-123.gguf"),
            ("model-of-something.gguf", "model-of-something.gguf"),
            // An earlier "-of-" in the name must not confuse the suffix match.
            ("model-of-doom-00001-of-00005.gguf", "model-of-doom.gguf"),
            // No digits around "-of-", so nothing is stripped.
            ("prefix-of-test.gguf", "prefix-of-test.gguf"),
            // Shard-like suffix but no dot: returned unchanged.
            ("model-00001-of-00005", "model-00001-of-00005"),
        ];

        for (input, expected) in cases {
            assert_eq!(base_shard_filename(input), expected);
        }
    }
}