// gglib_core/ports/model_runtime.rs
//! Model runtime port for proxy model management.
//!
//! This port defines the interface for ensuring a model is running
//! and ready to serve requests. It abstracts the process management
//! details from the proxy layer.

7use async_trait::async_trait;
8use std::fmt;
9use thiserror::Error;
10
/// Target information for a running model instance.
///
/// Carries everything the proxy needs to route a request to a
/// running llama-server instance.
#[derive(Debug, Clone)]
pub struct RunningTarget {
    /// Full URL to the server (e.g., <http://127.0.0.1:5500>).
    /// Future-proof for non-localhost deployments.
    pub base_url: String,
    /// Port the server is listening on.
    pub port: u16,
    /// Database ID of the model.
    pub model_id: u32,
    /// Human-readable model name (for logging/headers).
    pub model_name: String,
    /// Actual context size being used.
    pub effective_ctx: u64,
}

impl RunningTarget {
    /// Create a new `RunningTarget` for a local server.
    ///
    /// `base_url` is derived from `port` (loopback address); all
    /// other fields are stored as given.
    #[must_use]
    pub fn local(port: u16, model_id: u32, model_name: String, effective_ctx: u64) -> Self {
        let base_url = format!("http://127.0.0.1:{port}");
        Self {
            base_url,
            port,
            model_id,
            model_name,
            effective_ctx,
        }
    }
}

/// Errors that can occur during model runtime operations.
///
/// The `#[error("…")]` strings become the `Display` output via the
/// `thiserror` derive; they are user-visible and must not be changed
/// casually. See the inherent impl below for the retry policy and
/// HTTP status mapping of each variant.
#[derive(Debug, Error)]
pub enum ModelRuntimeError {
    /// The requested model was not found in the catalog.
    /// Payload: the model name or alias that failed to resolve.
    #[error("Model not found: {0}")]
    ModelNotFound(String),

    /// A model is currently loading; try again later.
    /// Callers should return 503 Service Unavailable.
    #[error("Model is loading, try again")]
    ModelLoading,

    /// Failed to spawn the model server process.
    /// Payload: a human-readable description of the spawn failure.
    #[error("Failed to start model: {0}")]
    SpawnFailed(String),

    /// The model server failed its health check.
    /// Payload: a human-readable description of the health failure.
    #[error("Health check failed: {0}")]
    HealthCheckFailed(String),

    /// The model file was not found on disk.
    /// Payload: presumably the missing path — TODO confirm at call sites.
    #[error("Model file not found: {0}")]
    ModelFileNotFound(String),

    /// Internal error during runtime operations.
    #[error("Internal error: {0}")]
    Internal(String),
}

73impl ModelRuntimeError {
74 /// Returns true if this error indicates a temporary condition
75 /// where retrying may succeed.
76 #[must_use]
77 pub const fn is_retryable(&self) -> bool {
78 matches!(self, Self::ModelLoading)
79 }
80
81 /// Returns a suggested HTTP status code for this error.
82 #[must_use]
83 pub const fn suggested_status_code(&self) -> u16 {
84 match self {
85 Self::ModelLoading => 503,
86 Self::ModelNotFound(_) | Self::ModelFileNotFound(_) => 404,
87 Self::SpawnFailed(_) | Self::HealthCheckFailed(_) | Self::Internal(_) => 500,
88 }
89 }
90}
91
/// Port for managing model runtime (ensuring models are running).
///
/// This is the primary interface the proxy uses to get a running
/// model server. Implementations handle:
/// - Model resolution (name → file path)
/// - Process lifecycle (start, stop, health check)
/// - Context size management
/// - Single-swap or concurrent strategies
///
/// The `Send + Sync` bounds let one implementation be shared across
/// async tasks; `fmt::Debug` keeps trait objects loggable.
#[async_trait]
pub trait ModelRuntimePort: Send + Sync + fmt::Debug {
    /// Ensure a model is running and ready to serve requests.
    ///
    /// This method:
    /// 1. Resolves the model name to a database entry
    /// 2. Checks if the model is already running with the correct context
    /// 3. Starts or restarts the model if needed
    /// 4. Waits for the health check to pass
    /// 5. Returns the target information for routing
    ///
    /// # Arguments
    ///
    /// * `model_name` - Name or alias of the model to run
    /// * `num_ctx` - Optional context size override from request;
    ///   presumably takes precedence over `default_ctx` when `Some` —
    ///   confirm against implementations
    /// * `default_ctx` - Default context size if not specified
    ///
    /// # Errors
    ///
    /// Returns `ModelRuntimeError` if the model cannot be started.
    /// `ModelRuntimeError::is_retryable` / `suggested_status_code`
    /// tell callers how to surface the failure.
    async fn ensure_model_running(
        &self,
        model_name: &str,
        num_ctx: Option<u64>,
        default_ctx: u64,
    ) -> Result<RunningTarget, ModelRuntimeError>;

    /// Get information about the currently running model, if any.
    ///
    /// Returns `None` if no model is currently running.
    async fn current_model(&self) -> Option<RunningTarget>;

    /// Stop the currently running model.
    ///
    /// This is primarily for cleanup/shutdown scenarios.
    async fn stop_current(&self) -> Result<(), ModelRuntimeError>;
}