// gglib_core/ports/model_runtime.rs
//! Model runtime port for proxy model management.
//!
//! This port defines the interface for ensuring a model is running
//! and ready to serve requests. It abstracts the process management
//! details from the proxy layer.

7use async_trait::async_trait;
8use std::fmt;
9use thiserror::Error;
10
/// Target information for a running model instance.
///
/// Carries everything the proxy needs to route a request to a
/// running llama-server instance.
#[derive(Debug, Clone)]
pub struct RunningTarget {
    /// Full URL to the server (e.g., <http://127.0.0.1:5500>).
    /// Future-proof for non-localhost deployments.
    pub base_url: String,
    /// Port the server is listening on.
    pub port: u16,
    /// Database ID of the model.
    pub model_id: u32,
    /// Human-readable model name (for logging/headers).
    pub model_name: String,
    /// Actual context size being used.
    pub effective_ctx: u64,
}

impl RunningTarget {
    /// Create a new `RunningTarget` for a local server.
    ///
    /// `base_url` is derived from `port` (loopback address); all
    /// other fields are stored as given.
    #[must_use]
    pub fn local(port: u16, model_id: u32, model_name: String, effective_ctx: u64) -> Self {
        let base_url = format!("http://127.0.0.1:{port}");
        Self {
            base_url,
            port,
            model_id,
            model_name,
            effective_ctx,
        }
    }
}

/// Errors that can occur during model runtime operations.
///
/// The `#[error("…")]` strings become the `Display` output via the
/// `thiserror` derive; they are user-visible and must not be changed
/// casually. See the inherent impl below for the retry policy and
/// HTTP status mapping of each variant.
#[derive(Debug, Error)]
pub enum ModelRuntimeError {
    /// The requested model was not found in the catalog.
    /// Payload: the model name or alias that failed to resolve.
    #[error("Model not found: {0}")]
    ModelNotFound(String),

    /// A model is currently loading; try again later.
    /// Callers should return 503 Service Unavailable.
    #[error("Model is loading, try again")]
    ModelLoading,

    /// Failed to spawn the model server process.
    /// Payload: a human-readable description of the spawn failure.
    #[error("Failed to start model: {0}")]
    SpawnFailed(String),

    /// The model server failed its health check.
    /// Payload: a human-readable description of the health failure.
    #[error("Health check failed: {0}")]
    HealthCheckFailed(String),

    /// The model file was not found on disk.
    /// Payload: presumably the missing path — TODO confirm at call sites.
    #[error("Model file not found: {0}")]
    ModelFileNotFound(String),

    /// Internal error during runtime operations.
    #[error("Internal error: {0}")]
    Internal(String),
}

73impl ModelRuntimeError {
74 /// Returns true if this error indicates a temporary condition
75 /// where retrying may succeed.
76 #[must_use]
77 pub const fn is_retryable(&self) -> bool {
78 matches!(self, Self::ModelLoading)
79 }
80
81 /// Returns a suggested HTTP status code for this error.
82 #[must_use]
83 pub const fn suggested_status_code(&self) -> u16 {
84 match self {
85 Self::ModelLoading => 503,
86 Self::ModelNotFound(_) | Self::ModelFileNotFound(_) => 404,
87 Self::SpawnFailed(_) | Self::HealthCheckFailed(_) | Self::Internal(_) => 500,
88 }
89 }
90}
91
/// Port for managing model runtime (ensuring models are running).
///
/// This is the primary interface the proxy uses to get a running
/// model server. Implementations handle:
/// - Model resolution (name → file path)
/// - Process lifecycle (start, stop, health check)
/// - Context size management
/// - Single-swap or concurrent strategies
///
/// The `Send + Sync` bounds let one implementation be shared across
/// async tasks; `fmt::Debug` keeps trait objects loggable.
#[async_trait]
pub trait ModelRuntimePort: Send + Sync + fmt::Debug {
    /// Ensure a model is running and ready to serve requests.
    ///
    /// This method:
    /// 1. Resolves the model name to a database entry
    /// 2. Checks if the model is already running with the correct context
    /// 3. Starts or restarts the model if needed
    /// 4. Waits for the health check to pass
    /// 5. Returns the target information for routing
    ///
    /// # Arguments
    ///
    /// * `model_name` - Name or alias of the model to run
    /// * `num_ctx` - Optional context size override from request;
    ///   presumably takes precedence over `default_ctx` when `Some` —
    ///   confirm against implementations
    /// * `default_ctx` - Default context size if not specified
    ///
    /// # Errors
    ///
    /// Returns `ModelRuntimeError` if the model cannot be started.
    /// `ModelRuntimeError::is_retryable` / `suggested_status_code`
    /// tell callers how to surface the failure.
    async fn ensure_model_running(
        &self,
        model_name: &str,
        num_ctx: Option<u64>,
        default_ctx: u64,
    ) -> Result<RunningTarget, ModelRuntimeError>;

    /// Get information about the currently running model, if any.
    ///
    /// Returns `None` if no model is currently running.
    async fn current_model(&self) -> Option<RunningTarget>;

    /// Stop the currently running model.
    ///
    /// This is primarily for cleanup/shutdown scenarios.
    async fn stop_current(&self) -> Result<(), ModelRuntimeError>;
}