gglib_core/ports/process_runner.rs

//! Process runner trait definition.
//!
//! This port defines the interface for managing model server processes.
//! Implementations handle all process lifecycle details internally.

use async_trait::async_trait;
use serde::{Deserialize, Serialize};
use std::path::PathBuf;

use super::ProcessError;
use crate::domain::InferenceConfig;

/// Configuration for starting a model server.
///
/// This is an intent-based configuration — it expresses what the caller
/// wants, not how the server should be started. All typed fields are
/// handled by `build_and_spawn()`; `extra_args` is an escape hatch for
/// flags not yet promoted to first-class fields.
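///
/// # Examples
///
/// A typical builder chain. Values and the import path are illustrative
/// (adjust to this crate's actual re-exports), hence the ignored doctest:
///
/// ```ignore
/// use std::path::PathBuf;
/// use gglib_core::ports::ServerConfig;
///
/// let config = ServerConfig::new(
///     42,
///     "llama-3-8b".to_string(),
///     PathBuf::from("/models/llama-3-8b.gguf"),
///     8080, // base port for allocation
/// )
/// .with_context_size(8192)
/// .with_gpu_layers(32)
/// .with_jinja();
///
/// assert_eq!(config.port, None); // a free port will be assigned at start
/// ```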
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServerConfig {
    /// Database ID of the model to serve.
    pub model_id: i64,
    /// Human-readable model name.
    pub model_name: String,
    /// Path to the model file.
    pub model_path: PathBuf,
    /// Port to listen on (if None, a free port will be assigned).
    pub port: Option<u16>,
    /// Base port for allocation when port is None.
    pub base_port: u16,
    /// Context size to use (if None, use model default).
    pub context_size: Option<u64>,
    /// Number of GPU layers to offload (if None, use default).
    pub gpu_layers: Option<i32>,
    /// Enable Jinja templating for chat formats.
    pub jinja: bool,
    /// Reasoning format override (e.g., `"deepseek"`, `"none"`).
    pub reasoning_format: Option<String>,
    /// Inference sampling parameters (temperature, `top_p`, etc.).
    pub inference_config: Option<InferenceConfig>,
    /// Additional server-specific options (escape hatch).
    pub extra_args: Vec<String>,
}

impl ServerConfig {
    /// Create a new server configuration with required fields.
    #[must_use]
    pub const fn new(
        model_id: i64,
        model_name: String,
        model_path: PathBuf,
        base_port: u16,
    ) -> Self {
        Self {
            model_id,
            model_name,
            model_path,
            port: None,
            base_port,
            context_size: None,
            gpu_layers: None,
            jinja: false,
            reasoning_format: None,
            inference_config: None,
            extra_args: Vec::new(),
        }
    }

    /// Set the port to listen on.
    #[must_use]
    pub const fn with_port(mut self, port: u16) -> Self {
        self.port = Some(port);
        self
    }

    /// Set the context size.
    #[must_use]
    pub const fn with_context_size(mut self, size: u64) -> Self {
        self.context_size = Some(size);
        self
    }

    /// Set the number of GPU layers.
    #[must_use]
    pub const fn with_gpu_layers(mut self, layers: i32) -> Self {
        self.gpu_layers = Some(layers);
        self
    }

    /// Enable Jinja templating.
    #[must_use]
    pub const fn with_jinja(mut self) -> Self {
        self.jinja = true;
        self
    }

    /// Set the reasoning format (e.g., `"deepseek"`, `"none"`).
    #[must_use]
    pub fn with_reasoning_format(mut self, format: String) -> Self {
        self.reasoning_format = Some(format);
        self
    }

    /// Set inference sampling parameters.
    #[must_use]
    pub const fn with_inference_config(mut self, config: InferenceConfig) -> Self {
        self.inference_config = Some(config);
        self
    }

    /// Add extra arguments to pass to the server.
    #[must_use]
    pub fn with_extra_args(mut self, args: Vec<String>) -> Self {
        self.extra_args = args;
        self
    }
}

/// Handle to a running server process.
///
/// This is an opaque handle that implementations use to track processes.
/// It contains enough information to identify and manage the process.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProcessHandle {
    /// Database ID of the model being served.
    pub model_id: i64,
    /// Human-readable model name.
    pub model_name: String,
    /// Process ID (if running on local system).
    pub pid: Option<u32>,
    /// Port the server is listening on.
    pub port: u16,
    /// Unix timestamp (seconds) when the server was started.
    pub started_at: u64,
}

impl ProcessHandle {
    /// Create a new process handle.
    #[must_use]
    pub const fn new(
        model_id: i64,
        model_name: String,
        pid: Option<u32>,
        port: u16,
        started_at: u64,
    ) -> Self {
        Self {
            model_id,
            model_name,
            pid,
            port,
            started_at,
        }
    }
}

/// Health status of a running server.
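///
/// # Examples
///
/// Constructing both states (import path illustrative, hence ignored):
///
/// ```ignore
/// use gglib_core::ports::ServerHealth;
///
/// let ok = ServerHealth::healthy().with_context_size(8192);
/// assert!(ok.healthy);
///
/// let bad = ServerHealth::unhealthy("connection refused");
/// assert_eq!(bad.message.as_deref(), Some("connection refused"));
/// ```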
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServerHealth {
    /// Whether the server is responding to health checks.
    pub healthy: bool,
    /// Unix timestamp (seconds) of the most recent health check
    /// (set for both healthy and unhealthy results).
    pub last_check: Option<u64>,
    /// Context size being used by the server.
    pub context_size: Option<u64>,
    /// Optional status message.
    pub message: Option<String>,
}

impl ServerHealth {
    /// Get the current Unix timestamp in seconds.
    ///
    /// Saturates to 0 if the system clock reads earlier than the Unix
    /// epoch, rather than panicking.
    fn now_secs() -> u64 {
        std::time::SystemTime::now()
            .duration_since(std::time::UNIX_EPOCH)
            .unwrap_or_default()
            .as_secs()
    }

    /// Create a healthy server status.
    #[must_use]
    pub fn healthy() -> Self {
        Self {
            healthy: true,
            last_check: Some(Self::now_secs()),
            context_size: None,
            message: None,
        }
    }

    /// Create an unhealthy server status with a message.
    #[must_use]
    pub fn unhealthy(message: impl Into<String>) -> Self {
        Self {
            healthy: false,
            last_check: Some(Self::now_secs()),
            context_size: None,
            message: Some(message.into()),
        }
    }

    /// Set the context size.
    #[must_use]
    pub const fn with_context_size(mut self, size: u64) -> Self {
        self.context_size = Some(size);
        self
    }
}

/// Process runner for managing model server processes.
///
/// This trait abstracts process management for testability and
/// potential alternative backends (local, remote, containerized).
///
/// # Design Rules
///
/// - Express **intent**, not implementation detail
/// - No CLI/Tauri/Axum concerns in signatures
/// - Must support: mock runner, remote runner, alternative inference
///   backends (a test-only mock sketch appears at the end of this file)
#[async_trait]
pub trait ProcessRunner: Send + Sync {
    /// Start a model server with the given configuration.
    ///
    /// Returns a handle that can be used to manage the process.
    async fn start(&self, config: ServerConfig) -> Result<ProcessHandle, ProcessError>;

    /// Stop a running server.
    ///
    /// Returns `Err(ProcessError::NotRunning)` if the process isn't running.
    async fn stop(&self, handle: &ProcessHandle) -> Result<(), ProcessError>;

    /// Check if a server is still running.
    async fn is_running(&self, handle: &ProcessHandle) -> bool;

    /// Get the health status of a running server.
    ///
    /// Returns `Err(ProcessError::NotRunning)` if the process isn't running.
    async fn health(&self, handle: &ProcessHandle) -> Result<ServerHealth, ProcessError>;

    /// List all currently running server processes.
    ///
    /// This is needed for snapshot behavior (e.g., `server:snapshot` events).
    async fn list_running(&self) -> Result<Vec<ProcessHandle>, ProcessError>;
}
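
// The design rules above require that this port support a mock runner; the
// test-only sketch below shows the minimal shape such an implementation can
// take. It is illustrative, not part of the port contract: bookkeeping is an
// in-memory map, no real process is spawned, and `ProcessError::NotRunning`
// is assumed to be a unit-like variant (as the trait docs suggest).
#[cfg(test)]
mod mock_runner_sketch {
    #![allow(dead_code)] // sketch only

    use super::*;
    use async_trait::async_trait;
    use std::collections::HashMap;
    use std::sync::Mutex;

    /// In-memory runner: "processes" are just handles keyed by model ID.
    #[derive(Default)]
    struct MockRunner {
        running: Mutex<HashMap<i64, ProcessHandle>>,
    }

    #[async_trait]
    impl ProcessRunner for MockRunner {
        async fn start(&self, config: ServerConfig) -> Result<ProcessHandle, ProcessError> {
            let handle = ProcessHandle::new(
                config.model_id,
                config.model_name,
                None, // a mock has no real OS process, hence no PID
                config.port.unwrap_or(config.base_port),
                0, // epoch placeholder; a real runner records wall-clock time
            );
            self.running
                .lock()
                .unwrap()
                .insert(handle.model_id, handle.clone());
            Ok(handle)
        }

        async fn stop(&self, handle: &ProcessHandle) -> Result<(), ProcessError> {
            self.running
                .lock()
                .unwrap()
                .remove(&handle.model_id)
                .map(|_| ())
                .ok_or(ProcessError::NotRunning)
        }

        async fn is_running(&self, handle: &ProcessHandle) -> bool {
            self.running.lock().unwrap().contains_key(&handle.model_id)
        }

        async fn health(&self, handle: &ProcessHandle) -> Result<ServerHealth, ProcessError> {
            if self.is_running(handle).await {
                Ok(ServerHealth::healthy())
            } else {
                Err(ProcessError::NotRunning)
            }
        }

        async fn list_running(&self) -> Result<Vec<ProcessHandle>, ProcessError> {
            Ok(self.running.lock().unwrap().values().cloned().collect())
        }
    }
}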