// gglib_core/ports/process_runner.rs
1//! Process runner trait definition.
2//!
3//! This port defines the interface for managing model server processes.
4//! Implementations handle all process lifecycle details internally.
5
6use async_trait::async_trait;
7use serde::{Deserialize, Serialize};
8use std::path::PathBuf;
9
10use super::ProcessError;
11use crate::domain::InferenceConfig;
12
/// Configuration for starting a model server.
///
/// This is an intent-based configuration — it expresses what the caller
/// wants, not how the server should be started. All typed fields are
/// handled by `build_and_spawn()`; `extra_args` is an escape hatch for
/// flags not yet promoted to first-class fields.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServerConfig {
    /// Database ID of the model to serve.
    pub model_id: i64,
    /// Human-readable model name (used for display and logging).
    pub model_name: String,
    /// Path to the model file on disk.
    pub model_path: PathBuf,
    /// Port to listen on. `None` means the implementation assigns a
    /// free port, starting the search at `base_port`.
    pub port: Option<u16>,
    /// Base port for allocation when `port` is `None`.
    pub base_port: u16,
    /// Context size to use. `None` means use the model's default.
    pub context_size: Option<u64>,
    /// Number of GPU layers to offload. `None` means use the default.
    pub gpu_layers: Option<i32>,
    /// Enable Jinja templating for chat formats.
    pub jinja: bool,
    /// Reasoning format override (e.g., `"deepseek"`, `"none"`).
    pub reasoning_format: Option<String>,
    /// Inference sampling parameters (temperature, `top_p`, etc.).
    pub inference_config: Option<InferenceConfig>,
    /// Additional server-specific CLI options (escape hatch for flags
    /// not yet promoted to typed fields above).
    pub extra_args: Vec<String>,
}
44
45impl ServerConfig {
46 /// Create a new server configuration with required fields.
47 #[must_use]
48 pub const fn new(
49 model_id: i64,
50 model_name: String,
51 model_path: PathBuf,
52 base_port: u16,
53 ) -> Self {
54 Self {
55 model_id,
56 model_name,
57 model_path,
58 port: None,
59 base_port,
60 context_size: None,
61 gpu_layers: None,
62 jinja: false,
63 reasoning_format: None,
64 inference_config: None,
65 extra_args: Vec::new(),
66 }
67 }
68
69 /// Set the port to listen on.
70 #[must_use]
71 pub const fn with_port(mut self, port: u16) -> Self {
72 self.port = Some(port);
73 self
74 }
75
76 /// Set the context size.
77 #[must_use]
78 pub const fn with_context_size(mut self, size: u64) -> Self {
79 self.context_size = Some(size);
80 self
81 }
82
83 /// Set the number of GPU layers.
84 #[must_use]
85 pub const fn with_gpu_layers(mut self, layers: i32) -> Self {
86 self.gpu_layers = Some(layers);
87 self
88 }
89
90 /// Enable Jinja templating.
91 #[must_use]
92 pub const fn with_jinja(mut self) -> Self {
93 self.jinja = true;
94 self
95 }
96
97 /// Set the reasoning format (e.g., `"deepseek"`, `"none"`).
98 #[must_use]
99 pub fn with_reasoning_format(mut self, format: String) -> Self {
100 self.reasoning_format = Some(format);
101 self
102 }
103
104 /// Set inference sampling parameters.
105 #[must_use]
106 pub const fn with_inference_config(mut self, config: InferenceConfig) -> Self {
107 self.inference_config = Some(config);
108 self
109 }
110
111 /// Add extra arguments to pass to the server.
112 #[must_use]
113 pub fn with_extra_args(mut self, args: Vec<String>) -> Self {
114 self.extra_args = args;
115 self
116 }
117}
118
/// Handle to a running server process.
///
/// This is an opaque handle that implementations use to track processes.
/// It contains enough information to identify and manage the process.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ProcessHandle {
    /// Database ID of the model being served.
    pub model_id: i64,
    /// Human-readable model name (used for display and logging).
    pub model_name: String,
    /// OS process ID. `None` when the process is not on the local
    /// system (e.g., a remote or containerized runner).
    pub pid: Option<u32>,
    /// Port the server is listening on.
    pub port: u16,
    /// Unix timestamp (seconds) when the server was started.
    pub started_at: u64,
}
136
137impl ProcessHandle {
138 /// Create a new process handle.
139 #[must_use]
140 pub const fn new(
141 model_id: i64,
142 model_name: String,
143 pid: Option<u32>,
144 port: u16,
145 started_at: u64,
146 ) -> Self {
147 Self {
148 model_id,
149 model_name,
150 pid,
151 port,
152 started_at,
153 }
154 }
155}
156
/// Health status of a running server.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ServerHealth {
    /// Whether the server is responding to health checks.
    pub healthy: bool,
    /// Unix timestamp (seconds) of the last successful health check,
    /// or `None` if no check has completed yet.
    pub last_check: Option<u64>,
    /// Context size being used by the server, if known.
    pub context_size: Option<u64>,
    /// Optional human-readable status message (e.g., failure reason).
    pub message: Option<String>,
}
169
170impl ServerHealth {
171 /// Get the current Unix timestamp in seconds.
172 fn now_secs() -> u64 {
173 std::time::SystemTime::now()
174 .duration_since(std::time::UNIX_EPOCH)
175 .unwrap()
176 .as_secs()
177 }
178
179 /// Create a healthy server status.
180 #[must_use]
181 pub fn healthy() -> Self {
182 Self {
183 healthy: true,
184 last_check: Some(Self::now_secs()),
185 context_size: None,
186 message: None,
187 }
188 }
189
190 /// Create an unhealthy server status with a message.
191 pub fn unhealthy(message: impl Into<String>) -> Self {
192 Self {
193 healthy: false,
194 last_check: Some(Self::now_secs()),
195 context_size: None,
196 message: Some(message.into()),
197 }
198 }
199
200 /// Set the context size.
201 #[must_use]
202 pub const fn with_context_size(mut self, size: u64) -> Self {
203 self.context_size = Some(size);
204 self
205 }
206}
207
/// Process runner for managing model server processes.
///
/// This trait abstracts process management for testability and
/// potential alternative backends (local, remote, containerized).
///
/// # Design Rules
///
/// - Express **intent**, not implementation detail
/// - No CLI/Tauri/Axum concerns in signatures
/// - Must support: mock runner, remote runner, alternative inference backends
#[async_trait]
pub trait ProcessRunner: Send + Sync {
    /// Start a model server with the given configuration.
    ///
    /// Returns a [`ProcessHandle`] that can be used to manage the process.
    ///
    /// # Errors
    ///
    /// Returns a [`ProcessError`] if the server fails to start.
    async fn start(&self, config: ServerConfig) -> Result<ProcessHandle, ProcessError>;

    /// Stop a running server.
    ///
    /// # Errors
    ///
    /// Returns `Err(ProcessError::NotRunning)` if the process isn't running.
    async fn stop(&self, handle: &ProcessHandle) -> Result<(), ProcessError>;

    /// Check if a server is still running.
    ///
    /// Infallible by design: any failure to determine status should be
    /// reported as `false` by implementations.
    async fn is_running(&self, handle: &ProcessHandle) -> bool;

    /// Get the health status of a running server.
    ///
    /// # Errors
    ///
    /// Returns `Err(ProcessError::NotRunning)` if the process isn't running.
    async fn health(&self, handle: &ProcessHandle) -> Result<ServerHealth, ProcessError>;

    /// List all currently running server processes.
    ///
    /// This is needed for snapshot behavior (e.g., `server:snapshot` events).
    ///
    /// # Errors
    ///
    /// Returns a [`ProcessError`] if the running processes cannot be enumerated.
    async fn list_running(&self) -> Result<Vec<ProcessHandle>, ProcessError>;
}