pub struct ServerConfig {
pub model_id: i64,
pub model_name: String,
pub model_path: PathBuf,
pub port: Option<u16>,
pub base_port: u16,
pub context_size: Option<u64>,
pub gpu_layers: Option<i32>,
pub jinja: bool,
pub reasoning_format: Option<String>,
pub spec_draft_n_max: Option<u32>,
pub spec_draft_p_min: Option<f32>,
pub inference_config: Option<InferenceConfig>,
pub extra_args: Vec<String>,
}
Configuration for starting a model server.
This is an intent-based configuration — it expresses what the caller
wants, not how the server should be started. All typed fields are
handled by build_and_spawn(); extra_args is an escape hatch for
flags not yet promoted to first-class fields.
Fields§
model_id: i64 — Database ID of the model to serve.
model_name: String — Human-readable model name.
model_path: PathBuf — Path to the model file.
port: Option<u16> — Port to listen on (if None, a free port will be assigned).
base_port: u16 — Base port for allocation when port is None.
context_size: Option<u64> — Context size to use (if None, use model default).
gpu_layers: Option<i32> — Number of GPU layers to offload (if None, use default).
jinja: bool — Enable Jinja templating for chat formats.
reasoning_format: Option<String> — Reasoning format override (e.g., "deepseek", "none").
spec_draft_n_max: Option<u32> — Number of MTP draft tokens to speculate ahead (--spec-draft-n-max).
None means MTP speculative decoding is disabled. When Some(n),
--spec-type draft-mtp and --spec-draft-n-max n are passed to
llama-server. Recommended value: 2 (Unsloth default).
spec_draft_p_min: Option<f32> — Minimum acceptance probability for MTP draft tokens (--spec-draft-p-min).
Only meaningful when spec_draft_n_max is Some. Skipping low-confidence
draft tokens is especially important on Apple Silicon (Metal) to avoid
throughput regression. Recommended value: 0.75.
inference_config: Option<InferenceConfig> — Inference sampling parameters (temperature, top_p, etc.).
extra_args: Vec<String> — Additional server-specific options (escape hatch).
Implementations§
Source§impl ServerConfig
impl ServerConfig
Sourcepub const fn new(
model_id: i64,
model_name: String,
model_path: PathBuf,
base_port: u16,
) -> Self
pub const fn new( model_id: i64, model_name: String, model_path: PathBuf, base_port: u16, ) -> Self
Create a new server configuration with required fields.
Sourcepub const fn with_context_size(self, size: u64) -> Self
pub const fn with_context_size(self, size: u64) -> Self
Set the context size.
Sourcepub const fn with_gpu_layers(self, layers: i32) -> Self
pub const fn with_gpu_layers(self, layers: i32) -> Self
Set the number of GPU layers.
Sourcepub const fn with_jinja(self) -> Self
pub const fn with_jinja(self) -> Self
Enable Jinja templating.
Sourcepub fn with_reasoning_format(self, format: String) -> Self
pub fn with_reasoning_format(self, format: String) -> Self
Set the reasoning format (e.g., "deepseek", "none").
Sourcepub const fn with_spec_draft_n_max(self, n: u32) -> Self
pub const fn with_spec_draft_n_max(self, n: u32) -> Self
Enable MTP speculative decoding with the given draft token count.
This causes --spec-type draft-mtp and --spec-draft-n-max n to be
passed to llama-server. Call Self::with_spec_draft_p_min to also
set the acceptance probability threshold (defaults to 0.75).
Sourcepub const fn with_spec_draft_p_min(self, p: f32) -> Self
pub const fn with_spec_draft_p_min(self, p: f32) -> Self
Set the minimum acceptance probability for MTP draft tokens.
Has no effect unless spec_draft_n_max is also set. Recommended
value is 0.75; lower values trade quality for speed.
Sourcepub const fn with_inference_config(self, config: InferenceConfig) -> Self
pub const fn with_inference_config(self, config: InferenceConfig) -> Self
Set inference sampling parameters.
Sourcepub fn with_extra_args(self, args: Vec<String>) -> Self
pub fn with_extra_args(self, args: Vec<String>) -> Self
Add extra arguments to pass to the server.
Trait Implementations§
Source§impl Clone for ServerConfig
impl Clone for ServerConfig
Source§fn clone(&self) -> ServerConfig
fn clone(&self) -> ServerConfig
1.0.0 · Source§fn clone_from(&mut self, source: &Self)
fn clone_from(&mut self, source: &Self)
Performs copy-assignment from source. Read more