spin_factor_llm/
spin.rs

use std::path::PathBuf;
use std::sync::Arc;

use spin_factors::runtime_config::toml::GetTomlValue;
use spin_llm_remote_http::RemoteHttpLlmEngine;
use spin_world::async_trait;
use spin_world::v1::llm::{self as v1};
use spin_world::v2::llm::{self as v2};
use tokio::sync::Mutex;
use url::Url;

use crate::{LlmEngine, LlmEngineCreator, RuntimeConfig};

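// When the `llm` feature is enabled, inferencing runs in-process via
// `spin_llm_local`; otherwise the `noop` module at the bottom of this file
// stands in and reports local inferencing as unsupported.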
#[cfg(feature = "llm")]
mod local {
    use super::*;
    pub use spin_llm_local::LocalLlmEngine;

    #[async_trait]
    impl LlmEngine for LocalLlmEngine {
        async fn infer(
            &mut self,
            model: v2::InferencingModel,
            prompt: String,
            params: v2::InferencingParams,
        ) -> Result<v2::InferencingResult, v2::Error> {
            self.infer(model, prompt, params).await
        }

        async fn generate_embeddings(
            &mut self,
            model: v2::EmbeddingModel,
            data: Vec<String>,
        ) -> Result<v2::EmbeddingsResult, v2::Error> {
            self.generate_embeddings(model, data).await
        }

        fn summary(&self) -> Option<String> {
            Some("local model".to_string())
        }
    }
}

/// The default engine creator for the LLM factor when used in the Spin CLI.
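///
/// A minimal usage sketch (the state directory path is illustrative):
///
/// ```ignore
/// let creator = default_engine_creator(Some("/tmp/spin".into()))?;
/// let engine = creator.create(); // Arc<Mutex<dyn LlmEngine>>
/// ```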
pub fn default_engine_creator(
    state_dir: Option<PathBuf>,
) -> anyhow::Result<impl LlmEngineCreator + 'static> {
    #[cfg(feature = "llm")]
    let engine = {
        use anyhow::Context as _;
        let models_dir_parent = match state_dir {
            Some(ref dir) => dir.clone(),
            None => std::env::current_dir().context("failed to get current working directory")?,
        };
        spin_llm_local::LocalLlmEngine::new(models_dir_parent.join("ai-models"))
    };
    #[cfg(not(feature = "llm"))]
    let engine = {
        let _ = state_dir;
        noop::NoopLlmEngine
    };
    let engine = Arc::new(Mutex::new(engine)) as Arc<Mutex<dyn LlmEngine>>;
    Ok(move || engine.clone())
}

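// Remote engines forward requests over HTTP. Each inference call also
// increments the `spin.llm_infer` telemetry counter, tagged with the model
// name.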
#[async_trait]
impl LlmEngine for RemoteHttpLlmEngine {
    async fn infer(
        &mut self,
        model: v1::InferencingModel,
        prompt: String,
        params: v2::InferencingParams,
    ) -> Result<v2::InferencingResult, v2::Error> {
        spin_telemetry::monotonic_counter!(spin.llm_infer = 1, model_name = model);
        self.infer(model, prompt, params).await
    }

    async fn generate_embeddings(
        &mut self,
        model: v2::EmbeddingModel,
        data: Vec<String>,
    ) -> Result<v2::EmbeddingsResult, v2::Error> {
        self.generate_embeddings(model, data).await
    }

    fn summary(&self) -> Option<String> {
        Some(format!("model at {}", self.url()))
    }
}

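/// Reads the `[llm_compute]` section of a runtime config table, e.g.
/// (values illustrative):
///
/// ```toml
/// [llm_compute]
/// type = "remote_http"
/// url = "http://example.com"
/// auth_token = "some-token"
/// ```
///
/// Returns `Ok(None)` when the section is absent.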
pub fn runtime_config_from_toml(
    table: &impl GetTomlValue,
    state_dir: Option<PathBuf>,
) -> anyhow::Result<Option<RuntimeConfig>> {
    let Some(value) = table.get("llm_compute") else {
        return Ok(None);
    };
    let config: LlmCompute = value.clone().try_into()?;

    Ok(Some(RuntimeConfig {
        engine: config.into_engine(state_dir)?,
    }))
}

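/// Engine selection, keyed by the `type` field of `[llm_compute]`: `spin`
/// uses the local engine (or the noop fallback when the `llm` feature is
/// disabled), while `remote_http` delegates to a `RemoteHttpLlmEngine`.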
#[derive(Debug, serde::Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum LlmCompute {
    Spin,
    RemoteHttp(RemoteHttpCompute),
}

impl LlmCompute {
    fn into_engine(self, state_dir: Option<PathBuf>) -> anyhow::Result<Arc<Mutex<dyn LlmEngine>>> {
        let engine: Arc<Mutex<dyn LlmEngine>> = match self {
            #[cfg(not(feature = "llm"))]
            LlmCompute::Spin => {
                let _ = state_dir;
                Arc::new(Mutex::new(noop::NoopLlmEngine))
            }
            #[cfg(feature = "llm")]
            LlmCompute::Spin => default_engine_creator(state_dir)?.create(),
            LlmCompute::RemoteHttp(config) => Arc::new(Mutex::new(RemoteHttpLlmEngine::new(
                config.url,
                config.auth_token,
            ))),
        };
        Ok(engine)
    }
}

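/// Connection details deserialized from a `type = "remote_http"` section.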
#[derive(Debug, serde::Deserialize)]
pub struct RemoteHttpCompute {
    url: Url,
    auth_token: String,
}

/// A noop engine used when the local engine feature is disabled.
#[cfg(not(feature = "llm"))]
mod noop {
    use super::*;

    #[derive(Clone, Copy)]
    pub(super) struct NoopLlmEngine;

    #[async_trait]
    impl LlmEngine for NoopLlmEngine {
        async fn infer(
            &mut self,
            _model: v2::InferencingModel,
            _prompt: String,
            _params: v2::InferencingParams,
        ) -> Result<v2::InferencingResult, v2::Error> {
            Err(v2::Error::RuntimeError(
                "Local LLM operations are not supported in this version of Spin.".into(),
            ))
        }

        async fn generate_embeddings(
            &mut self,
            _model: v2::EmbeddingModel,
            _data: Vec<String>,
        ) -> Result<v2::EmbeddingsResult, v2::Error> {
            Err(v2::Error::RuntimeError(
                "Local LLM operations are not supported in this version of Spin.".into(),
            ))
        }

        fn summary(&self) -> Option<String> {
            Some("noop model".to_owned())
        }
    }
}
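
#[cfg(test)]
mod tests {
    use super::*;

    // A minimal sketch of the `[llm_compute]` runtime-config round trip. It
    // assumes `GetTomlValue` is implemented for `toml::Table` and that the
    // `toml` crate is available as a dev-dependency.
    #[test]
    fn parses_remote_http_compute() -> anyhow::Result<()> {
        let table: toml::Table = r#"
            [llm_compute]
            type = "remote_http"
            url = "http://example.com"
            auth_token = "some-token"
        "#
        .parse()?;
        let config =
            runtime_config_from_toml(&table, None)?.expect("[llm_compute] should be present");
        // `blocking_lock` is fine here because this test is not async.
        let engine = config.engine.blocking_lock();
        // `Url` renders a bare authority with a trailing slash.
        assert_eq!(engine.summary(), Some("model at http://example.com/".into()));
        Ok(())
    }
}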