1use std::path::PathBuf;
2use std::sync::Arc;
3
4use spin_factors::runtime_config::toml::GetTomlValue;
5use spin_llm_remote_http::RemoteHttpLlmEngine;
6use spin_world::async_trait;
7use spin_world::v1::llm::{self as v1};
8use spin_world::v2::llm::{self as v2};
9use tokio::sync::Mutex;
10use url::Url;
11
12use crate::{LlmEngine, LlmEngineCreator, RuntimeConfig};
13
#[cfg(feature = "llm")]
mod local {
    use super::*;
    pub use spin_llm_local::LocalLlmEngine;

    /// Adapts the in-process (local) LLM engine to this factor's `LlmEngine`
    /// trait by delegating each operation to `LocalLlmEngine`'s own methods.
    #[async_trait]
    impl LlmEngine for LocalLlmEngine {
        async fn infer(
            &mut self,
            model: v2::InferencingModel,
            prompt: String,
            params: v2::InferencingParams,
        ) -> Result<v2::InferencingResult, v2::Error> {
            // Resolves to `LocalLlmEngine`'s inherent `infer` method —
            // inherent methods take precedence over this trait method, so
            // this delegation does not recurse. (Presumes the inherent
            // method exists in `spin_llm_local`; it is not visible here.)
            self.infer(model, prompt, params).await
        }

        async fn generate_embeddings(
            &mut self,
            model: v2::EmbeddingModel,
            data: Vec<String>,
        ) -> Result<v2::EmbeddingsResult, v2::Error> {
            // Same inherent-method delegation as `infer` above.
            self.generate_embeddings(model, data).await
        }

        fn summary(&self) -> Option<String> {
            // Human-readable label used in diagnostics/summaries.
            Some("local model".to_string())
        }
    }
}
43
44pub fn default_engine_creator(
46 state_dir: Option<PathBuf>,
47) -> anyhow::Result<impl LlmEngineCreator + 'static> {
48 #[cfg(feature = "llm")]
49 let engine = {
50 use anyhow::Context as _;
51 let models_dir_parent = match state_dir {
52 Some(ref dir) => dir.clone(),
53 None => std::env::current_dir().context("failed to get current working directory")?,
54 };
55 spin_llm_local::LocalLlmEngine::new(models_dir_parent.join("ai-models"))
56 };
57 #[cfg(not(feature = "llm"))]
58 let engine = {
59 let _ = state_dir;
60 noop::NoopLlmEngine
61 };
62 let engine = Arc::new(Mutex::new(engine)) as Arc<Mutex<dyn LlmEngine>>;
63 Ok(move || engine.clone())
64}
65
66#[async_trait]
67impl LlmEngine for RemoteHttpLlmEngine {
68 async fn infer(
69 &mut self,
70 model: v1::InferencingModel,
71 prompt: String,
72 params: v2::InferencingParams,
73 ) -> Result<v2::InferencingResult, v2::Error> {
74 spin_telemetry::monotonic_counter!(spin.llm_infer = 1, model_name = model);
75 self.infer(model, prompt, params).await
76 }
77
78 async fn generate_embeddings(
79 &mut self,
80 model: v2::EmbeddingModel,
81 data: Vec<String>,
82 ) -> Result<v2::EmbeddingsResult, v2::Error> {
83 self.generate_embeddings(model, data).await
84 }
85
86 fn summary(&self) -> Option<String> {
87 Some(format!("model at {}", self.url()))
88 }
89}
90
91pub fn runtime_config_from_toml(
92 table: &impl GetTomlValue,
93 state_dir: Option<PathBuf>,
94) -> anyhow::Result<Option<RuntimeConfig>> {
95 let Some(value) = table.get("llm_compute") else {
96 return Ok(None);
97 };
98 let config: LlmCompute = value.clone().try_into()?;
99
100 Ok(Some(RuntimeConfig {
101 engine: config.into_engine(state_dir)?,
102 }))
103}
104
/// The LLM compute backend selected by runtime configuration.
///
/// Deserialized from the `llm_compute` runtime-config value; serde's
/// internally-tagged representation means the TOML `type` field picks the
/// variant (`type = "spin"` or `type = "remote_http"`).
#[derive(Debug, serde::Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum LlmCompute {
    // Default engine: local when the `llm` feature is enabled, otherwise a
    // no-op engine that errors on every request (see `into_engine`).
    Spin,
    // Delegate all LLM operations to a remote HTTP service.
    RemoteHttp(RemoteHttpCompute),
}
111
112impl LlmCompute {
113 fn into_engine(self, state_dir: Option<PathBuf>) -> anyhow::Result<Arc<Mutex<dyn LlmEngine>>> {
114 let engine: Arc<Mutex<dyn LlmEngine>> = match self {
115 #[cfg(not(feature = "llm"))]
116 LlmCompute::Spin => {
117 let _ = state_dir;
118 Arc::new(Mutex::new(noop::NoopLlmEngine))
119 }
120 #[cfg(feature = "llm")]
121 LlmCompute::Spin => default_engine_creator(state_dir)?.create(),
122 LlmCompute::RemoteHttp(config) => Arc::new(Mutex::new(RemoteHttpLlmEngine::new(
123 config.url,
124 config.auth_token,
125 ))),
126 };
127 Ok(engine)
128 }
129}
130
131#[derive(Debug, serde::Deserialize)]
132pub struct RemoteHttpCompute {
133 url: Url,
134 auth_token: String,
135}
136
137#[cfg(not(feature = "llm"))]
139mod noop {
140 use super::*;
141
142 #[derive(Clone, Copy)]
143 pub(super) struct NoopLlmEngine;
144
145 #[async_trait]
146 impl LlmEngine for NoopLlmEngine {
147 async fn infer(
148 &mut self,
149 _model: v2::InferencingModel,
150 _prompt: String,
151 _params: v2::InferencingParams,
152 ) -> Result<v2::InferencingResult, v2::Error> {
153 Err(v2::Error::RuntimeError(
154 "Local LLM operations are not supported in this version of Spin.".into(),
155 ))
156 }
157
158 async fn generate_embeddings(
159 &mut self,
160 _model: v2::EmbeddingModel,
161 _data: Vec<String>,
162 ) -> Result<v2::EmbeddingsResult, v2::Error> {
163 Err(v2::Error::RuntimeError(
164 "Local LLM operations are not supported in this version of Spin.".into(),
165 ))
166 }
167
168 fn summary(&self) -> Option<String> {
169 Some("noop model".to_owned())
170 }
171 }
172}