use std::path::PathBuf;
use std::sync::Arc;

use spin_factors::runtime_config::toml::GetTomlValue;
use spin_llm_remote_http::{ApiType, RemoteHttpLlmEngine};
use spin_world::async_trait;
use spin_world::v1::llm::{self as v1};
use spin_world::v2::llm::{self as v2};
use tokio::sync::Mutex;
use url::Url;

use crate::{LlmEngine, LlmEngineCreator, RuntimeConfig};

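/// Local inferencing support, compiled in only when the `llm` feature is
/// enabled; it satisfies the `LlmEngine` trait by delegating to
/// `spin_llm_local::LocalLlmEngine`'s inherent methods.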
#[cfg(feature = "llm")]
mod local {
    use super::*;
    pub use spin_llm_local::LocalLlmEngine;

    #[async_trait]
    impl LlmEngine for LocalLlmEngine {
        async fn infer(
            &mut self,
            model: v2::InferencingModel,
            prompt: String,
            params: v2::InferencingParams,
        ) -> Result<v2::InferencingResult, v2::Error> {
            self.infer(model, prompt, params).await
        }

        async fn generate_embeddings(
            &mut self,
            model: v2::EmbeddingModel,
            data: Vec<String>,
        ) -> Result<v2::EmbeddingsResult, v2::Error> {
            self.generate_embeddings(model, data).await
        }

        fn summary(&self) -> Option<String> {
            Some("local model".to_string())
        }
    }
}

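/// Creates the default engine creator: with the `llm` feature enabled this is
/// a `LocalLlmEngine` whose models live under `<state_dir>/ai-models` (falling
/// back to the current working directory when no state directory is set);
/// without the feature it is a noop engine that rejects all operations.
///
/// A minimal usage sketch (marked `ignore` since it assumes a caller with an
/// `anyhow::Result` context):
///
/// ```ignore
/// let engine = default_engine_creator(None)?.create();
/// ```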
pub fn default_engine_creator(
    state_dir: Option<PathBuf>,
) -> anyhow::Result<impl LlmEngineCreator + 'static> {
    #[cfg(feature = "llm")]
    let engine = {
        use anyhow::Context as _;
        let models_dir_parent = match state_dir {
            Some(ref dir) => dir.clone(),
            None => std::env::current_dir().context("failed to get current working directory")?,
        };
        spin_llm_local::LocalLlmEngine::new(models_dir_parent.join("ai-models"))
    };
    #[cfg(not(feature = "llm"))]
    let engine = {
        let _ = state_dir;
        noop::NoopLlmEngine
    };
    let engine = Arc::new(Mutex::new(engine)) as Arc<Mutex<dyn LlmEngine>>;
    Ok(move || engine.clone())
}

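// The remote engine forwards requests to an HTTP inferencing endpoint;
// inference calls are counted via `spin_telemetry` before being delegated to
// the engine's own methods.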
#[async_trait]
impl LlmEngine for RemoteHttpLlmEngine {
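    // Note: `v1::InferencingModel` and `v2::InferencingModel` are both
    // aliases for `String` in `spin_world`, so accepting the v1 alias here
    // still satisfies the trait signature.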
    async fn infer(
        &mut self,
        model: v1::InferencingModel,
        prompt: String,
        params: v2::InferencingParams,
    ) -> Result<v2::InferencingResult, v2::Error> {
        spin_telemetry::monotonic_counter!(spin.llm_infer = 1, model_name = model);
        self.infer(model, prompt, params).await
    }

    async fn generate_embeddings(
        &mut self,
        model: v2::EmbeddingModel,
        data: Vec<String>,
    ) -> Result<v2::EmbeddingsResult, v2::Error> {
        self.generate_embeddings(model, data).await
    }

    fn summary(&self) -> Option<String> {
        Some(format!("model at {}", self.url()))
    }
}

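/// Builds `RuntimeConfig` from the `llm_compute` table of a runtime-config
/// TOML document, returning `Ok(None)` when the table is absent. A sketch of
/// the two accepted shapes, derived from the serde attributes below (the URL
/// and token values are placeholders):
///
/// ```toml
/// # Local inferencing (requires the `llm` feature):
/// [llm_compute]
/// type = "spin"
///
/// # Or delegate to a remote HTTP endpoint:
/// [llm_compute]
/// type = "remote_http"
/// url = "https://llm.example.com"
/// auth_token = "example-token"
/// ```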
pub fn runtime_config_from_toml(
    table: &impl GetTomlValue,
    state_dir: Option<PathBuf>,
) -> anyhow::Result<Option<RuntimeConfig>> {
    let Some(value) = table.get("llm_compute") else {
        return Ok(None);
    };
    let config: LlmCompute = value.clone().try_into()?;

    Ok(Some(RuntimeConfig {
        engine: config.into_engine(state_dir)?,
    }))
}

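/// The compute backend selected by the `type` field of `llm_compute`:
/// `"spin"` for local inferencing, `"remote_http"` for a remote endpoint.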
#[derive(Debug, serde::Deserialize)]
#[serde(rename_all = "snake_case", tag = "type")]
pub enum LlmCompute {
    Spin,
    RemoteHttp(RemoteHttpCompute),
}

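// Converts the parsed config into a shared engine: `Spin` uses the default
// engine creator (or the noop engine when built without the `llm` feature),
// while `RemoteHttp` constructs a remote engine from the given URL, token,
// and API type.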
impl LlmCompute {
    fn into_engine(self, state_dir: Option<PathBuf>) -> anyhow::Result<Arc<Mutex<dyn LlmEngine>>> {
        let engine: Arc<Mutex<dyn LlmEngine>> = match self {
            #[cfg(not(feature = "llm"))]
            LlmCompute::Spin => {
                let _ = state_dir;
                Arc::new(Mutex::new(noop::NoopLlmEngine))
            }
            #[cfg(feature = "llm")]
            LlmCompute::Spin => default_engine_creator(state_dir)?.create(),
            LlmCompute::RemoteHttp(config) => Arc::new(Mutex::new(RemoteHttpLlmEngine::new(
                config.url,
                config.auth_token,
                config.api_type,
            ))),
        };
        Ok(engine)
    }
}

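/// Connection details for a remote HTTP inferencing endpoint; `api_type`
/// falls back to `ApiType::default()` when omitted from the TOML.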
#[derive(Debug, serde::Deserialize)]
pub struct RemoteHttpCompute {
    url: Url,
    auth_token: String,
    #[serde(default)]
    api_type: ApiType,
}

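/// A fallback engine for builds without the `llm` feature: every operation
/// fails with a runtime error explaining that local LLM support is not
/// compiled in.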
#[cfg(not(feature = "llm"))]
mod noop {
    use super::*;

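    /// Zero-sized engine that rejects all inferencing and embedding requests.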
    #[derive(Clone, Copy)]
    pub(super) struct NoopLlmEngine;

    #[async_trait]
    impl LlmEngine for NoopLlmEngine {
        async fn infer(
            &mut self,
            _model: v2::InferencingModel,
            _prompt: String,
            _params: v2::InferencingParams,
        ) -> Result<v2::InferencingResult, v2::Error> {
            Err(v2::Error::RuntimeError(
                "Local LLM operations are not supported in this version of Spin.".into(),
            ))
        }

        async fn generate_embeddings(
            &mut self,
            _model: v2::EmbeddingModel,
            _data: Vec<String>,
        ) -> Result<v2::EmbeddingsResult, v2::Error> {
            Err(v2::Error::RuntimeError(
                "Local LLM operations are not supported in this version of Spin.".into(),
            ))
        }

        fn summary(&self) -> Option<String> {
            Some("noop model".to_owned())
        }
    }
}