//! Factor providing LLM (large language model) support to Spin components.
mod host;
pub mod spin;

use std::collections::{HashMap, HashSet};
use std::sync::Arc;

use async_trait::async_trait;
use spin_factor_otel::OtelFactorState;
use spin_factors::{
    ConfigureAppContext, Factor, FactorData, PrepareContext, RuntimeFactors, SelfInstanceBuilder,
};
use spin_locked_app::MetadataKey;
use spin_world::v1::llm::{self as v1};
use spin_world::v2::llm::{self as v2};
use tokio::sync::Mutex;
/// Metadata key under which a component's allowed AI model names are stored
/// in the locked application manifest ("ai_models").
pub const ALLOWED_MODELS_KEY: MetadataKey<Vec<String>> = MetadataKey::new("ai_models");
18
/// The factor for LLMs.
pub struct LlmFactor {
    // Used to create the engine when no runtime configuration provides one
    // (see `Factor::configure_app`).
    default_engine_creator: Box<dyn LlmEngineCreator>,
}
23
24impl LlmFactor {
25    /// Creates a new LLM factor with the given default engine creator.
26    ///
27    /// The default engine creator is used to create the engine if no runtime configuration is provided.
28    pub fn new<F: LlmEngineCreator + 'static>(default_engine_creator: F) -> Self {
29        Self {
30            default_engine_creator: Box::new(default_engine_creator),
31        }
32    }
33}
34
impl Factor for LlmFactor {
    type RuntimeConfig = RuntimeConfig;
    type AppState = AppState;
    type InstanceBuilder = InstanceState;

    fn init(&mut self, ctx: &mut impl spin_factors::InitContext<Self>) -> anyhow::Result<()> {
        // Link both the v1 and v2 `llm` world interfaces into the runtime.
        ctx.link_bindings(spin_world::v1::llm::add_to_linker::<_, FactorData<Self>>)?;
        ctx.link_bindings(spin_world::v2::llm::add_to_linker::<_, FactorData<Self>>)?;
        Ok(())
    }

    fn configure_app<T: RuntimeFactors>(
        &self,
        mut ctx: ConfigureAppContext<T, Self>,
    ) -> anyhow::Result<Self::AppState> {
        // Build a map from component id to the set of model names that
        // component is allowed to use, read from `ALLOWED_MODELS_KEY`
        // metadata (empty set if the key is absent).
        let component_allowed_models = ctx
            .app()
            .components()
            .map(|component| {
                Ok((
                    component.id().to_string(),
                    component
                        .get_metadata(ALLOWED_MODELS_KEY)?
                        .unwrap_or_default()
                        .into_iter()
                        .collect::<HashSet<_>>()
                        // Converts the set into the map's value type
                        // (an `Arc<HashSet<String>>`, per `AppState`).
                        .into(),
                ))
            })
            .collect::<anyhow::Result<_>>()?;
        // Prefer the engine from runtime config; otherwise fall back to the
        // default engine creator supplied at construction.
        let engine = ctx
            .take_runtime_config()
            .map(|c| c.engine)
            .unwrap_or_else(|| self.default_engine_creator.create());
        Ok(AppState {
            engine,
            component_allowed_models,
        })
    }

    fn prepare<T: RuntimeFactors>(
        &self,
        mut ctx: PrepareContext<T, Self>,
    ) -> anyhow::Result<Self::InstanceBuilder> {
        // Look up this component's allow-list; components with no entry get
        // an empty set (i.e. no models allowed).
        let allowed_models = ctx
            .app_state()
            .component_allowed_models
            .get(ctx.app_component().id())
            .cloned()
            .unwrap_or_default();
        // Cheap clone: the engine is shared behind `Arc<Mutex<_>>`.
        let engine = ctx.app_state().engine.clone();
        let otel = OtelFactorState::from_prepare_context(&mut ctx)?;

        Ok(InstanceState {
            engine,
            allowed_models,
            otel,
        })
    }
}
95
/// The application state for the LLM factor.
pub struct AppState {
    // The shared LLM engine used by all instances of this app.
    engine: Arc<Mutex<dyn LlmEngine>>,
    // Maps each component id to the set of model names it may use.
    component_allowed_models: HashMap<String, Arc<HashSet<String>>>,
}
101
/// The instance state for the LLM factor.
pub struct InstanceState {
    // Engine shared with the app state (cloned `Arc`).
    engine: Arc<Mutex<dyn LlmEngine>>,
    /// The set of model names this component instance is allowed to use.
    pub allowed_models: Arc<HashSet<String>>,
    // OTel state captured at prepare time; presumably used by the host
    // implementation in `mod host` — confirm there.
    otel: OtelFactorState,
}
108
/// The runtime configuration for the LLM factor.
pub struct RuntimeConfig {
    // The engine to use instead of the factor's default engine creator.
    engine: Arc<Mutex<dyn LlmEngine>>,
}
113
// `InstanceState` is its own instance builder (no separate build step needed).
impl SelfInstanceBuilder for InstanceState {}
115
/// The interface for a language model engine.
#[async_trait]
pub trait LlmEngine: Send + Sync {
    /// Runs inferencing on `model` with the given `prompt` and parameters.
    ///
    /// `max_result_bytes` presumably caps the size of the returned result —
    /// confirm against implementors.
    async fn infer(
        &mut self,
        model: v1::InferencingModel,
        prompt: String,
        params: v2::InferencingParams,
        max_result_bytes: usize,
    ) -> Result<v2::InferencingResult, v2::Error>;

    /// Generates embeddings for each string in `data` using `model`.
    ///
    /// `max_result_bytes` presumably caps the size of the returned result —
    /// confirm against implementors.
    async fn generate_embeddings(
        &mut self,
        model: v2::EmbeddingModel,
        data: Vec<String>,
        max_result_bytes: usize,
    ) -> Result<v2::EmbeddingsResult, v2::Error>;

    /// A human-readable summary of the given engine's configuration
    ///
    /// Example: "local model"
    fn summary(&self) -> Option<String> {
        None
    }
}
141
/// A creator for an LLM engine.
pub trait LlmEngineCreator: Send + Sync {
    /// Creates a new shared engine instance.
    fn create(&self) -> Arc<Mutex<dyn LlmEngine>>;
}
146
147impl<F> LlmEngineCreator for F
148where
149    F: Fn() -> Arc<Mutex<dyn LlmEngine>> + Send + Sync,
150{
151    fn create(&self) -> Arc<Mutex<dyn LlmEngine>> {
152        self()
153    }
154}