1mod host;
2pub mod spin;
3
4use std::collections::{HashMap, HashSet};
5use std::sync::Arc;
6
7use async_trait::async_trait;
8use spin_factor_otel::OtelFactorState;
9use spin_factors::{
10 ConfigureAppContext, Factor, FactorData, PrepareContext, RuntimeFactors, SelfInstanceBuilder,
11};
12use spin_locked_app::MetadataKey;
13use spin_world::v1::llm::{self as v1};
14use spin_world::v2::llm::{self as v2};
15use tokio::sync::Mutex;
16
/// Metadata key (`"ai_models"`) under which a component lists model names as a
/// `Vec<String>`. `LlmFactor::configure_app` reads this key for every component
/// to build that component's set of allowed models.
pub const ALLOWED_MODELS_KEY: MetadataKey<Vec<String>> = MetadataKey::new("ai_models");
18
/// The factor type for LLM support; it implements [`Factor`] below.
pub struct LlmFactor {
    /// Fallback used by `configure_app` to create an engine when no runtime
    /// configuration supplies one.
    default_engine_creator: Box<dyn LlmEngineCreator>,
}
23
24impl LlmFactor {
25 pub fn new<F: LlmEngineCreator + 'static>(default_engine_creator: F) -> Self {
29 Self {
30 default_engine_creator: Box::new(default_engine_creator),
31 }
32 }
33}
34
35impl Factor for LlmFactor {
36 type RuntimeConfig = RuntimeConfig;
37 type AppState = AppState;
38 type InstanceBuilder = InstanceState;
39
40 fn init(&mut self, ctx: &mut impl spin_factors::InitContext<Self>) -> anyhow::Result<()> {
41 ctx.link_bindings(spin_world::v1::llm::add_to_linker::<_, FactorData<Self>>)?;
42 ctx.link_bindings(spin_world::v2::llm::add_to_linker::<_, FactorData<Self>>)?;
43 Ok(())
44 }
45
46 fn configure_app<T: RuntimeFactors>(
47 &self,
48 mut ctx: ConfigureAppContext<T, Self>,
49 ) -> anyhow::Result<Self::AppState> {
50 let component_allowed_models = ctx
51 .app()
52 .components()
53 .map(|component| {
54 Ok((
55 component.id().to_string(),
56 component
57 .get_metadata(ALLOWED_MODELS_KEY)?
58 .unwrap_or_default()
59 .into_iter()
60 .collect::<HashSet<_>>()
61 .into(),
62 ))
63 })
64 .collect::<anyhow::Result<_>>()?;
65 let engine = ctx
66 .take_runtime_config()
67 .map(|c| c.engine)
68 .unwrap_or_else(|| self.default_engine_creator.create());
69 Ok(AppState {
70 engine,
71 component_allowed_models,
72 })
73 }
74
75 fn prepare<T: RuntimeFactors>(
76 &self,
77 mut ctx: PrepareContext<T, Self>,
78 ) -> anyhow::Result<Self::InstanceBuilder> {
79 let allowed_models = ctx
80 .app_state()
81 .component_allowed_models
82 .get(ctx.app_component().id())
83 .cloned()
84 .unwrap_or_default();
85 let engine = ctx.app_state().engine.clone();
86 let otel = OtelFactorState::from_prepare_context(&mut ctx)?;
87
88 Ok(InstanceState {
89 engine,
90 allowed_models,
91 otel,
92 })
93 }
94}
95
/// Application-wide state produced by `LlmFactor::configure_app`.
pub struct AppState {
    /// Engine shared by all instances; `prepare` clones this `Arc` handle.
    engine: Arc<Mutex<dyn LlmEngine>>,
    /// Maps a component id to the set of model names read from that
    /// component's [`ALLOWED_MODELS_KEY`] metadata.
    component_allowed_models: HashMap<String, Arc<HashSet<String>>>,
}
101
/// Per-instance state produced by `LlmFactor::prepare`.
pub struct InstanceState {
    /// Handle to the same engine held by [`AppState`].
    engine: Arc<Mutex<dyn LlmEngine>>,
    /// Model names listed for this instance's component (empty if none were
    /// recorded for its component id).
    pub allowed_models: Arc<HashSet<String>>,
    /// State obtained from `OtelFactorState::from_prepare_context`.
    otel: OtelFactorState,
}
108
/// Runtime configuration for [`LlmFactor`].
pub struct RuntimeConfig {
    /// Engine to use; when a runtime config is present, `configure_app` takes
    /// this engine instead of calling the default engine creator.
    engine: Arc<Mutex<dyn LlmEngine>>,
}
113
// Empty impl: no trait items are overridden here.
// NOTE(review): presumably this lets `InstanceState` act as its own instance
// builder (it is `Factor::InstanceBuilder` above) — confirm against spin_factors.
impl SelfInstanceBuilder for InstanceState {}
115
/// Interface an LLM backend implements so it can be stored as the factor's
/// engine (`Arc<Mutex<dyn LlmEngine>>`).
///
/// Both async methods take `&mut self`, so callers need exclusive access to
/// the engine (the factor keeps it behind a `tokio::sync::Mutex`).
#[async_trait]
pub trait LlmEngine: Send + Sync {
    /// Produces an inferencing result for `prompt` using `model` and `params`.
    ///
    /// NOTE(review): `max_result_bytes` is presumably an upper bound on the
    /// size of the returned result — confirm with the implementations.
    ///
    /// # Errors
    ///
    /// Returns a `v2::Error` as determined by the implementation.
    async fn infer(
        &mut self,
        model: v1::InferencingModel,
        prompt: String,
        params: v2::InferencingParams,
        max_result_bytes: usize,
    ) -> Result<v2::InferencingResult, v2::Error>;

    /// Produces an embeddings result for the strings in `data` using `model`.
    ///
    /// NOTE(review): `max_result_bytes` is presumably an upper bound on the
    /// size of the returned result — confirm with the implementations.
    ///
    /// # Errors
    ///
    /// Returns a `v2::Error` as determined by the implementation.
    async fn generate_embeddings(
        &mut self,
        model: v2::EmbeddingModel,
        data: Vec<String>,
        max_result_bytes: usize,
    ) -> Result<v2::EmbeddingsResult, v2::Error>;

    /// Optional textual summary of the engine.
    ///
    /// The default implementation returns `None`; implementors may override it.
    fn summary(&self) -> Option<String> {
        None
    }
}
141
/// Factory for [`LlmEngine`] handles.
///
/// `LlmFactor` stores one of these and calls it in `configure_app` when the
/// runtime configuration does not provide an engine.
pub trait LlmEngineCreator: Send + Sync {
    /// Returns an engine wrapped in `Arc<Mutex<..>>`.
    fn create(&self) -> Arc<Mutex<dyn LlmEngine>>;
}
146
147impl<F> LlmEngineCreator for F
148where
149 F: Fn() -> Arc<Mutex<dyn LlmEngine>> + Send + Sync,
150{
151 fn create(&self) -> Arc<Mutex<dyn LlmEngine>> {
152 self()
153 }
154}