Created Sebas Tin and connected Synology Chat
@@ -0,0 +1,7 @@
/// Abstraction layer for all LLM providers.
/// New providers (Ollama, Mistral) are added here as further submodules.
pub mod provider;
pub mod lmstudio;

// Re-export the most important types so callers only need to write `api::llm::X`
pub use provider::{LlmProvider, LlmRequest, LlmResponse, Message};
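
A hedged sketch of what the re-export buys, assuming this module is mounted at `api::llm` as the comment above implies; the provider-agnostic types come from the module root, while the concrete LM Studio backend still lives in its submodule:

// Hedged sketch: import paths after the re-export (module mount point assumed).
use api::llm::{LlmProvider, LlmRequest, LlmResponse, Message};
use api::llm::lmstudio::LmStudioProvider;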
@@ -0,0 +1,150 @@
use async_trait::async_trait;
use reqwest::Client;
use serde::{Deserialize, Serialize};
use nazarick_core::types::Result;
use nazarick_core::error::NazarickError;
use crate::llm::provider::{LlmProvider, LlmRequest, LlmResponse, Message};

/// LM Studio provider, intended for local development on the dev machine.
/// LM Studio emulates the OpenAI Chat Completions API, so we use the
/// OpenAI-compatible request/response format.
pub struct LmStudioProvider {
    /// HTTP client, reused across requests for connection pooling
    client: Client,
    /// Base URL of LM Studio, default: http://localhost:1234
    base_url: String,
    /// Exact model identifier as displayed in LM Studio
    model: String,
}

impl LmStudioProvider {
    /// Creates a new LM Studio provider.
    /// `base_url`: e.g. "http://localhost:1234"
    /// `model`: e.g. "qwen/qwen3.5-9b"
    pub fn new(base_url: impl Into<String>, model: impl Into<String>) -> Self {
        Self {
            client: Client::new(),
            base_url: base_url.into(),
            model: model.into(),
        }
    }
}
/// Internal message format, used both for the request (Serialize)
/// and for the response (Deserialize).
/// Qwen3 uses reasoning_content instead of content when thinking mode is active.
#[derive(Serialize, Deserialize)]
struct OpenAiMessage {
    role: String,
    /// Regular answer; empty when Qwen3 thinking mode is active
    #[serde(default)]
    content: String,
    /// Qwen3 thinking mode; holds the actual answer when content is empty
    #[serde(default)]
    reasoning_content: String,
}

/// Internal request format, matching the OpenAI Chat Completions API.
#[derive(Serialize)]
struct OpenAiRequest {
    model: String,
    messages: Vec<OpenAiMessage>,
    max_tokens: u32,
    temperature: f32,
    /// Disable Qwen3 thinking mode. This does not work on every LM Studio
    /// version, which is why we also read reasoning_content when parsing.
    thinking: bool,
}

/// Response format of the OpenAI Chat Completions API.
#[derive(Deserialize)]
struct OpenAiResponse {
    choices: Vec<OpenAiChoice>,
    usage: Option<OpenAiUsage>,
}

/// A single answer candidate (LLMs can return several; we always use the first).
#[derive(Deserialize)]
struct OpenAiChoice {
    message: OpenAiMessage,
}

/// Token usage as reported by the API.
#[derive(Deserialize)]
struct OpenAiUsage {
    prompt_tokens: u64,
    completion_tokens: u64,
}

/// Converts our internal messages into the OpenAI format.
/// reasoning_content is never sent; only role and content go out.
fn to_openai_message(msg: &Message) -> OpenAiMessage {
    OpenAiMessage {
        role: msg.role.clone(),
        content: msg.content.clone(),
        reasoning_content: String::new(),
    }
}
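
The two `#[serde(default)]` attributes are what make thinking-mode responses parse at all: whichever of `content` / `reasoning_content` the server omits falls back to an empty string. A minimal test sketch of that behaviour (the JSON payload is illustrative, not captured from LM Studio):

#[cfg(test)]
mod openai_message_tests {
    use super::*;

    #[test]
    fn missing_content_defaults_to_empty_string() {
        // Illustrative thinking-mode payload: `content` is absent, the
        // answer sits in `reasoning_content`.
        let json = r#"{"role":"assistant","reasoning_content":"actual answer"}"#;
        let msg: OpenAiMessage =
            serde_json::from_str(json).expect("message JSON should parse");
        assert_eq!(msg.content, "");
        assert_eq!(msg.reasoning_content, "actual answer");
    }
}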
#[async_trait]
impl LlmProvider for LmStudioProvider {
    async fn complete(&self, request: LlmRequest) -> Result<LlmResponse> {
        // Convert the request into the OpenAI format
        let openai_request = OpenAiRequest {
            model: self.model.clone(),
            messages: request.messages.iter().map(to_openai_message).collect(),
            max_tokens: request.max_tokens,
            temperature: request.temperature,
            thinking: false,
        };

        // Send the HTTP POST to LM Studio
        let response = self.client
            .post(format!("{}/v1/chat/completions", self.base_url))
            .json(&openai_request)
            .send()
            .await
            .map_err(|e| NazarickError::Api(e.to_string()))?;

        // Turn HTTP error statuses (e.g. 404, 500) into errors
        let response = response
            .error_for_status()
            .map_err(|e| NazarickError::Api(e.to_string()))?;

        // Read the raw JSON response; this consumes the response
        let raw_text = response
            .text()
            .await
            .map_err(|e| NazarickError::Api(e.to_string()))?;

        // Parse the JSON response
        let openai_response: OpenAiResponse = serde_json::from_str(&raw_text)
            .map_err(|e| NazarickError::Api(e.to_string()))?;

        // Extract the content. Qwen3 thinking mode writes to reasoning_content
        // instead of content; we take whichever is filled, content wins.
        let content = openai_response.choices
            .into_iter()
            .next()
            .map(|c| {
                if !c.message.content.is_empty() {
                    c.message.content
                } else {
                    c.message.reasoning_content
                }
            })
            .unwrap_or_default();

        // Extract the token counts from usage (if present)
        let (tokens_input, tokens_output) = openai_response.usage
            .map(|u| (u.prompt_tokens, u.completion_tokens))
            .unwrap_or((0, 0));

        Ok(LlmResponse { content, tokens_input, tokens_output })
    }

    fn name(&self) -> &str {
        "LmStudio"
    }
}
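
Putting the pieces together, a hedged end-to-end sketch. It assumes an LM Studio instance on the default port, the example model name from the constructor docs, and that it is called from whatever async runtime the rest of the crate uses; the prompts are illustrative:

// Hedged usage sketch: construct the provider, build a request, extract the answer.
use crate::llm::lmstudio::LmStudioProvider;
use crate::llm::{LlmProvider, LlmRequest, Message};
use nazarick_core::types::Result;

async fn ask_local_model(question: &str) -> Result<String> {
    let provider = LmStudioProvider::new("http://localhost:1234", "qwen/qwen3.5-9b");
    let request = LlmRequest {
        messages: vec![
            Message::system("You are a concise assistant."),
            Message::user(question),
        ],
        max_tokens: 512,
        temperature: 0.7,
    };
    let response = provider.complete(request).await?;
    // response.tokens_input / response.tokens_output are available for usage tracking.
    Ok(response.content)
}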
@@ -0,0 +1,61 @@
use nazarick_core::types::Result;

/// Represents a single message in a conversation.
/// Matches the message format used by all common LLM APIs.
#[derive(Debug, Clone)]
pub struct Message {
    /// Role of the sender: "system", "user" or "assistant"
    pub role: String,
    /// Content of the message
    pub content: String,
}

impl Message {
    /// Creates a system message (e.g. the personality prompt)
    pub fn system(content: impl Into<String>) -> Self {
        Self { role: "system".to_string(), content: content.into() }
    }

    /// Creates a user message
    pub fn user(content: impl Into<String>) -> Self {
        Self { role: "user".to_string(), content: content.into() }
    }

    /// Creates an assistant message (previous answers kept for context)
    pub fn assistant(content: impl Into<String>) -> Self {
        Self { role: "assistant".to_string(), content: content.into() }
    }
}

/// Configuration for a single LLM call.
#[derive(Debug, Clone)]
pub struct LlmRequest {
    /// The full conversation history including the system prompt
    pub messages: Vec<Message>,
    /// Maximum number of tokens in the response
    pub max_tokens: u32,
    /// Creativity of the answer (0.0 = deterministic, 1.0 = very creative)
    pub temperature: f32,
}

/// Response of an LLM call.
#[derive(Debug, Clone)]
pub struct LlmResponse {
    /// The generated text
    pub content: String,
    /// Number of input tokens (for usage tracking)
    pub tokens_input: u64,
    /// Number of output tokens (for usage tracking)
    pub tokens_output: u64,
}

/// Central trait that every LLM provider implements.
#[async_trait::async_trait]
pub trait LlmProvider: Send + Sync {
    /// Sends a request to the LLM and returns its answer.
    async fn complete(&self, request: LlmRequest) -> Result<LlmResponse>;

    /// Returns the provider's name.
    /// Used for logging and usage tracking.
    fn name(&self) -> &str;
}
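
This trait is the extension point the module comment refers to: a new backend only has to implement `complete` and `name`. A minimal sketch of such an implementation, here a hypothetical echo provider that is not part of this commit but is shaped the same way an Ollama or Mistral provider would be:

use async_trait::async_trait;
use nazarick_core::types::Result;
use crate::llm::{LlmProvider, LlmRequest, LlmResponse};

/// Hypothetical provider that echoes the last user message; handy as a test double.
pub struct EchoProvider;

#[async_trait]
impl LlmProvider for EchoProvider {
    async fn complete(&self, request: LlmRequest) -> Result<LlmResponse> {
        // Find the most recent user message and return its content unchanged.
        let content = request
            .messages
            .iter()
            .rev()
            .find(|m| m.role == "user")
            .map(|m| m.content.clone())
            .unwrap_or_default();
        Ok(LlmResponse { content, tokens_input: 0, tokens_output: 0 })
    }

    fn name(&self) -> &str {
        "Echo"
    }
}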