Thinking Mode Filter, SkillFormat Strategy, Persönlichkeitsanpassung funktioniert
This commit is contained in:
@@ -3,7 +3,7 @@ use reqwest::Client;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use nazarick_core::types::Result;
|
||||
use nazarick_core::error::NazarickError;
|
||||
use nazarick_core::llm::{LlmProvider, LlmRequest, LlmResponse, Message};
|
||||
use nazarick_core::llm::{LlmProvider, LlmRequest, LlmResponse, Message, SkillFormat};
|
||||
|
||||
/// LM Studio Provider — für lokale Entwicklung auf dem Entwicklungsrechner.
|
||||
/// LM Studio emuliert die OpenAI Chat Completions API, daher nutzen
|
||||
@@ -28,6 +28,22 @@ impl LmStudioProvider {
|
||||
model: model.into(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Removes Qwen3 thinking-mode blocks (`<think>…</think>`) from a response.
///
/// Acts as a robust fallback in case the model ignores `thinking: false`.
///
/// Behavior:
/// - Every `<think>…</think>` block is removed, where the closing tag is the
///   first `</think>` found *after* the opening tag.
/// - An opening `<think>` without a later closing tag discards everything
///   from that tag to the end of the text.
/// - A stray `</think>` that has no preceding `<think>` is left untouched.
/// - The result is trimmed of leading and trailing whitespace.
fn strip_thinking(response: &str) -> String {
    const OPEN_TAG: &str = "<think>";
    const CLOSE_TAG: &str = "</think>";

    let mut result = response.to_string();

    while let Some(start) = result.find(OPEN_TAG) {
        // Look for the closing tag only behind the opening tag. Searching the
        // whole string could match an earlier stray `</think>`, which yields
        // an inverted (panicking) or empty (never terminating) range.
        let body_start = start + OPEN_TAG.len();
        match result[body_start..].find(CLOSE_TAG) {
            Some(offset) => {
                let end = body_start + offset + CLOSE_TAG.len();
                // Remove the block in place; the loop then rescans the
                // shortened text for further blocks.
                result.replace_range(start..end, "");
            }
            None => {
                // Unterminated block: drop everything from the opening tag on.
                result.truncate(start);
                break;
            }
        }
    }

    result.trim().to_string()
}
|
||||
}
|
||||
|
||||
/// Internes Message-Format — wird sowohl für Request (Serialize)
|
||||
@@ -52,7 +68,7 @@ struct OpenAiRequest {
|
||||
max_tokens: u32,
|
||||
temperature: f32,
|
||||
/// Qwen3 Thinking Mode deaktivieren — funktioniert nicht bei allen
|
||||
/// LM Studio Versionen, daher lesen wir zusätzlich reasoning_content
|
||||
/// LM Studio Versionen, daher strippen wir zusätzlich im Response
|
||||
thinking: bool,
|
||||
}
|
||||
|
||||
@@ -124,7 +140,7 @@ impl LlmProvider for LmStudioProvider {
|
||||
|
||||
// Content extrahieren — Qwen3 Thinking Mode schreibt in reasoning_content
|
||||
// statt content. Wir nehmen was befüllt ist, content hat Priorität.
|
||||
let content = openai_response.choices
|
||||
let raw_content = openai_response.choices
|
||||
.into_iter()
|
||||
.next()
|
||||
.map(|c| {
|
||||
@@ -136,6 +152,9 @@ impl LlmProvider for LmStudioProvider {
|
||||
})
|
||||
.unwrap_or_default();
|
||||
|
||||
// Thinking Tags entfernen — Fallback falls thinking:false ignoriert wird
|
||||
let content = Self::strip_thinking(&raw_content);
|
||||
|
||||
// Token-Zahlen aus Usage extrahieren (falls vorhanden)
|
||||
let (tokens_input, tokens_output) = openai_response.usage
|
||||
.map(|u| (u.prompt_tokens, u.completion_tokens))
|
||||
@@ -147,4 +166,9 @@ impl LlmProvider for LmStudioProvider {
|
||||
/// Returns the fixed identifier string `"LmStudio"` for this provider.
fn name(&self) -> &str {
|
||||
"LmStudio"
|
||||
}
|
||||
|
||||
/// Local models served via LM Studio use the XML format for skill calls.
|
||||
fn skill_format(&self) -> SkillFormat {
|
||||
SkillFormat::Xml
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user