diff --git a/README.md b/README.md
index 4e7949f..7364093 100644
--- a/README.md
+++ b/README.md
@@ -28,7 +28,7 @@ On first run, a default config is created at `~/.commandok/config.toml`. Add you
 
 ```toml
 [commandok]
-# Options: anthropic, openai, google, mistral, ollama,
+# Options: anthropic, openai, google, mistral, ollama, llamacpp,
 #          openrouter, xai, vercel_ai_gateway, litert_lm,
 #          apple_intelligence (requires building with --features apple-intelligence on macOS 26+ ARM)
 provider = "anthropic"
@@ -59,6 +59,10 @@ model = "mistral-small-latest"
 model = "gemma3:1b"
 # api_url = "http://localhost:11434"  # default, change if running elsewhere
 
+[llamacpp]
+model = "default"
+# api_url = "http://localhost:8080"  # default, change if running elsewhere
+
 [openrouter]
 api_key = ""
 model = "qwen/qwen3.6-plus:free"
diff --git a/src/config.rs b/src/config.rs
index 93b1178..9bfad5b 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -10,6 +10,7 @@ pub struct Config {
     pub anthropic: Option<ProviderConfig>,
     pub openai: Option<ProviderConfig>,
     pub google: Option<ProviderConfig>,
+    pub llamacpp: Option<ProviderConfig>,
     pub lmstudio: Option<ProviderConfig>,
     pub mistral: Option<ProviderConfig>,
     pub ollama: Option<ProviderConfig>,
@@ -52,7 +53,7 @@ fn config_path() -> PathBuf {
 }
 
 const DEFAULT_CONFIG: &str = r#"[commandok]
-# Options: anthropic, openai, google, mistral, ollama,
+# Options: anthropic, openai, google, mistral, ollama, llamacpp,
 #          openrouter, xai, vercel_ai_gateway, litert_lm,
 #          apple_intelligence (requires building with --features apple-intelligence on macOS 26+ ARM)
 provider = "anthropic"
@@ -83,6 +84,10 @@ model = "mistral-small-latest"
 model = "gemma3:1b"
 # api_url = "http://localhost:11434"  # default, change if running elsewhere
 
+[llamacpp]
+model = "default"
+# api_url = "http://localhost:8080"  # default, change if running elsewhere
+
 [openrouter]
 api_key = ""
 model = "qwen/qwen3.6-plus:free"
@@ -170,6 +175,7 @@ const PROVIDER_ORDER: &[&str] = &[
     "anthropic",
     "openai",
     "google",
+    "llamacpp",
     "lmstudio",
     "mistral",
     "ollama",
@@ -191,6 +197,7 @@ impl Config {
             "anthropic" => self.anthropic.as_ref(),
             "openai" => self.openai.as_ref(),
             "google" => self.google.as_ref(),
+            "llamacpp" => self.llamacpp.as_ref(),
             "lmstudio" => self.lmstudio.as_ref(),
             "mistral" => self.mistral.as_ref(),
             "ollama" => self.ollama.as_ref(),
diff --git a/src/provider/llamacpp.rs b/src/provider/llamacpp.rs
new file mode 100644
index 0000000..a64c8b1
--- /dev/null
+++ b/src/provider/llamacpp.rs
@@ -0,0 +1,43 @@
+use super::ApiEvent;
+use crate::config::ProviderConfig;
+use tokio::sync::mpsc;
+
+/// Sends a streaming chat request to a llama.cpp server.
+///
+/// Posts to `{api_url}/v1/chat/completions`, using `http://localhost:8080` when `cfg.api_url`
+/// is empty. `tx` is handed to the shared `send_request` / `parse_sse_stream` helpers.
+pub async fn stream(
+    cfg: &ProviderConfig,
+    query: &str,
+    system_prompt: &str,
+    tx: mpsc::UnboundedSender<ApiEvent>,
+) {
+    // Trailing slashes are trimmed so the endpoint is not built with a doubled `/`.
+    let root = if !cfg.api_url.is_empty() {
+        cfg.api_url.trim_end_matches('/')
+    } else {
+        "http://localhost:8080"
+    };
+    let endpoint = format!("{root}/v1/chat/completions");
+    let messages = serde_json::json!([
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": query}
+    ]);
+    let payload = serde_json::json!({"model": cfg.model, "messages": messages, "stream": true});
+    let pending = reqwest::Client::new()
+        .post(&endpoint)
+        .header("content-type", "application/json")
+        .json(&payload);
+
+    // `None` means there is no response to read, so there is nothing left to do.
+    let resp = match super::send_request(pending, &tx).await {
+        Some(resp) => resp,
+        None => return,
+    };
+    // Extractor: the text of each parsed JSON value lives at `choices[0].delta.content`.
+    super::parse_sse_stream(resp, &tx, |chunk| {
+        let text = chunk["choices"][0]["delta"]["content"].as_str();
+        text.map(str::to_owned)
+    })
+    .await;
+}
diff --git a/src/provider/mod.rs b/src/provider/mod.rs
index 08afc0c..a39b1d0 100644
--- a/src/provider/mod.rs
+++ b/src/provider/mod.rs
@@ -7,6 +7,7 @@ pub mod apple_intelligence;
 pub mod claude;
 pub mod gemini;
 pub mod litert_lm;
+pub mod llamacpp;
 pub mod lmstudio;
 pub mod mistral;
 pub mod ollama;
@@ -29,6 +30,7 @@ pub enum Provider {
     Anthropic(ProviderConfig),
     OpenAi(ProviderConfig),
     Google(ProviderConfig),
+    LlamaCpp(ProviderConfig),
     LMStudio(ProviderConfig),
     Mistral(ProviderConfig),
     Ollama(ProviderConfig),
@@ -50,6 +52,7 @@ impl Provider {
             "anthropic" => Provider::Anthropic(cfg.clone()),
             "openai" => Provider::OpenAi(cfg.clone()),
             "google" => Provider::Google(cfg.clone()),
+            "llamacpp" => Provider::LlamaCpp(cfg.clone()),
             "lmstudio" => Provider::LMStudio(cfg.clone()),
             "mistral" => Provider::Mistral(cfg.clone()),
             "ollama" => Provider::Ollama(cfg.clone()),
@@ -77,6 +80,7 @@ impl Provider {
             Provider::Anthropic(cfg) => claude::stream(cfg, query, system_prompt, tx).await,
             Provider::OpenAi(cfg) => openai::stream(cfg, query, system_prompt, tx).await,
             Provider::Google(cfg) => gemini::stream(cfg, query, system_prompt, tx).await,
+            Provider::LlamaCpp(cfg) => llamacpp::stream(cfg, query, system_prompt, tx).await,
             Provider::LMStudio(cfg) => lmstudio::stream(cfg, query, system_prompt, tx).await,
             Provider::Mistral(cfg) => mistral::stream(cfg, query, system_prompt, tx).await,
             Provider::Ollama(cfg) => ollama::stream(cfg, query, system_prompt, tx).await,