diff --git a/ask-llm.clj b/ask-llm.clj
index b114557..4fb2b85 100755
--- a/ask-llm.clj
+++ b/ask-llm.clj
@@ -7,6 +7,7 @@
 (def LLM-API-BASE-URL (or (System/getenv "LLM_API_BASE_URL") "https://api.openai.com/v1"))
 (def LLM-API-KEY (or (System/getenv "LLM_API_KEY") (System/getenv "OPENAI_API_KEY")))
 (def LLM-CHAT-MODEL (System/getenv "LLM_CHAT_MODEL"))
+(def LLM-STREAMING (not= "no" (System/getenv "LLM_STREAMING")))
 
 (def LLM-DEBUG (System/getenv "LLM_DEBUG"))
 
@@ -15,11 +16,6 @@
      :authorization (str "Bearer " bearer)}
     {:content-type "application/json"}))
 
-(defn http-post [url bearer payload]
-  (-> (http/post url {:headers (http-json-headers bearer)
-                      :body (json/encode payload)})
-      :body (json/parse-string true)))
-
 (def LLM-CHAT-URL (str LLM-API-BASE-URL "/chat/completions"))
 
 (def SYSTEM-PROMPT "Answer the question politely and concisely.")
@@ -30,19 +26,61 @@
 (defn add-message! [role content]
   (swap! llm-messages conj {:role role :content content}))
 
-(defn chat [messages]
-  (let [body {:messages messages
-              :model (or LLM-CHAT-MODEL "gpt-4o-mini")
-              :stop ["<|im_end|>" "<|end|>" "<|eot_id|>"]
-              :max_tokens 200
-              :temperature 0}
-        response (http-post LLM-CHAT-URL LLM-API-KEY body)]
-    (-> response :choices first :message :content str/trim)))
+(defn make-reader [response]
+  (java.io.BufferedReader. (java.io.InputStreamReader. (:body response))))
+
+(defn json-parse [s]
+  (json/parse-string s true))
+
+(defn parse-line [line handler]
+  (try
+    (some-> line json-parse
+            :choices first :delta :content
+            (#(do (when handler (handler %))
+                  (str %))))
+    (catch Exception _ nil)))
+
+(defn decode-stream [response handler]
+  (with-open [reader (make-reader response)]
+    (loop [answer ""]
+      (if-let [line (.readLine reader)]
+        (let [trimmed-line (str/trim line)]
+          (cond
+            (str/blank? trimmed-line) (recur answer)
+            (str/starts-with? trimmed-line "data: ")
+            (let [chunk (parse-line (str/trim (subs trimmed-line 6)) handler)]
+              (recur (str answer chunk)))
+            :else (recur answer)))
+        answer))))
+
+(defn chat [messages handler]
+  (let [stream (and LLM-STREAMING (some? handler))
+        payload {:messages messages
+                 :model (or LLM-CHAT-MODEL "gpt-4o-mini")
+                 :stop ["<|im_end|>" "<|end|>" "<|eot_id|>"]
+                 :max_tokens 200
+                 :temperature 0
+                 :stream stream}
+        options {:headers (http-json-headers LLM-API-KEY)
+                 :body (json/encode payload)}
+        options (if stream (assoc options :as :stream) options)
+        response (http/post LLM-CHAT-URL options)]
+    (if stream
+      (decode-stream response handler)
+      (let [body (-> response :body json-parse)
+            answer (-> body :choices first :message :content str/trim)]
+        (when handler (handler answer))
+        answer))))
+
+(defn print-stdout [s]
+  (print s)
+  (flush))
 
 (defn ask-llm [question]
   (add-message! "user" question)
-  (let [answer (chat @llm-messages)]
+  (let [answer (chat @llm-messages print-stdout)]
     (add-message! "assistant" answer)
+    (println)
     answer))
 
 (defmacro measure-time [f]
@@ -58,7 +96,7 @@
     (flush)
     (let [question (read-line)]
       (when question
-        (-> question ask-llm println measure-time)
+        (-> question ask-llm measure-time)
         (println)
         (flush)
         (recur)))))
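
Notes on the streaming decoder: decode-stream assumes the OpenAI-style
server-sent-events framing, where each chunk arrives on a "data: " line
carrying a JSON object and the stream terminates with "data: [DONE]". The
lines the loop consumes look like this (payloads are illustrative):

    data: {"choices":[{"delta":{"content":"Paris"}}]}
    data: {"choices":[{"delta":{"content":" is"}}]}
    data: [DONE]

parse-line pulls the :content out of the first choice's :delta, hands it to
the handler, and returns it so the loop can append it to the accumulated
answer. "[DONE]" (or any other non-JSON payload) fails to parse, the
exception is swallowed, and nil comes back, which str treats as an empty
chunk.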
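
A minimal REPL sketch of the new two-argument chat (assuming the script's
http and json aliases are loaded, LLM_API_KEY is set, and the question text
is illustrative):

    ;; streams tokens through print-stdout as they arrive,
    ;; then returns the complete answer
    (chat [{:role "system" :content SYSTEM-PROMPT}
           {:role "user" :content "What is the capital of France?"}]
          print-stdout)

    ;; a nil handler (or exporting LLM_STREAMING=no) forces the
    ;; non-streaming branch: one blocking request, trimmed answer back
    (chat @llm-messages nil)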
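
Because ask-llm still appends both turns to llm-messages once the stream
finishes, follow-up questions keep their context (the questions below are
hypothetical):

    (ask-llm "Which planet is the largest?")   ; prints the answer as it streams
    (ask-llm "How many moons does it have?")   ; "it" resolves via @llm-messages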