diff --git a/kong/llm/drivers/shared.lua b/kong/llm/drivers/shared.lua index 03c00bbcddb91..55169a29b97dd 100644 --- a/kong/llm/drivers/shared.lua +++ b/kong/llm/drivers/shared.lua @@ -175,7 +175,7 @@ _M.operation_map = { } _M.clear_response_headers = { - shared = { + shared = { -- deprecated, no longer used "Content-Length", }, openai = { diff --git a/kong/llm/plugin/base.lua b/kong/llm/plugin/base.lua index 0daca7a29419f..4bafcecefb5af 100644 --- a/kong/llm/plugin/base.lua +++ b/kong/llm/plugin/base.lua @@ -110,6 +110,8 @@ function MetaPlugin:header_filter(sub_plugin, conf) -- and seems nginx doesn't support it elseif get_global_ctx("accept_gzip") then + -- for gzip response, don't set content-length at all to align with upstream + kong.response.clear_header("Content-Length") kong.response.set_header("Content-Encoding", "gzip") end diff --git a/kong/llm/plugin/shared-filters/normalize-json-response.lua b/kong/llm/plugin/shared-filters/normalize-json-response.lua index 1e0988f52495d..f98b0d07bf12e 100644 --- a/kong/llm/plugin/shared-filters/normalize-json-response.lua +++ b/kong/llm/plugin/shared-filters/normalize-json-response.lua @@ -2,7 +2,6 @@ local cjson = require("cjson") local ai_plugin_ctx = require("kong.llm.plugin.ctx") local ai_plugin_o11y = require("kong.llm.plugin.observability") -local ai_shared = require("kong.llm.drivers.shared") local _M = { NAME = "normalize-json-response", @@ -57,6 +56,8 @@ local function transform_body(conf) end set_global_ctx("response_body", response_body) -- to be sent out later or consumed by other plugins + + return #response_body end function _M:run(conf) @@ -81,8 +82,9 @@ function _M:run(conf) -- if not streaming, prepare the response body buffer -- this must be called before sending any response headers so that -- we can modify status code if needed + local body_length if not get_global_ctx("stream_mode") then - transform_body(conf) + body_length = transform_body(conf) end -- populate cost @@ -94,12 +96,11 @@ function 
_M:run(conf) ai_plugin_o11y.metrics_set("llm_usage_cost", 0) end - -- clear shared restricted headers - for _, v in ipairs(ai_shared.clear_response_headers.shared) do - kong.response.clear_header(v) + if not get_global_ctx("accept_gzip") and not get_global_ctx("stream_mode") then + -- otherwise use our transformed body length + kong.response.set_header("Content-Length", body_length) end - if ngx.var.http_kong_debug or conf.model_name_header then local model_t = ai_plugin_ctx.get_request_model_table_inuse() assert(model_t and model_t.name, "model name is missing") @@ -109,4 +110,4 @@ function _M:run(conf) return true end -return _M \ No newline at end of file +return _M