fix: double counting in langfuse instrumentation (#13202)

This commit is contained in:
Aakash Bakhle
2026-01-13 18:52:38 +05:30
committed by GitHub
parent 1a220b2982
commit bddf06907b
4 changed files with 64 additions and 3 deletions

View File

@@ -0,0 +1,47 @@
module Captain::ChatGenerationRecorder
  extend ActiveSupport::Concern
  include Integrations::LlmInstrumentationConstants

  private

  # Emits a dedicated "generation" span carrying model and token usage so that
  # Langfuse can attribute cost to this LLM call. The span is opened and closed
  # immediately, so its duration is near-zero by design — Langfuse derives cost
  # from model name + token counts, not from span timing.
  # Failures here are logged and swallowed: instrumentation must never break the chat flow.
  def record_llm_generation(chat, message)
    return unless valid_llm_message?(message)

    tracer.in_span("llm.captain.#{feature_name}.generation") do |span|
      set_generation_span_attributes(span, chat, message)
    end
  rescue StandardError => e
    Rails.logger.warn "Failed to record LLM generation: #{e.message}"
  end

  # True only for assistant replies. Tool results and other internal RubyLLM
  # messages are skipped. We key off the role rather than token presence because
  # some providers / streaming modes omit token counts, yet the generation is
  # still worth capturing for evals.
  def valid_llm_message?(message)
    return false unless message.respond_to?(:role)

    message.role.to_s == 'assistant'
  end

  # Copies every non-nil/non-false attribute onto the span; OpenTelemetry
  # rejects nil values, so falsy entries are dropped.
  def set_generation_span_attributes(span, chat, message)
    generation_attributes(chat, message).each do |attr_key, attr_value|
      span.set_attribute(attr_key, attr_value) if attr_value
    end
  end

  # Builds the attribute hash for the generation span. `model` and `temperature`
  # come from the including class (see Captain::ChatHelper). Token accessors are
  # probed with respond_to? since not every provider exposes them.
  def generation_attributes(chat, message)
    output_tokens = message.output_tokens if message.respond_to?(:output_tokens)
    output_text = message.content.to_s if message.respond_to?(:content)

    {
      ATTR_GEN_AI_PROVIDER => determine_provider(model),
      ATTR_GEN_AI_REQUEST_MODEL => model,
      ATTR_GEN_AI_REQUEST_TEMPERATURE => temperature,
      ATTR_GEN_AI_USAGE_INPUT_TOKENS => message.input_tokens,
      ATTR_GEN_AI_USAGE_OUTPUT_TOKENS => output_tokens,
      ATTR_LANGFUSE_OBSERVATION_INPUT => format_input_messages(chat),
      ATTR_LANGFUSE_OBSERVATION_OUTPUT => output_text
    }
  end

  # Serializes the conversation so far — every message except the final one
  # (the assistant reply being recorded) — as the generation's input.
  def format_input_messages(chat)
    history = chat.messages[0..-2]
    history.map { |msg| { role: msg.role.to_s, content: msg.content.to_s } }.to_json
  end
end

View File

@@ -1,6 +1,7 @@
module Captain::ChatHelper
include Integrations::LlmInstrumentation
include Captain::ChatResponseHelper
include Captain::ChatGenerationRecorder
def request_chat_completion
log_chat_completion_request
@@ -42,8 +43,11 @@ module Captain::ChatHelper
end
def setup_event_handlers(chat)
chat.on_new_message { start_llm_turn_span(instrumentation_params(chat)) }
chat.on_end_message { |message| end_llm_turn_span(message) }
# NOTE: We only use on_end_message to record the generation with token counts.
# RubyLLM callbacks fire after chunks arrive, not around the API call, so
# span timing won't reflect actual API latency. But Langfuse calculates costs
# from model + token counts, so this is sufficient for cost tracking.
chat.on_end_message { |message| record_llm_generation(chat, message) }
chat.on_tool_call { |tool_call| handle_tool_call(tool_call) }
chat.on_tool_result { |result| handle_tool_result(result) }

View File

@@ -30,7 +30,6 @@ module Integrations::LlmInstrumentation
result = nil
executed = false
tracer.in_span(params[:span_name]) do |span|
set_request_attributes(span, params)
set_metadata_attributes(span, params)
# By default, the input and output of a trace are set from the root observation

View File

@@ -252,6 +252,17 @@ RSpec.describe Integrations::LlmInstrumentation do
expect(mock_span).to have_received(:set_attribute).with('langfuse.observation.input', params[:messages].to_json)
expect(mock_span).to have_received(:set_attribute).with('langfuse.observation.output', result_data.to_json)
end
# Regression test for Langfuse double-counting bug.
# Setting gen_ai.request.model on parent spans causes Langfuse to classify them as
# GENERATIONs instead of SPANs, resulting in cost being counted multiple times
# (once for the parent, once for each child GENERATION).
# See: https://github.com/langfuse/langfuse/issues/7549
it 'does NOT set gen_ai.request.model to avoid being classified as a GENERATION' do
instance.instrument_agent_session(params) { 'result' }
expect(mock_span).not_to have_received(:set_attribute).with('gen_ai.request.model', anything)
end
end
end
end