feat: Instrument captain (#12949)

Co-authored-by: aakashb95 <aakash@chatwoot.com>
This commit is contained in:
Aakash Bakhle
2025-11-28 15:12:55 +05:30
committed by GitHub
parent 92fc286ecb
commit 1ef945de7b
12 changed files with 346 additions and 116 deletions

View File

@@ -88,7 +88,7 @@ Metrics/ModuleLength:
Rails/HelperInstanceVariable:
Exclude:
- enterprise/app/helpers/captain/chat_helper.rb
- 'enterprise/app/helpers/captain/tool_execution_helper.rb'
Rails/ApplicationController:
Exclude:
- 'app/controllers/api/v1/widget/messages_controller.rb'

View File

@@ -1,94 +1,78 @@
module Captain::ChatHelper
include Integrations::LlmInstrumentation
include Captain::ToolExecutionHelper
def request_chat_completion
log_chat_completion_request
response = @client.chat(
parameters: {
model: @model,
messages: @messages,
tools: @tool_registry&.registered_tools || [],
response_format: { type: 'json_object' },
temperature: @assistant&.config&.[]('temperature').to_f || 1
}
)
handle_response(response)
with_agent_session do
response = instrument_llm_call(instrumentation_params) do
@client.chat(
parameters: chat_parameters
)
end
handle_response(response)
end
rescue StandardError => e
Rails.logger.error "#{self.class.name} Assistant: #{@assistant.id}, Error in chat completion: #{e}"
raise e
end
def instrumentation_params
{
span_name: "llm.captain.#{feature_name}",
account_id: resolved_account_id,
conversation_id: @conversation_id,
feature_name: feature_name,
model: @model,
messages: @messages,
temperature: temperature,
metadata: {
assistant_id: @assistant&.id
}
}
end
def chat_parameters
{
model: @model,
messages: @messages,
tools: @tool_registry&.registered_tools || [],
response_format: { type: 'json_object' },
temperature: temperature
}
end
def temperature
@assistant&.config&.[]('temperature').to_f || 1
end
def resolved_account_id
@account&.id || @assistant&.account_id
end
private
def handle_response(response)
Rails.logger.debug { "#{self.class.name} Assistant: #{@assistant.id}, Received response #{response}" }
message = response.dig('choices', 0, 'message')
if message['tool_calls']
process_tool_calls(message['tool_calls'])
else
message = JSON.parse(message['content'].strip)
persist_message(message, 'assistant')
message
end
# Ensures all LLM calls and tool executions within an agentic loop
# are grouped under a single trace/session in Langfuse.
#
# Without this guard, each recursive call to request_chat_completion
# (triggered by tool calls) would create a separate trace instead of
# nesting within the existing session span.
def with_agent_session(&)
already_active = @agent_session_active
return yield if already_active
@agent_session_active = true
instrument_agent_session(instrumentation_params, &)
ensure
@agent_session_active = false unless already_active
end
def process_tool_calls(tool_calls)
append_tool_calls(tool_calls)
tool_calls.each do |tool_call|
process_tool_call(tool_call)
end
request_chat_completion
end
def process_tool_call(tool_call)
arguments = JSON.parse(tool_call['function']['arguments'])
function_name = tool_call['function']['name']
tool_call_id = tool_call['id']
if @tool_registry.respond_to?(function_name)
execute_tool(function_name, arguments, tool_call_id)
else
process_invalid_tool_call(function_name, tool_call_id)
end
end
def execute_tool(function_name, arguments, tool_call_id)
persist_message(
{
content: I18n.t('captain.copilot.using_tool', function_name: function_name),
function_name: function_name
},
'assistant_thinking'
)
result = @tool_registry.send(function_name, arguments)
persist_message(
{
content: I18n.t('captain.copilot.completed_tool_call', function_name: function_name),
function_name: function_name
},
'assistant_thinking'
)
append_tool_response(result, tool_call_id)
end
def append_tool_calls(tool_calls)
@messages << {
role: 'assistant',
tool_calls: tool_calls
}
end
def process_invalid_tool_call(function_name, tool_call_id)
persist_message({ content: I18n.t('captain.copilot.invalid_tool_call'), function_name: function_name }, 'assistant_thinking')
append_tool_response(I18n.t('captain.copilot.tool_not_available'), tool_call_id)
end
def append_tool_response(content, tool_call_id)
@messages << {
role: 'tool',
tool_call_id: tool_call_id,
content: content
}
# Must be implemented by including class to identify the feature for instrumentation.
# Used for Langfuse tagging and span naming.
def feature_name
raise NotImplementedError, "#{self.class.name} must implement #feature_name"
end
def log_chat_completion_request

View File

@@ -0,0 +1,83 @@
# Shared plumbing for running LLM-requested tool calls inside an agentic
# chat loop (extracted from Captain::ChatHelper).
#
# Including classes are expected to provide (not visible here — verify):
#   - @tool_registry : responds to one method per registered tool
#   - @messages      : running chat transcript (array of role hashes)
#   - @assistant     : used only for log context
#   - #persist_message, #request_chat_completion, #instrument_tool_call
module Captain::ToolExecutionHelper
  private

  # Entry point for an OpenAI-style completion payload. Either continues the
  # tool loop (model asked for tools) or parses, persists and returns the
  # final assistant message as a Hash.
  def handle_response(response)
    Rails.logger.debug { "#{self.class.name} Assistant: #{@assistant.id}, Received response #{response}" }
    choice = response.dig('choices', 0, 'message')
    return process_tool_calls(choice['tool_calls']) if choice['tool_calls']

    parsed = JSON.parse(choice['content'].strip)
    persist_message(parsed, 'assistant')
    parsed
  end

  # Records the model's tool-call request on the transcript, executes each
  # requested call in order, then re-enters the chat loop so the model can
  # incorporate the results.
  def process_tool_calls(tool_calls)
    append_tool_calls(tool_calls)
    tool_calls.each do |tool_call|
      process_tool_call(tool_call)
    end
    request_chat_completion
  end

  # Routes one tool call to its implementation, or down the invalid-call
  # path when the registry does not expose the requested function.
  #
  # NOTE(review): respond_to? also matches methods inherited from Object
  # (`freeze`, `send`, ...), so a model-chosen name could invoke a non-tool
  # public method on @tool_registry — an explicit allowlist of registered
  # tool names would be tighter; verify against the registry implementation.
  def process_tool_call(tool_call)
    function = tool_call['function']
    function_name = function['name']
    parsed_args = JSON.parse(function['arguments'])
    call_id = tool_call['id']

    if @tool_registry.respond_to?(function_name)
      execute_tool(function_name, parsed_args, call_id)
    else
      process_invalid_tool_call(function_name, call_id)
    end
  end

  # Runs one tool end-to-end: announce it, invoke it, announce completion,
  # and push the result back onto the transcript for the model.
  def execute_tool(function_name, arguments, tool_call_id)
    persist_tool_status(function_name, 'captain.copilot.using_tool')
    outcome = perform_tool_call(function_name, arguments)
    persist_tool_status(function_name, 'captain.copilot.completed_tool_call')
    append_tool_response(outcome, tool_call_id)
  end

  # Invokes the tool inside an instrumentation span. Failures are converted
  # into an error string rather than raised, so the model can see the
  # failure and factor it into its next response.
  def perform_tool_call(function_name, arguments)
    instrument_tool_call(function_name, arguments) do
      @tool_registry.send(function_name, arguments)
    end
  rescue StandardError => e
    Rails.logger.error "Tool #{function_name} failed: #{e.message}"
    "Error executing #{function_name}: #{e.message}"
  end

  # Persists a transient "thinking" message describing tool progress.
  # translation_key selects the I18n copy (using_tool / completed_tool_call).
  def persist_tool_status(function_name, translation_key)
    status = {
      content: I18n.t(translation_key, function_name: function_name),
      function_name: function_name
    }
    persist_message(status, 'assistant_thinking')
  end

  # Mirrors the model's tool-call request into the transcript; the OpenAI
  # chat format requires this assistant message before tool-role replies.
  def append_tool_calls(tool_calls)
    @messages << { role: 'assistant', tool_calls: tool_calls }
  end

  # Surfaces an unknown-tool request to the user and feeds a "not available"
  # result back to the model so the loop can continue.
  def process_invalid_tool_call(function_name, tool_call_id)
    notice = { content: I18n.t('captain.copilot.invalid_tool_call'), function_name: function_name }
    persist_message(notice, 'assistant_thinking')
    append_tool_response(I18n.t('captain.copilot.tool_not_available'), tool_call_id)
  end

  # Appends a tool-role message so the model receives the tool's output on
  # the next completion request.
  def append_tool_response(content, tool_call_id)
    @messages << {
      role: 'tool',
      tool_call_id: tool_call_id,
      content: content
    }
  end
end

View File

@@ -30,7 +30,7 @@ class Captain::Conversation::ResponseBuilderJob < ApplicationJob
delegate :account, :inbox, to: :@conversation
def generate_and_process_response
@response = Captain::Llm::AssistantChatService.new(assistant: @assistant).generate_response(
@response = Captain::Llm::AssistantChatService.new(assistant: @assistant, conversation_id: @conversation.id).generate_response(
message_history: collect_previous_messages
)
process_response

View File

@@ -15,11 +15,14 @@ class Captain::Copilot::ResponseJob < ApplicationJob
private
def generate_chat_response(assistant:, conversation_id:, user_id:, copilot_thread_id:, message:)
Captain::Copilot::ChatService.new(
service = Captain::Copilot::ChatService.new(
assistant,
user_id: user_id,
copilot_thread_id: copilot_thread_id,
conversation_id: conversation_id
).generate_response(message)
)
# When using copilot_thread, message is already in previous_history
# Pass nil to avoid duplicate
service.generate_response(copilot_thread_id.present? ? nil : message)
end
end

View File

@@ -13,6 +13,7 @@ class Captain::Copilot::ChatService < Llm::BaseOpenAiService
@user = nil
@copilot_thread = nil
@previous_history = []
@conversation_id = config[:conversation_id]
setup_user(config)
setup_message_history(config)
register_tools
@@ -113,4 +114,8 @@ class Captain::Copilot::ChatService < Llm::BaseOpenAiService
message_type: message_type
)
end
def feature_name
'copilot'
end
end

View File

@@ -3,10 +3,11 @@ require 'openai'
class Captain::Llm::AssistantChatService < Llm::BaseOpenAiService
include Captain::ChatHelper
def initialize(assistant: nil)
def initialize(assistant: nil, conversation_id: nil)
super()
@assistant = assistant
@conversation_id = conversation_id
@messages = [system_message]
@response = ''
register_tools
@@ -42,4 +43,8 @@ class Captain::Llm::AssistantChatService < Llm::BaseOpenAiService
def persist_message(message, message_type = 'assistant')
# No need to implement
end
def feature_name
'assistant'
end
end

View File

@@ -1,28 +1,10 @@
# frozen_string_literal: true
require 'opentelemetry_config'
require_relative 'llm_instrumentation_constants'
module Integrations::LlmInstrumentation
# OpenTelemetry attribute names following GenAI semantic conventions
# https://opentelemetry.io/docs/specs/semconv/gen-ai/
ATTR_GEN_AI_PROVIDER = 'gen_ai.provider.name'
ATTR_GEN_AI_REQUEST_MODEL = 'gen_ai.request.model'
ATTR_GEN_AI_REQUEST_TEMPERATURE = 'gen_ai.request.temperature'
ATTR_GEN_AI_PROMPT_ROLE = 'gen_ai.prompt.%d.role'
ATTR_GEN_AI_PROMPT_CONTENT = 'gen_ai.prompt.%d.content'
ATTR_GEN_AI_COMPLETION_ROLE = 'gen_ai.completion.0.role'
ATTR_GEN_AI_COMPLETION_CONTENT = 'gen_ai.completion.0.content'
ATTR_GEN_AI_USAGE_INPUT_TOKENS = 'gen_ai.usage.input_tokens'
ATTR_GEN_AI_USAGE_OUTPUT_TOKENS = 'gen_ai.usage.output_tokens'
ATTR_GEN_AI_USAGE_TOTAL_TOKENS = 'gen_ai.usage.total_tokens'
ATTR_GEN_AI_RESPONSE_ERROR = 'gen_ai.response.error'
ATTR_GEN_AI_RESPONSE_ERROR_CODE = 'gen_ai.response.error_code'
# Langfuse-specific attributes
# https://langfuse.com/integrations/native/opentelemetry#property-mapping
ATTR_LANGFUSE_USER_ID = 'langfuse.user.id'
ATTR_LANGFUSE_SESSION_ID = 'langfuse.session.id'
ATTR_LANGFUSE_TAGS = 'langfuse.trace.tags'
include Integrations::LlmInstrumentationConstants
def tracer
@tracer ||= OpentelemetryConfig.tracer
@@ -42,6 +24,37 @@ module Integrations::LlmInstrumentation
yield
end
def instrument_agent_session(params)
return yield unless ChatwootApp.otel_enabled?
tracer.in_span(params[:span_name]) do |span|
set_metadata_attributes(span, params)
# By default, the input and output of a trace are set from the root observation
span.set_attribute(ATTR_LANGFUSE_OBSERVATION_INPUT, params[:messages].to_json)
result = yield
span.set_attribute(ATTR_LANGFUSE_OBSERVATION_OUTPUT, result.to_json)
result
end
rescue StandardError => e
ChatwootExceptionTracker.new(e, account: params[:account]).capture_exception
yield
end
def instrument_tool_call(tool_name, arguments)
# There is no error handling because tools can fail and LLMs should be
# aware of those failures and factor them into their response.
return yield unless ChatwootApp.otel_enabled?
tracer.in_span(format(TOOL_SPAN_NAME, tool_name)) do |span|
span.set_attribute(ATTR_LANGFUSE_OBSERVATION_INPUT, arguments.to_json)
result = yield
span.set_attribute(ATTR_LANGFUSE_OBSERVATION_OUTPUT, result.to_json)
result
end
end
private
def setup_span_attributes(span, params)
@@ -62,8 +75,11 @@ module Integrations::LlmInstrumentation
def set_prompt_messages(span, messages)
messages.each_with_index do |msg, idx|
span.set_attribute(format(ATTR_GEN_AI_PROMPT_ROLE, idx), msg['role'])
span.set_attribute(format(ATTR_GEN_AI_PROMPT_CONTENT, idx), msg['content'])
role = msg[:role] || msg['role']
content = msg[:content] || msg['content']
span.set_attribute(format(ATTR_GEN_AI_PROMPT_ROLE, idx), role)
span.set_attribute(format(ATTR_GEN_AI_PROMPT_CONTENT, idx), content.to_s)
end
end
@@ -72,6 +88,12 @@ module Integrations::LlmInstrumentation
span.set_attribute(ATTR_LANGFUSE_USER_ID, params[:account_id].to_s) if params[:account_id]
span.set_attribute(ATTR_LANGFUSE_SESSION_ID, session_id) if session_id.present?
span.set_attribute(ATTR_LANGFUSE_TAGS, [params[:feature_name]].to_json)
return unless params[:metadata].is_a?(Hash)
params[:metadata].each do |key, value|
span.set_attribute(format(ATTR_LANGFUSE_METADATA, key), value.to_s)
end
end
def set_completion_attributes(span, result)
@@ -81,26 +103,29 @@ module Integrations::LlmInstrumentation
end
def set_completion_message(span, result)
return if result[:message].blank?
message = result[:message] || result.dig('choices', 0, 'message', 'content')
return if message.blank?
span.set_attribute(ATTR_GEN_AI_COMPLETION_ROLE, 'assistant')
span.set_attribute(ATTR_GEN_AI_COMPLETION_CONTENT, result[:message])
span.set_attribute(ATTR_GEN_AI_COMPLETION_CONTENT, message)
end
def set_usage_metrics(span, result)
return if result[:usage].blank?
usage = result[:usage] || result['usage']
return if usage.blank?
usage = result[:usage]
span.set_attribute(ATTR_GEN_AI_USAGE_INPUT_TOKENS, usage['prompt_tokens']) if usage['prompt_tokens']
span.set_attribute(ATTR_GEN_AI_USAGE_OUTPUT_TOKENS, usage['completion_tokens']) if usage['completion_tokens']
span.set_attribute(ATTR_GEN_AI_USAGE_TOTAL_TOKENS, usage['total_tokens']) if usage['total_tokens']
end
def set_error_attributes(span, result)
return if result[:error].blank?
error = result[:error] || result['error']
return if error.blank?
span.set_attribute(ATTR_GEN_AI_RESPONSE_ERROR, result[:error].to_json)
span.set_attribute(ATTR_GEN_AI_RESPONSE_ERROR_CODE, result[:error_code]) if result[:error_code]
span.status = OpenTelemetry::Trace::Status.error("API Error: #{result[:error_code]}")
error_code = result[:error_code] || result['error_code']
span.set_attribute(ATTR_GEN_AI_RESPONSE_ERROR, error.to_json)
span.set_attribute(ATTR_GEN_AI_RESPONSE_ERROR_CODE, error_code) if error_code
span.status = OpenTelemetry::Trace::Status.error("API Error: #{error_code}")
end
end

View File

@@ -0,0 +1,31 @@
# frozen_string_literal: true

# Attribute-name constants for LLM observability spans, shared via
# Integrations::LlmInstrumentation. Extracted to their own module so the
# instrumentation mixin stays focused on behavior.
module Integrations::LlmInstrumentationConstants
  # OpenTelemetry attribute names following GenAI semantic conventions
  # https://opentelemetry.io/docs/specs/semconv/gen-ai/
  ATTR_GEN_AI_PROVIDER = 'gen_ai.provider.name'
  ATTR_GEN_AI_REQUEST_MODEL = 'gen_ai.request.model'
  ATTR_GEN_AI_REQUEST_TEMPERATURE = 'gen_ai.request.temperature'
  # Indexed per prompt message: filled via format(..., idx).
  ATTR_GEN_AI_PROMPT_ROLE = 'gen_ai.prompt.%d.role'
  ATTR_GEN_AI_PROMPT_CONTENT = 'gen_ai.prompt.%d.content'
  # Only the first completion choice is recorded (fixed index 0).
  ATTR_GEN_AI_COMPLETION_ROLE = 'gen_ai.completion.0.role'
  ATTR_GEN_AI_COMPLETION_CONTENT = 'gen_ai.completion.0.content'
  ATTR_GEN_AI_USAGE_INPUT_TOKENS = 'gen_ai.usage.input_tokens'
  ATTR_GEN_AI_USAGE_OUTPUT_TOKENS = 'gen_ai.usage.output_tokens'
  ATTR_GEN_AI_USAGE_TOTAL_TOKENS = 'gen_ai.usage.total_tokens'
  ATTR_GEN_AI_RESPONSE_ERROR = 'gen_ai.response.error'
  ATTR_GEN_AI_RESPONSE_ERROR_CODE = 'gen_ai.response.error_code'
  # Span name template for tool executions: filled via format(..., tool_name).
  TOOL_SPAN_NAME = 'tool.%s'

  # Langfuse-specific attributes
  # https://langfuse.com/integrations/native/opentelemetry#property-mapping
  ATTR_LANGFUSE_USER_ID = 'langfuse.user.id'
  ATTR_LANGFUSE_SESSION_ID = 'langfuse.session.id'
  ATTR_LANGFUSE_TAGS = 'langfuse.trace.tags'
  # Per-key metadata attribute: filled via format(..., metadata_key).
  ATTR_LANGFUSE_METADATA = 'langfuse.trace.metadata.%s'
  ATTR_LANGFUSE_TRACE_INPUT = 'langfuse.trace.input'
  ATTR_LANGFUSE_TRACE_OUTPUT = 'langfuse.trace.output'
  ATTR_LANGFUSE_OBSERVATION_INPUT = 'langfuse.observation.input'
  ATTR_LANGFUSE_OBSERVATION_OUTPUT = 'langfuse.observation.output'
end

View File

@@ -28,7 +28,7 @@ RSpec.describe Captain::Conversation::ResponseBuilderJob, type: :job do
end
it 'uses Captain::Llm::AssistantChatService' do
expect(Captain::Llm::AssistantChatService).to receive(:new).with(assistant: assistant)
expect(Captain::Llm::AssistantChatService).to receive(:new).with(assistant: assistant, conversation_id: conversation.id)
expect(Captain::Assistant::AgentRunnerService).not_to receive(:new)
described_class.perform_now(conversation, assistant)

View File

@@ -18,7 +18,9 @@ RSpec.describe Captain::Copilot::ResponseJob, type: :job do
copilot_thread_id: copilot_thread.id,
conversation_id: conversation_id
).and_return(chat_service)
allow(chat_service).to receive(:generate_response).with(message)
# When copilot_thread_id is present, message is already in previous_history
# so nil is passed to avoid duplicate
allow(chat_service).to receive(:generate_response).with(nil)
end
it 'initializes ChatService with correct parameters and calls generate_response' do
@@ -28,7 +30,9 @@ RSpec.describe Captain::Copilot::ResponseJob, type: :job do
copilot_thread_id: copilot_thread.id,
conversation_id: conversation_id
)
expect(chat_service).to receive(:generate_response).with(message)
# Message is already persisted in copilot_thread.previous_history,
# so we pass nil to prevent duplicate user messages
expect(chat_service).to receive(:generate_response).with(nil)
described_class.perform_now(
assistant: assistant,
conversation_id: conversation_id,

View File

@@ -213,5 +213,95 @@ RSpec.describe Integrations::LlmInstrumentation do
expect(OpenTelemetry::Trace::Status).to have_received(:error).with('API Error: rate_limit_exceeded')
end
end
# Covers the session-level span that groups an entire agentic loop.
# Relies on outer lets (not visible here): `instance`, `params`, `otel_config`.
describe '#instrument_agent_session' do
  context 'when OTEL provider is not configured' do
    # Blank provider value disables tracing (ChatwootApp.otel_enabled? is false).
    before { otel_config.update(value: '') }

    it 'executes the block without tracing' do
      result = instance.instrument_agent_session(params) { 'my_result' }
      expect(result).to eq('my_result')
    end
  end

  context 'when OTEL provider is configured' do
    let(:mock_span) { instance_double(OpenTelemetry::Trace::Span) }
    let(:mock_tracer) { instance_double(OpenTelemetry::Trace::Tracer) }

    before do
      # Stub the tracer so no real OTEL exporter is needed; in_span yields
      # the mocked span to the instrumented block.
      allow(mock_span).to receive(:set_attribute)
      allow(instance).to receive(:tracer).and_return(mock_tracer)
      allow(mock_tracer).to receive(:in_span).and_yield(mock_span)
    end

    it 'executes the block and returns the result' do
      result = instance.instrument_agent_session(params) { 'my_result' }
      expect(result).to eq('my_result')
    end

    # Instrumentation failures must never break the underlying LLM call:
    # instrument_agent_session rescues StandardError and re-yields.
    it 'returns the block result even if instrumentation has errors' do
      allow(mock_tracer).to receive(:in_span).and_raise(StandardError.new('Instrumentation failed'))
      result = instance.instrument_agent_session(params) { 'my_result' }
      expect(result).to eq('my_result')
    end

    it 'sets trace input and output attributes' do
      result_data = { content: 'AI response' }
      instance.instrument_agent_session(params) { result_data }
      # Root-observation input/output drive the Langfuse trace input/output.
      expect(mock_span).to have_received(:set_attribute).with('langfuse.observation.input', params[:messages].to_json)
      expect(mock_span).to have_received(:set_attribute).with('langfuse.observation.output', result_data.to_json)
    end
  end
end
# Covers the per-tool child span. Relies on outer lets (not visible here):
# `instance`, `otel_config`.
describe '#instrument_tool_call' do
  let(:tool_name) { 'search_documents' }
  let(:arguments) { { query: 'test query' } }

  context 'when OTEL provider is not configured' do
    # Blank provider value disables tracing entirely.
    before { otel_config.update(value: '') }

    it 'executes the block without tracing' do
      result = instance.instrument_tool_call(tool_name, arguments) { 'tool_result' }
      expect(result).to eq('tool_result')
    end
  end

  context 'when OTEL provider is configured' do
    let(:mock_span) { instance_double(OpenTelemetry::Trace::Span) }
    let(:mock_tracer) { instance_double(OpenTelemetry::Trace::Tracer) }

    before do
      # Stub the tracer so no real OTEL exporter is needed.
      allow(mock_span).to receive(:set_attribute)
      allow(instance).to receive(:tracer).and_return(mock_tracer)
      allow(mock_tracer).to receive(:in_span).and_yield(mock_span)
    end

    it 'executes the block and returns the result' do
      result = instance.instrument_tool_call(tool_name, arguments) { 'tool_result' }
      expect(result).to eq('tool_result')
    end

    # Unlike instrument_agent_session, instrument_tool_call has no rescue:
    # tool failures (and instrumentation failures) bubble up to the caller,
    # which handles them in perform_tool_call.
    it 'propagates instrumentation errors' do
      allow(mock_tracer).to receive(:in_span).and_raise(StandardError.new('Instrumentation failed'))
      expect do
        instance.instrument_tool_call(tool_name, arguments) { 'tool_result' }
      end.to raise_error(StandardError, 'Instrumentation failed')
    end

    it 'creates a span with tool name and sets observation attributes' do
      tool_result = { documents: ['doc1'] }
      instance.instrument_tool_call(tool_name, arguments) { tool_result }
      # Span name comes from the 'tool.%s' template; input/output are the
      # JSON-encoded arguments and result.
      expect(mock_tracer).to have_received(:in_span).with('tool.search_documents')
      expect(mock_span).to have_received(:set_attribute).with('langfuse.observation.input', arguments.to_json)
      expect(mock_span).to have_received(:set_attribute).with('langfuse.observation.output', tool_result.to_json)
    end
  end
end
end
end