From 1ef945de7bd88ec1a3b88dff5d535a25e401867b Mon Sep 17 00:00:00 2001 From: Aakash Bakhle <48802744+aakashb95@users.noreply.github.com> Date: Fri, 28 Nov 2025 15:12:55 +0530 Subject: [PATCH] feat: Instrument captain (#12949) Co-authored-by: aakashb95 --- .rubocop.yml | 2 +- enterprise/app/helpers/captain/chat_helper.rb | 140 ++++++++---------- .../helpers/captain/tool_execution_helper.rb | 83 +++++++++++ .../conversation/response_builder_job.rb | 2 +- .../app/jobs/captain/copilot/response_job.rb | 7 +- .../services/captain/copilot/chat_service.rb | 5 + .../captain/llm/assistant_chat_service.rb | 7 +- lib/integrations/llm_instrumentation.rb | 85 +++++++---- .../llm_instrumentation_constants.rb | 31 ++++ .../conversation/response_builder_job_spec.rb | 2 +- .../jobs/captain/copilot/response_job_spec.rb | 8 +- .../integrations/llm_instrumentation_spec.rb | 90 +++++++++++ 12 files changed, 346 insertions(+), 116 deletions(-) create mode 100644 enterprise/app/helpers/captain/tool_execution_helper.rb create mode 100644 lib/integrations/llm_instrumentation_constants.rb diff --git a/.rubocop.yml b/.rubocop.yml index 4b32991d1..f5b8a2c1c 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -88,7 +88,7 @@ Metrics/ModuleLength: Rails/HelperInstanceVariable: Exclude: - enterprise/app/helpers/captain/chat_helper.rb - + - 'enterprise/app/helpers/captain/tool_execution_helper.rb' Rails/ApplicationController: Exclude: - 'app/controllers/api/v1/widget/messages_controller.rb' diff --git a/enterprise/app/helpers/captain/chat_helper.rb b/enterprise/app/helpers/captain/chat_helper.rb index f90b8d07e..c2f4311a7 100644 --- a/enterprise/app/helpers/captain/chat_helper.rb +++ b/enterprise/app/helpers/captain/chat_helper.rb @@ -1,94 +1,78 @@ module Captain::ChatHelper + include Integrations::LlmInstrumentation + include Captain::ToolExecutionHelper + def request_chat_completion log_chat_completion_request - response = @client.chat( - parameters: { - model: @model, - messages: @messages, - tools: @tool_registry&.registered_tools || [], - response_format: { type: 'json_object' }, - temperature: @assistant&.config&.[]('temperature').to_f || 1 - } - ) - - handle_response(response) + with_agent_session do + response = instrument_llm_call(instrumentation_params) do + @client.chat( + parameters: chat_parameters + ) + end + handle_response(response) + end rescue StandardError => e Rails.logger.error "#{self.class.name} Assistant: #{@assistant.id}, Error in chat completion: #{e}" raise e end + def instrumentation_params + { + span_name: "llm.captain.#{feature_name}", + account_id: resolved_account_id, + conversation_id: @conversation_id, + feature_name: feature_name, + model: @model, + messages: @messages, + temperature: temperature, + metadata: { + assistant_id: @assistant&.id + } + } + end + + def chat_parameters + { + model: @model, + messages: @messages, + tools: @tool_registry&.registered_tools || [], + response_format: { type: 'json_object' }, + temperature: temperature + } + end + + def temperature + @assistant&.config&.[]('temperature').to_f || 1 + end + + def resolved_account_id + @account&.id || @assistant&.account_id + end + private - def handle_response(response) - Rails.logger.debug { "#{self.class.name} Assistant: #{@assistant.id}, Received response #{response}" } - message = response.dig('choices', 0, 'message') - if message['tool_calls'] - process_tool_calls(message['tool_calls']) - else - message = JSON.parse(message['content'].strip) - persist_message(message, 'assistant') - message - end + # Ensures all LLM calls and tool executions within an agentic loop + # are grouped under a single trace/session in Langfuse. + # + # Without this guard, each recursive call to request_chat_completion + # (triggered by tool calls) would create a separate trace instead of + # nesting within the existing session span. + def with_agent_session(&) + already_active = @agent_session_active + return yield if already_active + + @agent_session_active = true + instrument_agent_session(instrumentation_params, &) + ensure + @agent_session_active = false unless already_active end - def process_tool_calls(tool_calls) - append_tool_calls(tool_calls) - tool_calls.each do |tool_call| - process_tool_call(tool_call) - end - request_chat_completion - end - - def process_tool_call(tool_call) - arguments = JSON.parse(tool_call['function']['arguments']) - function_name = tool_call['function']['name'] - tool_call_id = tool_call['id'] - - if @tool_registry.respond_to?(function_name) - execute_tool(function_name, arguments, tool_call_id) - else - process_invalid_tool_call(function_name, tool_call_id) - end - end - - def execute_tool(function_name, arguments, tool_call_id) - persist_message( - { - content: I18n.t('captain.copilot.using_tool', function_name: function_name), - function_name: function_name - }, - 'assistant_thinking' - ) - result = @tool_registry.send(function_name, arguments) - persist_message( - { - content: I18n.t('captain.copilot.completed_tool_call', function_name: function_name), - function_name: function_name - }, - 'assistant_thinking' - ) - append_tool_response(result, tool_call_id) - end - - def append_tool_calls(tool_calls) - @messages << { - role: 'assistant', - tool_calls: tool_calls - } - end - - def process_invalid_tool_call(function_name, tool_call_id) - persist_message({ content: I18n.t('captain.copilot.invalid_tool_call'), function_name: function_name }, 'assistant_thinking') - append_tool_response(I18n.t('captain.copilot.tool_not_available'), tool_call_id) - end - - def append_tool_response(content, tool_call_id) - @messages << { - role: 'tool', - tool_call_id: tool_call_id, - content: content - } + # Must be implemented by including class to identify the feature for instrumentation. + # Used for Langfuse tagging and span naming. + def feature_name + raise NotImplementedError, "#{self.class.name} must implement #feature_name" end def log_chat_completion_request diff --git a/enterprise/app/helpers/captain/tool_execution_helper.rb b/enterprise/app/helpers/captain/tool_execution_helper.rb new file mode 100644 index 000000000..45c546c9c --- /dev/null +++ b/enterprise/app/helpers/captain/tool_execution_helper.rb @@ -0,0 +1,83 @@ +module Captain::ToolExecutionHelper + private + + def handle_response(response) + Rails.logger.debug { "#{self.class.name} Assistant: #{@assistant.id}, Received response #{response}" } + message = response.dig('choices', 0, 'message') + + if message['tool_calls'] + process_tool_calls(message['tool_calls']) + else + message = JSON.parse(message['content'].strip) + persist_message(message, 'assistant') + message + end + end + + def process_tool_calls(tool_calls) + append_tool_calls(tool_calls) + tool_calls.each { |tool_call| process_tool_call(tool_call) } + request_chat_completion + end + + def process_tool_call(tool_call) + arguments = JSON.parse(tool_call['function']['arguments']) + function_name = tool_call['function']['name'] + tool_call_id = tool_call['id'] + + if @tool_registry.respond_to?(function_name) + execute_tool(function_name, arguments, tool_call_id) + else + process_invalid_tool_call(function_name, tool_call_id) + end + end + + def execute_tool(function_name, arguments, tool_call_id) + persist_tool_status(function_name, 'captain.copilot.using_tool') + result = perform_tool_call(function_name, arguments) + persist_tool_status(function_name, 'captain.copilot.completed_tool_call') + append_tool_response(result, tool_call_id) + end + + def perform_tool_call(function_name, arguments) + instrument_tool_call(function_name, arguments) do + @tool_registry.send(function_name, arguments) + end + rescue StandardError => e + Rails.logger.error "Tool #{function_name} failed: #{e.message}" + "Error executing #{function_name}: #{e.message}" + end + + def persist_tool_status(function_name, translation_key) + persist_message( + { + content: I18n.t(translation_key, function_name: function_name), + function_name: function_name + }, + 'assistant_thinking' + ) + end + + def append_tool_calls(tool_calls) + @messages << { + role: 'assistant', + tool_calls: tool_calls + } + end + + def process_invalid_tool_call(function_name, tool_call_id) + persist_message( + { content: I18n.t('captain.copilot.invalid_tool_call'), function_name: function_name }, + 'assistant_thinking' + ) + append_tool_response(I18n.t('captain.copilot.tool_not_available'), tool_call_id) + end + + def append_tool_response(content, tool_call_id) + @messages << { + role: 'tool', + tool_call_id: tool_call_id, + content: content + } + end +end diff --git a/enterprise/app/jobs/captain/conversation/response_builder_job.rb b/enterprise/app/jobs/captain/conversation/response_builder_job.rb index 828123321..f218ff68c 100644 --- a/enterprise/app/jobs/captain/conversation/response_builder_job.rb +++ b/enterprise/app/jobs/captain/conversation/response_builder_job.rb @@ -30,7 +30,7 @@ class Captain::Conversation::ResponseBuilderJob < ApplicationJob delegate :account, :inbox, to: :@conversation def generate_and_process_response - @response = Captain::Llm::AssistantChatService.new(assistant: @assistant).generate_response( + @response = Captain::Llm::AssistantChatService.new(assistant: @assistant, conversation_id: @conversation.id).generate_response( message_history: collect_previous_messages ) process_response diff --git a/enterprise/app/jobs/captain/copilot/response_job.rb b/enterprise/app/jobs/captain/copilot/response_job.rb index fca5b0cce..f3e2653f7 100644 --- a/enterprise/app/jobs/captain/copilot/response_job.rb +++ b/enterprise/app/jobs/captain/copilot/response_job.rb @@ -15,11 +15,14 @@ class Captain::Copilot::ResponseJob < ApplicationJob private def generate_chat_response(assistant:, conversation_id:, user_id:, copilot_thread_id:, message:) - Captain::Copilot::ChatService.new( + service = Captain::Copilot::ChatService.new( assistant, user_id: user_id, copilot_thread_id: copilot_thread_id, conversation_id: conversation_id - ).generate_response(message) + ) + # When using copilot_thread, message is already in previous_history + # Pass nil to avoid duplicate + service.generate_response(copilot_thread_id.present? ? nil : message) end end diff --git a/enterprise/app/services/captain/copilot/chat_service.rb b/enterprise/app/services/captain/copilot/chat_service.rb index ca7057c86..6db7a973b 100644 --- a/enterprise/app/services/captain/copilot/chat_service.rb +++ b/enterprise/app/services/captain/copilot/chat_service.rb @@ -13,6 +13,7 @@ class Captain::Copilot::ChatService < Llm::BaseOpenAiService @user = nil @copilot_thread = nil @previous_history = [] + @conversation_id = config[:conversation_id] setup_user(config) setup_message_history(config) register_tools @@ -113,4 +114,8 @@ class Captain::Copilot::ChatService < Llm::BaseOpenAiService message_type: message_type ) end + + def feature_name + 'copilot' + end end diff --git a/enterprise/app/services/captain/llm/assistant_chat_service.rb b/enterprise/app/services/captain/llm/assistant_chat_service.rb index ca8fafaa0..54e13e1d0 100644 --- a/enterprise/app/services/captain/llm/assistant_chat_service.rb +++ b/enterprise/app/services/captain/llm/assistant_chat_service.rb @@ -3,10 +3,11 @@ require 'openai' class Captain::Llm::AssistantChatService < Llm::BaseOpenAiService include Captain::ChatHelper - def initialize(assistant: nil) + def initialize(assistant: nil, conversation_id: nil) super() @assistant = assistant + @conversation_id = conversation_id @messages = [system_message] @response = '' register_tools @@ -42,4 +43,8 @@ class Captain::Llm::AssistantChatService < Llm::BaseOpenAiService def persist_message(message, message_type = 'assistant') # No need to implement end + + def feature_name + 'assistant' + end end diff --git a/lib/integrations/llm_instrumentation.rb b/lib/integrations/llm_instrumentation.rb index ff9036bee..919713dd3 100644 --- a/lib/integrations/llm_instrumentation.rb +++ b/lib/integrations/llm_instrumentation.rb @@ -1,28 +1,10 @@ # frozen_string_literal: true require 'opentelemetry_config' +require_relative 'llm_instrumentation_constants' module Integrations::LlmInstrumentation - # OpenTelemetry attribute names following GenAI semantic conventions - # https://opentelemetry.io/docs/specs/semconv/gen-ai/ - ATTR_GEN_AI_PROVIDER = 'gen_ai.provider.name' - ATTR_GEN_AI_REQUEST_MODEL = 'gen_ai.request.model' - ATTR_GEN_AI_REQUEST_TEMPERATURE = 'gen_ai.request.temperature' - ATTR_GEN_AI_PROMPT_ROLE = 'gen_ai.prompt.%d.role' - ATTR_GEN_AI_PROMPT_CONTENT = 'gen_ai.prompt.%d.content' - ATTR_GEN_AI_COMPLETION_ROLE = 'gen_ai.completion.0.role' - ATTR_GEN_AI_COMPLETION_CONTENT = 'gen_ai.completion.0.content' - ATTR_GEN_AI_USAGE_INPUT_TOKENS = 'gen_ai.usage.input_tokens' - ATTR_GEN_AI_USAGE_OUTPUT_TOKENS = 'gen_ai.usage.output_tokens' - ATTR_GEN_AI_USAGE_TOTAL_TOKENS = 'gen_ai.usage.total_tokens' - ATTR_GEN_AI_RESPONSE_ERROR = 'gen_ai.response.error' - ATTR_GEN_AI_RESPONSE_ERROR_CODE = 'gen_ai.response.error_code' - - # Langfuse-specific attributes - # https://langfuse.com/integrations/native/opentelemetry#property-mapping - ATTR_LANGFUSE_USER_ID = 'langfuse.user.id' - ATTR_LANGFUSE_SESSION_ID = 'langfuse.session.id' - ATTR_LANGFUSE_TAGS = 'langfuse.trace.tags' + include Integrations::LlmInstrumentationConstants def tracer @tracer ||= OpentelemetryConfig.tracer @@ -42,6 +24,37 @@ module Integrations::LlmInstrumentation yield end + def instrument_agent_session(params) + return yield unless ChatwootApp.otel_enabled? + + tracer.in_span(params[:span_name]) do |span| + set_metadata_attributes(span, params) + + # By default, the input and output of a trace are set from the root observation + span.set_attribute(ATTR_LANGFUSE_OBSERVATION_INPUT, params[:messages].to_json) + result = yield + span.set_attribute(ATTR_LANGFUSE_OBSERVATION_OUTPUT, result.to_json) + + result + end + rescue StandardError => e + ChatwootExceptionTracker.new(e, account: params[:account]).capture_exception + yield + end + + def instrument_tool_call(tool_name, arguments) + # There is no error handling because tools can fail and LLMs should be + # aware of those failures and factor them into their response. + return yield unless ChatwootApp.otel_enabled? + + tracer.in_span(format(TOOL_SPAN_NAME, tool_name)) do |span| + span.set_attribute(ATTR_LANGFUSE_OBSERVATION_INPUT, arguments.to_json) + result = yield + span.set_attribute(ATTR_LANGFUSE_OBSERVATION_OUTPUT, result.to_json) + result + end + end + private def setup_span_attributes(span, params) @@ -62,8 +75,11 @@ module Integrations::LlmInstrumentation def set_prompt_messages(span, messages) messages.each_with_index do |msg, idx| - span.set_attribute(format(ATTR_GEN_AI_PROMPT_ROLE, idx), msg['role']) - span.set_attribute(format(ATTR_GEN_AI_PROMPT_CONTENT, idx), msg['content']) + role = msg[:role] || msg['role'] + content = msg[:content] || msg['content'] + + span.set_attribute(format(ATTR_GEN_AI_PROMPT_ROLE, idx), role) + span.set_attribute(format(ATTR_GEN_AI_PROMPT_CONTENT, idx), content.to_s) end end @@ -72,6 +88,12 @@ module Integrations::LlmInstrumentation span.set_attribute(ATTR_LANGFUSE_USER_ID, params[:account_id].to_s) if params[:account_id] span.set_attribute(ATTR_LANGFUSE_SESSION_ID, session_id) if session_id.present? span.set_attribute(ATTR_LANGFUSE_TAGS, [params[:feature_name]].to_json) + + return unless params[:metadata].is_a?(Hash) + + params[:metadata].each do |key, value| + span.set_attribute(format(ATTR_LANGFUSE_METADATA, key), value.to_s) + end end def set_completion_attributes(span, result) @@ -81,26 +103,29 @@ module Integrations::LlmInstrumentation end def set_completion_message(span, result) - return if result[:message].blank? + message = result[:message] || result.dig('choices', 0, 'message', 'content') + return if message.blank? span.set_attribute(ATTR_GEN_AI_COMPLETION_ROLE, 'assistant') - span.set_attribute(ATTR_GEN_AI_COMPLETION_CONTENT, result[:message]) + span.set_attribute(ATTR_GEN_AI_COMPLETION_CONTENT, message) end def set_usage_metrics(span, result) - return if result[:usage].blank? + usage = result[:usage] || result['usage'] + return if usage.blank? - usage = result[:usage] span.set_attribute(ATTR_GEN_AI_USAGE_INPUT_TOKENS, usage['prompt_tokens']) if usage['prompt_tokens'] span.set_attribute(ATTR_GEN_AI_USAGE_OUTPUT_TOKENS, usage['completion_tokens']) if usage['completion_tokens'] span.set_attribute(ATTR_GEN_AI_USAGE_TOTAL_TOKENS, usage['total_tokens']) if usage['total_tokens'] end def set_error_attributes(span, result) - return if result[:error].blank? + error = result[:error] || result['error'] + return if error.blank? - span.set_attribute(ATTR_GEN_AI_RESPONSE_ERROR, result[:error].to_json) - span.set_attribute(ATTR_GEN_AI_RESPONSE_ERROR_CODE, result[:error_code]) if result[:error_code] - span.status = OpenTelemetry::Trace::Status.error("API Error: #{result[:error_code]}") + error_code = result[:error_code] || result['error_code'] + span.set_attribute(ATTR_GEN_AI_RESPONSE_ERROR, error.to_json) + span.set_attribute(ATTR_GEN_AI_RESPONSE_ERROR_CODE, error_code) if error_code + span.status = OpenTelemetry::Trace::Status.error("API Error: #{error_code}") end end diff --git a/lib/integrations/llm_instrumentation_constants.rb b/lib/integrations/llm_instrumentation_constants.rb new file mode 100644 index 000000000..6ce296ee9 --- /dev/null +++ b/lib/integrations/llm_instrumentation_constants.rb @@ -0,0 +1,31 @@ +# frozen_string_literal: true + +module Integrations::LlmInstrumentationConstants + # OpenTelemetry attribute names following GenAI semantic conventions + # https://opentelemetry.io/docs/specs/semconv/gen-ai/ + ATTR_GEN_AI_PROVIDER = 'gen_ai.provider.name' + ATTR_GEN_AI_REQUEST_MODEL = 'gen_ai.request.model' + ATTR_GEN_AI_REQUEST_TEMPERATURE = 'gen_ai.request.temperature' + ATTR_GEN_AI_PROMPT_ROLE = 'gen_ai.prompt.%d.role' + ATTR_GEN_AI_PROMPT_CONTENT = 'gen_ai.prompt.%d.content' + ATTR_GEN_AI_COMPLETION_ROLE = 'gen_ai.completion.0.role' + ATTR_GEN_AI_COMPLETION_CONTENT = 'gen_ai.completion.0.content' + ATTR_GEN_AI_USAGE_INPUT_TOKENS = 'gen_ai.usage.input_tokens' + ATTR_GEN_AI_USAGE_OUTPUT_TOKENS = 'gen_ai.usage.output_tokens' + ATTR_GEN_AI_USAGE_TOTAL_TOKENS = 'gen_ai.usage.total_tokens' + ATTR_GEN_AI_RESPONSE_ERROR = 'gen_ai.response.error' + ATTR_GEN_AI_RESPONSE_ERROR_CODE = 'gen_ai.response.error_code' + + TOOL_SPAN_NAME = 'tool.%s' + + # Langfuse-specific attributes + # https://langfuse.com/integrations/native/opentelemetry#property-mapping + ATTR_LANGFUSE_USER_ID = 'langfuse.user.id' + ATTR_LANGFUSE_SESSION_ID = 'langfuse.session.id' + ATTR_LANGFUSE_TAGS = 'langfuse.trace.tags' + ATTR_LANGFUSE_METADATA = 'langfuse.trace.metadata.%s' + ATTR_LANGFUSE_TRACE_INPUT = 'langfuse.trace.input' + ATTR_LANGFUSE_TRACE_OUTPUT = 'langfuse.trace.output' + ATTR_LANGFUSE_OBSERVATION_INPUT = 'langfuse.observation.input' + ATTR_LANGFUSE_OBSERVATION_OUTPUT = 'langfuse.observation.output' +end diff --git a/spec/enterprise/jobs/captain/conversation/response_builder_job_spec.rb b/spec/enterprise/jobs/captain/conversation/response_builder_job_spec.rb index 23ed2ecec..565fc26cf 100644 --- a/spec/enterprise/jobs/captain/conversation/response_builder_job_spec.rb +++ b/spec/enterprise/jobs/captain/conversation/response_builder_job_spec.rb @@ -28,7 +28,7 @@ RSpec.describe Captain::Conversation::ResponseBuilderJob, type: :job do end it 'uses Captain::Llm::AssistantChatService' do - expect(Captain::Llm::AssistantChatService).to receive(:new).with(assistant: assistant) + expect(Captain::Llm::AssistantChatService).to receive(:new).with(assistant: assistant, conversation_id: conversation.id) expect(Captain::Assistant::AgentRunnerService).not_to receive(:new) described_class.perform_now(conversation, assistant) diff --git a/spec/enterprise/jobs/captain/copilot/response_job_spec.rb b/spec/enterprise/jobs/captain/copilot/response_job_spec.rb index c8aacda5e..6f88a18ba 100644 --- a/spec/enterprise/jobs/captain/copilot/response_job_spec.rb +++ b/spec/enterprise/jobs/captain/copilot/response_job_spec.rb @@ -18,7 +18,9 @@ RSpec.describe Captain::Copilot::ResponseJob, type: :job do copilot_thread_id: copilot_thread.id, conversation_id: conversation_id ).and_return(chat_service) - allow(chat_service).to receive(:generate_response).with(message) + # When copilot_thread_id is present, message is already in previous_history + # so nil is passed to avoid duplicate + allow(chat_service).to receive(:generate_response).with(nil) end it 'initializes ChatService with correct parameters and calls generate_response' do @@ -28,7 +30,9 @@ RSpec.describe Captain::Copilot::ResponseJob, type: :job do copilot_thread_id: copilot_thread.id, conversation_id: conversation_id ) - expect(chat_service).to receive(:generate_response).with(message) + # Message is already persisted in copilot_thread.previous_history, + # so we pass nil to prevent duplicate user messages + expect(chat_service).to receive(:generate_response).with(nil) described_class.perform_now( assistant: assistant, conversation_id: conversation_id, diff --git a/spec/lib/integrations/llm_instrumentation_spec.rb b/spec/lib/integrations/llm_instrumentation_spec.rb index 4376acb12..f7f0c45e4 100644 --- a/spec/lib/integrations/llm_instrumentation_spec.rb +++ b/spec/lib/integrations/llm_instrumentation_spec.rb @@ -213,5 +213,95 @@ RSpec.describe Integrations::LlmInstrumentation do expect(OpenTelemetry::Trace::Status).to have_received(:error).with('API Error: rate_limit_exceeded') end end + + describe '#instrument_agent_session' do + context 'when OTEL provider is not configured' do + before { otel_config.update(value: '') } + + it 'executes the block without tracing' do + result = instance.instrument_agent_session(params) { 'my_result' } + expect(result).to eq('my_result') + end + end + + context 'when OTEL provider is configured' do + let(:mock_span) { instance_double(OpenTelemetry::Trace::Span) } + let(:mock_tracer) { instance_double(OpenTelemetry::Trace::Tracer) } + + before do + allow(mock_span).to receive(:set_attribute) + allow(instance).to receive(:tracer).and_return(mock_tracer) + allow(mock_tracer).to receive(:in_span).and_yield(mock_span) + end + + it 'executes the block and returns the result' do + result = instance.instrument_agent_session(params) { 'my_result' } + expect(result).to eq('my_result') + end + + it 'returns the block result even if instrumentation has errors' do + allow(mock_tracer).to receive(:in_span).and_raise(StandardError.new('Instrumentation failed')) + + result = instance.instrument_agent_session(params) { 'my_result' } + + expect(result).to eq('my_result') + end + + it 'sets trace input and output attributes' do + result_data = { content: 'AI response' } + instance.instrument_agent_session(params) { result_data } + + expect(mock_span).to have_received(:set_attribute).with('langfuse.observation.input', params[:messages].to_json) + expect(mock_span).to have_received(:set_attribute).with('langfuse.observation.output', result_data.to_json) + end + end + end + + describe '#instrument_tool_call' do + let(:tool_name) { 'search_documents' } + let(:arguments) { { query: 'test query' } } + + context 'when OTEL provider is not configured' do + before { otel_config.update(value: '') } + + it 'executes the block without tracing' do + result = instance.instrument_tool_call(tool_name, arguments) { 'tool_result' } + expect(result).to eq('tool_result') + end + end + + context 'when OTEL provider is configured' do + let(:mock_span) { instance_double(OpenTelemetry::Trace::Span) } + let(:mock_tracer) { instance_double(OpenTelemetry::Trace::Tracer) } + + before do + allow(mock_span).to receive(:set_attribute) + allow(instance).to receive(:tracer).and_return(mock_tracer) + allow(mock_tracer).to receive(:in_span).and_yield(mock_span) + end + + it 'executes the block and returns the result' do + result = instance.instrument_tool_call(tool_name, arguments) { 'tool_result' } + expect(result).to eq('tool_result') + end + + it 'propagates instrumentation errors' do + allow(mock_tracer).to receive(:in_span).and_raise(StandardError.new('Instrumentation failed')) + + expect do + instance.instrument_tool_call(tool_name, arguments) { 'tool_result' } + end.to raise_error(StandardError, 'Instrumentation failed') + end + + it 'creates a span with tool name and sets observation attributes' do + tool_result = { documents: ['doc1'] } + instance.instrument_tool_call(tool_name, arguments) { tool_result } + + expect(mock_tracer).to have_received(:in_span).with('tool.search_documents') + expect(mock_span).to have_received(:set_attribute).with('langfuse.observation.input', arguments.to_json) + expect(mock_span).to have_received(:set_attribute).with('langfuse.observation.output', tool_result.to_json) + end + end + end end end