# Pull Request Template

## Description

Playground now uses v2; previously it was only wired to use v1. Traces get `source: playground` in Langfuse when the playground has been used.

## Type of change

- [x] New feature (non-breaking change which adds functionality)

## How Has This Been Tested?

Please describe the tests that you ran to verify your changes. Provide instructions so we can reproduce. Please also list any relevant details for your test configuration.

Tested locally and with specs.

<img width="1806" height="1276" alt="image" src="https://github.com/user-attachments/assets/41ef4eb3-52b1-4b8e-9a4f-e8510c90cb39" />

## Checklist:

- [x] My code follows the style guidelines of this project
- [x] I have performed a self-review of my code
- [x] I have commented on my code, particularly in hard-to-understand areas
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [x] I have added tests that prove my fix is effective or that my feature works
- [x] New and existing unit tests pass locally with my changes
- [x] Any dependent changes have been merged and published in downstream modules
136 lines
4.5 KiB
Ruby
136 lines
4.5 KiB
Ruby
module Captain::ChatHelper
|
|
include Integrations::LlmInstrumentation
|
|
include Captain::ChatResponseHelper
|
|
include Captain::ChatGenerationRecorder
|
|
|
|
# Runs one chat completion for the current conversation and returns the built response.
#
# Logs the request, builds the chat via build_chat, replays every non-system
# message except the last one into it, then asks the chat with the last
# message's text (plus attachments, when there are any) inside with_agent_session.
# The reply is passed through build_response.
#
# NOTE: expects at least one non-system message in @messages; with none,
# `conversation_messages.last` is nil and the content lookup raises NoMethodError.
#
# @return the value produced by build_response for the chat's reply
# @raise [StandardError] any error raised along the way is logged and re-raised unchanged
def request_chat_completion
  log_chat_completion_request
  chat = build_chat

  add_messages_to_chat(chat)
  with_agent_session do
    last_content = conversation_messages.last[:content]
    text, attachments = Captain::OpenAiMessageBuilderService.extract_text_and_attachments(last_content)

    response = attachments.any? ? chat.ask(text, with: attachments) : chat.ask(text)
    build_response(response)
  end
rescue StandardError => e
  # Safe navigation keeps this handler from raising NoMethodError when
  # @assistant is nil, which would otherwise mask the original error.
  Rails.logger.error "#{self.class.name} Assistant: #{@assistant&.id}, Error in chat completion: #{e}"
  raise
end
|
|
|
|
private
|
|
|
|
# Assembles the fully configured LLM chat used for a completion request.
#
# Starts from chat(model:, temperature:) (not defined in the visible code;
# presumably supplied by one of the included modules), requests JSON-object
# responses, then applies tools, system instructions and event callbacks,
# in that order.
#
# @return the chat object returned by setup_event_handlers
def build_chat
  json_chat = chat(model: @model, temperature: temperature)
              .with_params(response_format: { type: 'json_object' })

  tooled_chat = setup_tools(json_chat)
  instructed_chat = setup_system_instructions(tooled_chat)
  setup_event_handlers(instructed_chat)
end
|
|
|
|
# Registers every entry of @tools on the chat, threading the chat returned by
# each with_tool call into the next one.
#
# @param llm_chat the chat to attach tools to
# @return the chat after the last with_tool call, or llm_chat itself when
#   @tools is nil or empty
def setup_tools(llm_chat)
  return llm_chat if @tools.nil?

  @tools.reduce(llm_chat) { |configured, tool| configured.with_tool(tool) }
end
|
|
|
|
# Collects the content of every system message in @messages (role given as
# either the string 'system' or the symbol :system), joins them with a blank
# line between each, and installs the result as the chat's instructions.
#
# @param chat the chat to configure
# @return whatever chat.with_instructions returns
def setup_system_instructions(chat)
  system_contents = @messages.each_with_object([]) do |message, contents|
    role = message[:role]
    contents << message[:content] if role == 'system' || role == :system
  end

  chat.with_instructions(system_contents.join("\n\n"))
end
|
|
|
|
# Wires the chat's callbacks to this object's recording and tool-tracking hooks.
#
# NOTE: Only on_end_message is used to record a generation (with token counts).
# RubyLLM fires its callbacks after chunks arrive rather than around the API
# call, so span timing will not reflect real API latency. Langfuse derives
# cost from model + token counts, so this is still enough for cost tracking.
#
# @param chat the chat to attach callbacks to
# @return the same chat that was passed in
def setup_event_handlers(chat)
  chat.on_end_message do |message|
    record_llm_generation(chat, message)
  end

  chat.on_tool_call do |tool_call|
    handle_tool_call(tool_call)
  end

  chat.on_tool_result do |result|
    handle_tool_result(result)
  end

  chat
end
|
|
|
|
# Callback for a tool invocation: persists a "thinking" message for it, opens
# a tool span, and appends the call to @pending_tool_calls (created on first use).
#
# @param tool_call the tool call object handed over by the chat callback
# @return [Array] the @pending_tool_calls list after the append
def handle_tool_call(tool_call)
  persist_thinking_message(tool_call)
  start_tool_span(tool_call)

  @pending_tool_calls ||= []
  @pending_tool_calls << tool_call
end
|
|
|
|
# Callback for a finished tool invocation: closes the tool span with the
# result first, then persists the tool completion.
#
# @param result the tool result handed over by the chat callback
# @return whatever persist_tool_completion returns
def handle_tool_result(result)
  end_tool_span(result)
  persist_tool_completion
end
|
|
|
|
# Replays the conversation history into the chat, leaving out the final
# non-system message (request_chat_completion sends that one via chat.ask).
#
# Each message's content is split into text and attachments; messages with
# attachments are wrapped in RubyLLM::Content, the rest are added as plain text.
#
# @param chat the chat receiving the history through add_message
# @return [Array] the slice of messages that was iterated
def add_messages_to_chat(chat)
  earlier_messages = conversation_messages[0..-2]

  earlier_messages.each do |message|
    text, attachments = Captain::OpenAiMessageBuilderService.extract_text_and_attachments(message[:content])
    content = if attachments.any?
                RubyLLM::Content.new(text, attachments)
              else
                text
              end

    chat.add_message(role: message[:role].to_sym, content: content)
  end
end
|
|
|
|
# Builds the parameter hash handed to the instrumentation helpers.
#
# @param chat optional chat; when given, its messages are serialized to
#   { role:, content: } string pairs, otherwise @messages is used as-is
# @return [Hash] span name, account/conversation ids, feature name, model,
#   messages, temperature, and a metadata hash with nil entries removed
def instrumentation_params(chat = nil)
  serialize = ->(message) { { role: message.role.to_s, content: message.content.to_s } }

  {
    span_name: "llm.captain.#{feature_name}",
    account_id: resolved_account_id,
    conversation_id: @conversation_id,
    feature_name: feature_name,
    model: @model,
    messages: chat ? chat.messages.map(&serialize) : @messages,
    temperature: temperature,
    metadata: {
      assistant_id: @assistant&.id,
      channel_type: resolved_channel_type,
      source: @source
    }.compact
  }
end
|
|
|
|
# Returns the entries of @messages that are not system messages, i.e. whose
# role is neither the string 'system' nor the symbol :system.
#
# @return the non-system messages, in their original order
def conversation_messages
  @messages.select do |message|
    role = message[:role]
    role != 'system' && role != :system
  end
end
|
|
|
|
# Sampling temperature for the chat, read from the assistant's config under
# the 'temperature' key and converted to a Float.
#
# Falls back to 1.0 when there is no assistant, no config, or no configured
# value. The default is applied BEFORE the conversion: `nil.to_f` is 0.0
# (truthy in Ruby), so converting first would make a `|| 1` fallback
# unreachable and silently turn "not configured" into a temperature of 0.0.
# An explicitly configured 0 is still honoured.
#
# @return [Float]
def temperature
  (@assistant&.config&.[]('temperature') || 1).to_f
end
|
|
|
|
# Account id used for instrumentation and conversation lookup: the id of
# @account when available, otherwise the assistant's account_id.
#
# @return the resolved id, or nil when neither source provides one
def resolved_account_id
  own_account_id = @account&.id
  return own_account_id if own_account_id

  @assistant&.account_id
end
|
|
|
|
# Channel type of the inbox behind the current conversation.
#
# Looks the conversation up by the resolved account id and @conversation_id
# (matched against display_id). No lookup is made when @conversation_id is unset.
#
# @return the inbox's channel_type, or nil when there is no conversation id,
#   no matching conversation, or no inbox
def resolved_channel_type
  return unless @conversation_id

  conversation = Conversation.find_by(account_id: resolved_account_id, display_id: @conversation_id)
  conversation&.inbox&.channel_type
end
|
|
|
|
# Runs the block inside a single agent session so that every LLM call and
# tool execution of one agentic loop lands under the same Langfuse
# trace/session.
#
# The @agent_session_active flag makes the method re-entrant: when
# request_chat_completion is entered again while a session is open (as tool
# calls can trigger), the block is simply yielded to and nests inside the
# existing session span instead of starting a separate trace. Only the
# outermost call opens the session and clears the flag afterwards, even when
# the block raises.
#
# @return the block's value (via instrument_agent_session for the outermost call)
def with_agent_session(&)
  # Nested call: a session is already open, so just run the block.
  return yield if @agent_session_active

  begin
    @agent_session_active = true
    instrument_agent_session(instrumentation_params, &)
  ensure
    @agent_session_active = false
  end
end
|
|
|
|
# Hook that every including class has to override: names the feature for
# instrumentation, where it is used for Langfuse tagging and span naming.
#
# @raise [NotImplementedError] always, unless overridden by the including class
def feature_name
  message = "#{self.class.name} must implement #feature_name"
  raise NotImplementedError, message
end
|
|
|
|
# Writes an info-level log line announcing a completion request: the including
# class name, the assistant id, the messages being sent and the tool count
# (0 when @tools is nil).
#
# The assistant id is read with safe navigation, matching how the rest of this
# module treats @assistant as possibly nil, so logging never raises
# NoMethodError before the request has even started.
def log_chat_completion_request
  tool_count = @tools&.length || 0
  Rails.logger.info("#{self.class.name} Assistant: #{@assistant&.id}, requesting completion for #{@messages} with #{tool_count} tools")
end
|
|
end
|