Files
leadchat/enterprise/app/helpers/captain/chat_helper.rb
Aakash Bakhle 77493c5d0f fix: captain assistant image comprehension (#13390)
# Pull Request Template

## Description

Fixes # (issue)

When we migrated to RubyLLM, images weren't being sent properly in
RubyLLM format to the model, so it did not understand images.

## Type of change

Please delete options that are not relevant.

- [x] Bug fix (non-breaking change which fixes an issue)

## How Has This Been Tested?

Please describe the tests that you ran to verify your changes. Provide
instructions so we can reproduce. Please also list any relevant details
for your test configuration.

specs + local testing

Current behaviour on staging:
<img width="772" height="1012" alt="image"
src="https://github.com/user-attachments/assets/7b7d360f-dea4-48af-b20b-ee4c98a38a85"
/>

local testing with fix:
<img width="792" height="1216" alt="image"
src="https://github.com/user-attachments/assets/5ef82452-015e-4bda-a68f-884d00acb014"
/>


## Checklist:

- [x] My code follows the style guidelines of this project
- [x] I have performed a self-review of my code
- [x] I have commented on my code, particularly in hard-to-understand
areas
- [ ] I have made corresponding changes to the documentation
- [x] My changes generate no new warnings
- [x] I have added tests that prove my fix is effective or that my
feature works
- [x] New and existing unit tests pass locally with my changes
- [x] Any dependent changes have been merged and published in downstream
modules

---------

Co-authored-by: Sojan Jose <sojan@pepalo.com>
2026-01-28 16:07:13 -08:00

137 lines
4.3 KiB
Ruby

module Captain::ChatHelper
include Integrations::LlmInstrumentation
include Captain::ChatResponseHelper
include Captain::ChatGenerationRecorder
def request_chat_completion
log_chat_completion_request
chat = build_chat
add_messages_to_chat(chat)
with_agent_session do
last_content = conversation_messages.last[:content]
text, attachments = Captain::OpenAiMessageBuilderService.extract_text_and_attachments(last_content)
response = attachments.any? ? chat.ask(text, with: attachments) : chat.ask(text)
build_response(response)
end
rescue StandardError => e
Rails.logger.error "#{self.class.name} Assistant: #{@assistant.id}, Error in chat completion: #{e}"
raise e
end
private
def build_chat
llm_chat = chat(model: @model, temperature: temperature)
llm_chat = llm_chat.with_params(response_format: { type: 'json_object' })
llm_chat = setup_tools(llm_chat)
llm_chat = setup_system_instructions(llm_chat)
setup_event_handlers(llm_chat)
end
def setup_tools(llm_chat)
@tools&.each do |tool|
llm_chat = llm_chat.with_tool(tool)
end
llm_chat
end
def setup_system_instructions(chat)
system_messages = @messages.select { |m| m[:role] == 'system' || m[:role] == :system }
combined_instructions = system_messages.pluck(:content).join("\n\n")
chat.with_instructions(combined_instructions)
end
def setup_event_handlers(chat)
# NOTE: We only use on_end_message to record the generation with token counts.
# RubyLLM callbacks fire after chunks arrive, not around the API call, so
# span timing won't reflect actual API latency. But Langfuse calculates costs
# from model + token counts, so this is sufficient for cost tracking.
chat.on_end_message { |message| record_llm_generation(chat, message) }
chat.on_tool_call { |tool_call| handle_tool_call(tool_call) }
chat.on_tool_result { |result| handle_tool_result(result) }
chat
end
def handle_tool_call(tool_call)
persist_thinking_message(tool_call)
start_tool_span(tool_call)
@pending_tool_calls ||= []
@pending_tool_calls.push(tool_call)
end
def handle_tool_result(result)
end_tool_span(result)
persist_tool_completion
end
def add_messages_to_chat(chat)
conversation_messages[0...-1].each do |msg|
text, attachments = Captain::OpenAiMessageBuilderService.extract_text_and_attachments(msg[:content])
content = attachments.any? ? RubyLLM::Content.new(text, attachments) : text
chat.add_message(role: msg[:role].to_sym, content: content)
end
end
def instrumentation_params(chat = nil)
{
span_name: "llm.captain.#{feature_name}",
account_id: resolved_account_id,
conversation_id: @conversation_id,
feature_name: feature_name,
model: @model,
messages: chat ? chat.messages.map { |m| { role: m.role.to_s, content: m.content.to_s } } : @messages,
temperature: temperature,
metadata: {
assistant_id: @assistant&.id
}
}
end
def conversation_messages
@messages.reject { |m| m[:role] == 'system' || m[:role] == :system }
end
def temperature
@assistant&.config&.[]('temperature').to_f || 1
end
def resolved_account_id
@account&.id || @assistant&.account_id
end
# Ensures all LLM calls and tool executions within an agentic loop
# are grouped under a single trace/session in Langfuse.
#
# Without this guard, each recursive call to request_chat_completion
# (triggered by tool calls) would create a separate trace instead of
# nesting within the existing session span.
def with_agent_session(&)
already_active = @agent_session_active
return yield if already_active
@agent_session_active = true
instrument_agent_session(instrumentation_params, &)
ensure
@agent_session_active = false unless already_active
end
# Must be implemented by including class to identify the feature for instrumentation.
# Used for Langfuse tagging and span naming.
def feature_name
raise NotImplementedError, "#{self.class.name} must implement #feature_name"
end
def log_chat_completion_request
Rails.logger.info(
"#{self.class.name} Assistant: #{@assistant.id}, Requesting chat completion
for messages #{@messages} with #{@tools&.length || 0} tools
"
)
end
end