diff --git a/enterprise/app/helpers/captain/chat_helper.rb b/enterprise/app/helpers/captain/chat_helper.rb index 9bdba5553..2734e510e 100644 --- a/enterprise/app/helpers/captain/chat_helper.rb +++ b/enterprise/app/helpers/captain/chat_helper.rb @@ -10,7 +10,10 @@ module Captain::ChatHelper add_messages_to_chat(chat) with_agent_session do - response = chat.ask(conversation_messages.last[:content]) + last_content = conversation_messages.last[:content] + text, attachments = Captain::OpenAiMessageBuilderService.extract_text_and_attachments(last_content) + + response = attachments.any? ? chat.ask(text, with: attachments) : chat.ask(text) build_response(response) end rescue StandardError => e @@ -68,7 +71,9 @@ module Captain::ChatHelper def add_messages_to_chat(chat) conversation_messages[0...-1].each do |msg| - chat.add_message(role: msg[:role].to_sym, content: msg[:content]) + text, attachments = Captain::OpenAiMessageBuilderService.extract_text_and_attachments(msg[:content]) + content = attachments.any? ? RubyLLM::Content.new(text, attachments) : text + chat.add_message(role: msg[:role].to_sym, content: content) end end diff --git a/enterprise/app/services/captain/open_ai_message_builder_service.rb b/enterprise/app/services/captain/open_ai_message_builder_service.rb index 4aaa64e0a..54c959761 100644 --- a/enterprise/app/services/captain/open_ai_message_builder_service.rb +++ b/enterprise/app/services/captain/open_ai_message_builder_service.rb @@ -1,6 +1,15 @@ class Captain::OpenAiMessageBuilderService pattr_initialize [:message!] + # Extracts text and image URLs from multimodal content array (reverse of generate_content) + def self.extract_text_and_attachments(content) + return [content, []] unless content.is_a?(Array) + + text_parts = content.select { |part| part[:type] == 'text' }.pluck(:text) + image_urls = content.select { |part| part[:type] == 'image_url' }.filter_map { |part| part.dig(:image_url, :url) } + [text_parts.join(' ').presence, image_urls] + end + def generate_content parts = [] parts << text_part(@message.content) if @message.content.present? diff --git a/spec/enterprise/services/captain/llm/assistant_chat_service_spec.rb b/spec/enterprise/services/captain/llm/assistant_chat_service_spec.rb new file mode 100644 index 000000000..bc4ff0b63 --- /dev/null +++ b/spec/enterprise/services/captain/llm/assistant_chat_service_spec.rb @@ -0,0 +1,134 @@ +require 'rails_helper' + +RSpec.describe Captain::Llm::AssistantChatService do + let(:account) { create(:account) } + let(:assistant) { create(:captain_assistant, account: account) } + let(:conversation) { create(:conversation, account: account) } + + let(:mock_chat) { instance_double(RubyLLM::Chat) } + let(:mock_response) do + instance_double( + RubyLLM::Message, + content: '{"response": "I can see the image shows a pricing table", "reasoning": "Analyzed the image"}' + ) + end + + before do + create(:installation_config, name: 'CAPTAIN_OPEN_AI_API_KEY', value: 'test-key') + + allow(RubyLLM).to receive(:chat).and_return(mock_chat) + allow(mock_chat).to receive(:with_temperature).and_return(mock_chat) + allow(mock_chat).to receive(:with_params).and_return(mock_chat) + allow(mock_chat).to receive(:with_tool).and_return(mock_chat) + allow(mock_chat).to receive(:with_instructions).and_return(mock_chat) + allow(mock_chat).to receive(:add_message).and_return(mock_chat) + allow(mock_chat).to receive(:on_end_message).and_return(mock_chat) + allow(mock_chat).to receive(:on_tool_call).and_return(mock_chat) + allow(mock_chat).to receive(:on_tool_result).and_return(mock_chat) + allow(mock_chat).to receive(:messages).and_return([]) + end + + describe 'image analysis' do + context 'when user sends a message with an image attachment' do + let(:message_history) do + [ + { + role: 'user', + content: [ + { type: 'text', text: 'What do you see in this image?' }, + { type: 'image_url', image_url: { url: 'https://example.com/screenshot.png' } } + ] + } + ] + end + + it 'sends the image to the LLM for analysis' do + expect(mock_chat).to receive(:ask).with( + 'What do you see in this image?', + with: ['https://example.com/screenshot.png'] + ).and_return(mock_response) + + service = described_class.new(assistant: assistant, conversation_id: conversation.display_id) + service.generate_response(message_history: message_history) + end + end + + context 'when user sends only an image without text' do + let(:message_history) do + [ + { + role: 'user', + content: [ + { type: 'image_url', image_url: { url: 'https://example.com/photo.jpg' } } + ] + } + ] + end + + it 'sends the image to the LLM with nil text' do + expect(mock_chat).to receive(:ask).with( + nil, + with: ['https://example.com/photo.jpg'] + ).and_return(mock_response) + + service = described_class.new(assistant: assistant, conversation_id: conversation.display_id) + service.generate_response(message_history: message_history) + end + end + + context 'when user sends a plain text message' do + let(:message_history) do + [ + { role: 'user', content: 'Hello, how can you help me?' } + ] + end + + it 'sends the text without attachments' do + expect(mock_chat).to receive(:ask).with('Hello, how can you help me?').and_return(mock_response) + + service = described_class.new(assistant: assistant, conversation_id: conversation.display_id) + service.generate_response(message_history: message_history) + end + end + end + + describe 'conversation history with images' do + context 'when previous messages contain images' do + let(:message_history) do + [ + { + role: 'user', + content: [ + { type: 'text', text: 'Here is my error screenshot' }, + { type: 'image_url', image_url: { url: 'https://example.com/error.png' } } + ] + }, + { role: 'assistant', content: 'I see the error. Try restarting.' }, + { role: 'user', content: 'It still does not work' } + ] + end + + it 'includes images from conversation history in context' do + # First historical message should include the image via RubyLLM::Content + expect(mock_chat).to receive(:add_message) do |args| + expect(args[:role]).to eq(:user) + expect(args[:content]).to be_a(RubyLLM::Content) + expect(args[:content].text).to eq('Here is my error screenshot') + expect(args[:content].attachments.first.source.to_s).to eq('https://example.com/error.png') + end.ordered + + # Second historical message is plain text + expect(mock_chat).to receive(:add_message).with( + role: :assistant, + content: 'I see the error. Try restarting.' + ).ordered + + # Current message asked via chat.ask + expect(mock_chat).to receive(:ask).with('It still does not work').and_return(mock_response) + + service = described_class.new(assistant: assistant, conversation_id: conversation.display_id) + service.generate_response(message_history: message_history) + end + end + end +end