From 811eb6661546da8ddcb4253faad25ef00344ce75 Mon Sep 17 00:00:00 2001 From: Tanmay Deep Sharma <32020192+tds-1@users.noreply.github.com> Date: Thu, 26 Jun 2025 07:46:09 +0530 Subject: [PATCH] feat: Add support for image files in Captain (#11730) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Pull Request Template ## Linear links: - https://linear.app/chatwoot/issue/CW-4479/if-image-is-sent-by-the-customer-send-it-to-openai ## Description This pull request adds “Captain image support” to Chatwoot. It introduces multimodal message handling so that when a customer sends an image, Captain can forward the file to OpenAI’s vision endpoint and generate a caption/analysis. ## Type of change Please delete options that are not relevant. - [x] New feature (non-breaking change which adds functionality) ## How Has This Been Tested? image ## Checklist: - [ ] My code follows the style guidelines of this project - [ ] I have performed a self-review of my code - [ ] I have commented on my code, particularly in hard-to-understand areas - [ ] I have made corresponding changes to the documentation - [ ] My changes generate no new warnings - [ ] I have added tests that prove my fix is effective or that my feature works - [ ] New and existing unit tests pass locally with my changes - [ ] Any dependent changes have been merged and published in downstream modules --------- Co-authored-by: Pranav --- .../accounts/captain/assistants_controller.rb | 4 +- .../conversation/response_builder_job.rb | 37 +-- .../captain/llm/assistant_chat_service.rb | 13 +- .../open_ai_message_builder_service.rb | 59 ++++ .../captain/assistants_controller_spec.rb | 8 +- .../conversation/response_builder_job_spec.rb | 25 ++ .../open_ai_message_builder_service_spec.rb | 309 ++++++++++++++++++ 7 files changed, 418 insertions(+), 37 deletions(-) create mode 100644 enterprise/app/services/captain/open_ai_message_builder_service.rb create mode 100644 
spec/enterprise/services/captain/open_ai_message_builder_service_spec.rb diff --git a/enterprise/app/controllers/api/v1/accounts/captain/assistants_controller.rb b/enterprise/app/controllers/api/v1/accounts/captain/assistants_controller.rb index e5a055836..ec8e8e653 100644 --- a/enterprise/app/controllers/api/v1/accounts/captain/assistants_controller.rb +++ b/enterprise/app/controllers/api/v1/accounts/captain/assistants_controller.rb @@ -25,8 +25,8 @@ class Api::V1::Accounts::Captain::AssistantsController < Api::V1::Accounts::Base def playground response = Captain::Llm::AssistantChatService.new(assistant: @assistant).generate_response( - params[:message_content], - message_history + additional_message: params[:message_content], + message_history: message_history ) render json: response diff --git a/enterprise/app/jobs/captain/conversation/response_builder_job.rb b/enterprise/app/jobs/captain/conversation/response_builder_job.rb index f341a6e98..431945896 100644 --- a/enterprise/app/jobs/captain/conversation/response_builder_job.rb +++ b/enterprise/app/jobs/captain/conversation/response_builder_job.rb @@ -26,8 +26,7 @@ class Captain::Conversation::ResponseBuilderJob < ApplicationJob def generate_and_process_response @response = Captain::Llm::AssistantChatService.new(assistant: @assistant).generate_response( - @conversation.messages.incoming.last.content, - collect_previous_messages + message_history: collect_previous_messages ) return process_action('handoff') if handoff_requested? @@ -43,33 +42,11 @@ class Captain::Conversation::ResponseBuilderJob < ApplicationJob .where(message_type: [:incoming, :outgoing]) .where(private: false) .map do |message| - { - content: message_content(message), - role: determine_role(message) - } - end - end - - def message_content(message) - return message.content if message.content.present? - return 'User has shared a message without content' unless message.attachments.any? 
- - audio_transcriptions = extract_audio_transcriptions(message.attachments) - return audio_transcriptions if audio_transcriptions.present? - - 'User has shared an attachment' - end - - def extract_audio_transcriptions(attachments) - audio_attachments = attachments.where(file_type: :audio) - return '' if audio_attachments.blank? - - transcriptions = '' - audio_attachments.each do |attachment| - result = Messages::AudioTranscriptionService.new(attachment).perform - transcriptions += result[:transcriptions] if result[:success] + { + content: prepare_multimodal_message_content(message), + role: determine_role(message) + } end - transcriptions end def determine_role(message) @@ -78,6 +55,10 @@ class Captain::Conversation::ResponseBuilderJob < ApplicationJob message.message_type == 'incoming' ? 'user' : 'system' end + def prepare_multimodal_message_content(message) + Captain::OpenAiMessageBuilderService.new(message: message).generate_content + end + def handoff_requested? @response['response'] == 'conversation_handoff' end diff --git a/enterprise/app/services/captain/llm/assistant_chat_service.rb b/enterprise/app/services/captain/llm/assistant_chat_service.rb index 569931d44..ca8fafaa0 100644 --- a/enterprise/app/services/captain/llm/assistant_chat_service.rb +++ b/enterprise/app/services/captain/llm/assistant_chat_service.rb @@ -12,9 +12,16 @@ class Captain::Llm::AssistantChatService < Llm::BaseOpenAiService register_tools end - def generate_response(input, previous_messages = [], role = 'user') - @messages += previous_messages - @messages << { role: role, content: input } if input.present? + # additional_message: A single message (String) from the user that should be appended to the chat. + # It can be an empty String or nil when you only want to supply historical messages. + # message_history: An Array of already formatted messages that provide the previous context. + # role: The role for the additional_message (defaults to `user`). 
+ # + # NOTE: Parameters are provided as keyword arguments to improve clarity and avoid relying on + # positional ordering. + def generate_response(additional_message: nil, message_history: [], role: 'user') + @messages += message_history + @messages << { role: role, content: additional_message } if additional_message.present? request_chat_completion end diff --git a/enterprise/app/services/captain/open_ai_message_builder_service.rb b/enterprise/app/services/captain/open_ai_message_builder_service.rb new file mode 100644 index 000000000..3320ad537 --- /dev/null +++ b/enterprise/app/services/captain/open_ai_message_builder_service.rb @@ -0,0 +1,59 @@ +class Captain::OpenAiMessageBuilderService + pattr_initialize [:message!] + + def generate_content + parts = [] + parts << text_part(@message.content) if @message.content.present? + parts.concat(attachment_parts(@message.attachments)) if @message.attachments.any? + + return 'Message without content' if parts.blank? + return parts.first[:text] if parts.one? && parts.first[:type] == 'text' + + parts + end + + private + + def text_part(text) + { type: 'text', text: text } + end + + def image_part(image_url) + { type: 'image_url', image_url: { url: image_url } } + end + + def attachment_parts(attachments) + image_attachments = attachments.where(file_type: :image) + image_content = image_parts(image_attachments) + + transcription = extract_audio_transcriptions(attachments) + transcription_part = text_part(transcription) if transcription.present? + + attachment_part = text_part('User has shared an attachment') if attachments.where.not(file_type: %i[image audio]).exists? + + [image_content, transcription_part, attachment_part].flatten.compact + end + + def image_parts(image_attachments) + image_attachments.each_with_object([]) do |attachment, parts| + url = get_attachment_url(attachment) + parts << image_part(url) if url.present? 
+ end + end + + def get_attachment_url(attachment) + return attachment.external_url if attachment.external_url.present? + + attachment.file.attached? ? attachment.file_url : nil + end + + def extract_audio_transcriptions(attachments) + audio_attachments = attachments.where(file_type: :audio) + return '' if audio_attachments.blank? + + audio_attachments.map do |attachment| + result = Messages::AudioTranscriptionService.new(attachment).perform + result[:success] ? result[:transcriptions] : '' + end.join + end +end \ No newline at end of file diff --git a/spec/enterprise/controllers/api/v1/accounts/captain/assistants_controller_spec.rb b/spec/enterprise/controllers/api/v1/accounts/captain/assistants_controller_spec.rb index 1f6d83d80..80be6f30f 100644 --- a/spec/enterprise/controllers/api/v1/accounts/captain/assistants_controller_spec.rb +++ b/spec/enterprise/controllers/api/v1/accounts/captain/assistants_controller_spec.rb @@ -211,8 +211,8 @@ RSpec.describe 'Api::V1::Accounts::Captain::Assistants', type: :request do expect(response).to have_http_status(:success) expect(chat_service).to have_received(:generate_response).with( - valid_params[:message_content], - valid_params[:message_history] + additional_message: valid_params[:message_content], + message_history: valid_params[:message_history] ) expect(json_response[:content]).to eq('Assistant response') end @@ -232,8 +232,8 @@ RSpec.describe 'Api::V1::Accounts::Captain::Assistants', type: :request do expect(response).to have_http_status(:success) expect(chat_service).to have_received(:generate_response).with( - params_without_history[:message_content], - [] + additional_message: params_without_history[:message_content], + message_history: [] ) end end diff --git a/spec/enterprise/jobs/captain/conversation/response_builder_job_spec.rb b/spec/enterprise/jobs/captain/conversation/response_builder_job_spec.rb index 1e4a6e824..ca8d4a6c0 100644 --- a/spec/enterprise/jobs/captain/conversation/response_builder_job_spec.rb 
+++ b/spec/enterprise/jobs/captain/conversation/response_builder_job_spec.rb @@ -30,5 +30,30 @@ RSpec.describe Captain::Conversation::ResponseBuilderJob, type: :job do account.reload expect(account.usage_limits[:captain][:responses][:consumed]).to eq(1) end + + context 'when message contains an image' do + let(:message_with_image) { create(:message, conversation: conversation, message_type: :incoming, content: 'Can you help with this error?') } + let(:image_attachment) { message_with_image.attachments.create!(account: account, file_type: :image, external_url: 'https://example.com/error.jpg') } + + before do + image_attachment + end + + it 'includes image URL directly in the message content for OpenAI vision analysis' do + # Expect the generate_response to receive multimodal content with image URL + expect(mock_llm_chat_service).to receive(:generate_response) do |**kwargs| + history = kwargs[:message_history] + last_entry = history.last + expect(last_entry[:content]).to be_an(Array) + expect(last_entry[:content].any? { |part| part[:type] == 'text' && part[:text] == 'Can you help with this error?' }).to be true + expect(last_entry[:content].any? do |part| + part[:type] == 'image_url' && part[:image_url][:url] == 'https://example.com/error.jpg' + end).to be true + { 'response' => 'I can see the error in your image. It appears to be a database connection issue.' 
} + end + + described_class.perform_now(conversation, assistant) + end + end end end diff --git a/spec/enterprise/services/captain/open_ai_message_builder_service_spec.rb b/spec/enterprise/services/captain/open_ai_message_builder_service_spec.rb new file mode 100644 index 000000000..13c29f756 --- /dev/null +++ b/spec/enterprise/services/captain/open_ai_message_builder_service_spec.rb @@ -0,0 +1,309 @@ +require 'rails_helper' + +RSpec.describe Captain::OpenAiMessageBuilderService do + subject(:service) { described_class.new(message: message) } + + let(:message) { create(:message, content: 'Hello world') } + + describe '#generate_content' do + context 'when message has only text content' do + it 'returns the text content directly' do + expect(service.generate_content).to eq('Hello world') + end + end + + context 'when message has no content and no attachments' do + let(:message) { create(:message, content: nil) } + + it 'returns default message' do + expect(service.generate_content).to eq('Message without content') + end + end + + context 'when message has text content and attachments' do + before do + attachment = message.attachments.build(account_id: message.account_id, file_type: :image, external_url: 'https://example.com/image.jpg') + attachment.save! + end + + it 'returns an array of content parts' do + result = service.generate_content + expect(result).to be_an(Array) + expect(result).to include({ type: 'text', text: 'Hello world' }) + expect(result).to include({ type: 'image_url', image_url: { url: 'https://example.com/image.jpg' } }) + end + end + + context 'when message has only non-text attachments' do + let(:message) { create(:message, content: nil) } + + before do + attachment = message.attachments.build(account_id: message.account_id, file_type: :image, external_url: 'https://example.com/image.jpg') + attachment.save! 
+ end + + it 'returns an array of content parts without text' do + result = service.generate_content + expect(result).to be_an(Array) + expect(result).to include({ type: 'image_url', image_url: { url: 'https://example.com/image.jpg' } }) + expect(result).not_to include(hash_including(type: 'text', text: 'Hello world')) + end + end + end + + describe '#attachment_parts' do + let(:message) { create(:message, content: nil) } + let(:attachments) { message.attachments } + + context 'with image attachments' do + before do + attachment = message.attachments.build(account_id: message.account_id, file_type: :image, external_url: 'https://example.com/image.jpg') + attachment.save! + end + + it 'includes image parts' do + result = service.send(:attachment_parts, attachments) + expect(result).to include({ type: 'image_url', image_url: { url: 'https://example.com/image.jpg' } }) + end + end + + context 'with audio attachments' do + let(:audio_attachment) do + attachment = message.attachments.build(account_id: message.account_id, file_type: :audio) + attachment.save! + attachment + end + + before do + allow(Messages::AudioTranscriptionService).to receive(:new).with(audio_attachment).and_return( + instance_double(Messages::AudioTranscriptionService, perform: { success: true, transcriptions: 'Audio transcription text' }) + ) + end + + it 'includes transcription text part' do + audio_attachment # trigger creation + result = service.send(:attachment_parts, attachments) + expect(result).to include({ type: 'text', text: 'Audio transcription text' }) + end + end + + context 'with other file types' do + before do + attachment = message.attachments.build(account_id: message.account_id, file_type: :file) + attachment.save! 
+ end + + it 'includes generic attachment message' do + result = service.send(:attachment_parts, attachments) + expect(result).to include({ type: 'text', text: 'User has shared an attachment' }) + end + end + + context 'with mixed attachment types' do + let(:image_attachment) do + attachment = message.attachments.build(account_id: message.account_id, file_type: :image, external_url: 'https://example.com/image.jpg') + attachment.save! + attachment + end + + let(:audio_attachment) do + attachment = message.attachments.build(account_id: message.account_id, file_type: :audio) + attachment.save! + attachment + end + + let(:document_attachment) do + attachment = message.attachments.build(account_id: message.account_id, file_type: :file) + attachment.save! + attachment + end + + before do + allow(Messages::AudioTranscriptionService).to receive(:new).with(audio_attachment).and_return( + instance_double(Messages::AudioTranscriptionService, perform: { success: true, transcriptions: 'Audio text' }) + ) + end + + it 'includes all relevant parts' do + image_attachment # trigger creation + audio_attachment # trigger creation + document_attachment # trigger creation + + result = service.send(:attachment_parts, attachments) + expect(result).to include({ type: 'image_url', image_url: { url: 'https://example.com/image.jpg' } }) + expect(result).to include({ type: 'text', text: 'Audio text' }) + expect(result).to include({ type: 'text', text: 'User has shared an attachment' }) + end + end + end + + describe '#image_parts' do + let(:message) { create(:message, content: nil) } + + context 'with valid image attachments' do + let(:image1) do + attachment = message.attachments.build(account_id: message.account_id, file_type: :image, external_url: 'https://example.com/image1.jpg') + attachment.save! 
+ attachment + end + + let(:image2) do + attachment = message.attachments.build(account_id: message.account_id, file_type: :image, external_url: 'https://example.com/image2.jpg') + attachment.save! + attachment + end + + it 'returns image parts for all valid images' do + image1 # trigger creation + image2 # trigger creation + + image_attachments = message.attachments.where(file_type: :image) + result = service.send(:image_parts, image_attachments) + + expect(result).to include({ type: 'image_url', image_url: { url: 'https://example.com/image1.jpg' } }) + expect(result).to include({ type: 'image_url', image_url: { url: 'https://example.com/image2.jpg' } }) + end + end + + context 'with image attachments without URLs' do + let(:image_attachment) do + attachment = message.attachments.build(account_id: message.account_id, file_type: :image, external_url: nil) + attachment.save! + attachment + end + + before do + allow(image_attachment).to receive(:file).and_return(instance_double(ActiveStorage::Attached::One, attached?: false)) + end + + it 'skips images without valid URLs' do + image_attachment # trigger creation + + image_attachments = message.attachments.where(file_type: :image) + result = service.send(:image_parts, image_attachments) + + expect(result).to be_empty + end + end + end + + describe '#get_attachment_url' do + let(:attachment) do + attachment = message.attachments.build(account_id: message.account_id, file_type: :image) + attachment.save! 
+ attachment + end + + context 'when attachment has external_url' do + before { attachment.update(external_url: 'https://example.com/image.jpg') } + + it 'returns external_url' do + expect(service.send(:get_attachment_url, attachment)).to eq('https://example.com/image.jpg') + end + end + + context 'when attachment has attached file' do + before do + attachment.update(external_url: nil) + allow(attachment).to receive(:file).and_return(instance_double(ActiveStorage::Attached::One, attached?: true)) + allow(attachment).to receive(:file_url).and_return('https://local.com/file.jpg') + end + + it 'returns file_url' do + expect(service.send(:get_attachment_url, attachment)).to eq('https://local.com/file.jpg') + end + end + + context 'when attachment has no URL or file' do + before do + attachment.update(external_url: nil) + allow(attachment).to receive(:file).and_return(instance_double(ActiveStorage::Attached::One, attached?: false)) + end + + it 'returns nil' do + expect(service.send(:get_attachment_url, attachment)).to be_nil + end + end + end + + describe '#extract_audio_transcriptions' do + let(:message) { create(:message, content: nil) } + + context 'with no audio attachments' do + it 'returns empty string' do + result = service.send(:extract_audio_transcriptions, message.attachments) + expect(result).to eq('') + end + end + + context 'with successful audio transcriptions' do + let(:audio1) do + attachment = message.attachments.build(account_id: message.account_id, file_type: :audio) + attachment.save! + attachment + end + + let(:audio2) do + attachment = message.attachments.build(account_id: message.account_id, file_type: :audio) + attachment.save! + attachment + end + + before do + allow(Messages::AudioTranscriptionService).to receive(:new).with(audio1).and_return( + instance_double(Messages::AudioTranscriptionService, perform: { success: true, transcriptions: 'First audio text. 
' }) + ) + allow(Messages::AudioTranscriptionService).to receive(:new).with(audio2).and_return( + instance_double(Messages::AudioTranscriptionService, perform: { success: true, transcriptions: 'Second audio text.' }) + ) + end + + it 'concatenates all successful transcriptions' do + audio1 # trigger creation + audio2 # trigger creation + + attachments = message.attachments + result = service.send(:extract_audio_transcriptions, attachments) + expect(result).to eq('First audio text. Second audio text.') + end + end + + context 'with failed audio transcriptions' do + let(:audio_attachment) do + attachment = message.attachments.build(account_id: message.account_id, file_type: :audio) + attachment.save! + attachment + end + + before do + allow(Messages::AudioTranscriptionService).to receive(:new).with(audio_attachment).and_return( + instance_double(Messages::AudioTranscriptionService, perform: { success: false, transcriptions: nil }) + ) + end + + it 'returns empty string for failed transcriptions' do + audio_attachment # trigger creation + + attachments = message.attachments + result = service.send(:extract_audio_transcriptions, attachments) + expect(result).to eq('') + end + end + end + + describe 'private helper methods' do + describe '#text_part' do + it 'returns correct text part format' do + result = service.send(:text_part, 'Hello world') + expect(result).to eq({ type: 'text', text: 'Hello world' }) + end + end + + describe '#image_part' do + it 'returns correct image part format' do + result = service.send(:image_part, 'https://example.com/image.jpg') + expect(result).to eq({ type: 'image_url', image_url: { url: 'https://example.com/image.jpg' } }) + end + end + end +end