feat: Add support for image files in Captain (#11730)
# Pull Request Template

## Linear links:

- https://linear.app/chatwoot/issue/CW-4479/if-image-is-sent-by-the-customer-send-it-to-openai

## Description

This pull request adds “Captain image support” to Chatwoot. It introduces multimodal message handling so that when a customer sends an image, Captain can forward the file to OpenAI’s vision endpoint and generate a caption/analysis.

## Type of change

Please delete options that are not relevant.

- [x] New feature (non-breaking change which adds functionality)

## How Has This Been Tested?

<img width="891" alt="image" src="https://github.com/user-attachments/assets/c7cc98ed-cc44-4865-a53a-83d129e2fe2c" />

## Checklist:

- [ ] My code follows the style guidelines of this project
- [ ] I have performed a self-review of my code
- [ ] I have commented on my code, particularly in hard-to-understand areas
- [ ] I have made corresponding changes to the documentation
- [ ] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my feature works
- [ ] New and existing unit tests pass locally with my changes
- [ ] Any dependent changes have been merged and published in downstream modules

---------

Co-authored-by: Pranav <pranav@chatwoot.com>
This commit is contained in:
committed by
GitHub
parent
257cd07ee6
commit
811eb66615
@@ -25,8 +25,8 @@ class Api::V1::Accounts::Captain::AssistantsController < Api::V1::Accounts::Base
|
||||
|
||||
def playground
|
||||
response = Captain::Llm::AssistantChatService.new(assistant: @assistant).generate_response(
|
||||
params[:message_content],
|
||||
message_history
|
||||
additional_message: params[:message_content],
|
||||
message_history: message_history
|
||||
)
|
||||
|
||||
render json: response
|
||||
|
||||
@@ -26,8 +26,7 @@ class Captain::Conversation::ResponseBuilderJob < ApplicationJob
|
||||
|
||||
def generate_and_process_response
|
||||
@response = Captain::Llm::AssistantChatService.new(assistant: @assistant).generate_response(
|
||||
@conversation.messages.incoming.last.content,
|
||||
collect_previous_messages
|
||||
message_history: collect_previous_messages
|
||||
)
|
||||
|
||||
return process_action('handoff') if handoff_requested?
|
||||
@@ -43,33 +42,11 @@ class Captain::Conversation::ResponseBuilderJob < ApplicationJob
|
||||
.where(message_type: [:incoming, :outgoing])
|
||||
.where(private: false)
|
||||
.map do |message|
|
||||
{
|
||||
content: message_content(message),
|
||||
role: determine_role(message)
|
||||
}
|
||||
end
|
||||
end
|
||||
|
||||
# Picks the text to send for +message+.
#
# Order of preference:
#   1. the message's own content, when present;
#   2. a fixed notice when there is neither content nor any attachment;
#   3. whatever extract_audio_transcriptions yields for the attachments, when present;
#   4. a fixed notice that an attachment was shared.
def message_content(message)
text = message.content
return text if text.present?
return 'User has shared a message without content' unless message.attachments.any?

# `presence` yields nil for a blank transcription, so the fallback notice is used.
extract_audio_transcriptions(message.attachments).presence || 'User has shared an attachment'
end
|
||||
|
||||
def extract_audio_transcriptions(attachments)
|
||||
audio_attachments = attachments.where(file_type: :audio)
|
||||
return '' if audio_attachments.blank?
|
||||
|
||||
transcriptions = ''
|
||||
audio_attachments.each do |attachment|
|
||||
result = Messages::AudioTranscriptionService.new(attachment).perform
|
||||
transcriptions += result[:transcriptions] if result[:success]
|
||||
{
|
||||
content: prepare_multimodal_message_content(message),
|
||||
role: determine_role(message)
|
||||
}
|
||||
end
|
||||
transcriptions
|
||||
end
|
||||
|
||||
def determine_role(message)
|
||||
@@ -78,6 +55,10 @@ class Captain::Conversation::ResponseBuilderJob < ApplicationJob
|
||||
message.message_type == 'incoming' ? 'user' : 'system'
|
||||
end
|
||||
|
||||
# Delegates to Captain::OpenAiMessageBuilderService to build the content
# value for +message+ and returns whatever `generate_content` produces.
def prepare_multimodal_message_content(message)
builder = Captain::OpenAiMessageBuilderService.new(message: message)
builder.generate_content
end
|
||||
|
||||
# True when the 'response' entry of @response is exactly the
# 'conversation_handoff' marker string.
def handoff_requested?
reply = @response['response']
reply == 'conversation_handoff'
end
|
||||
|
||||
@@ -12,9 +12,16 @@ class Captain::Llm::AssistantChatService < Llm::BaseOpenAiService
|
||||
register_tools
|
||||
end
|
||||
|
||||
def generate_response(input, previous_messages = [], role = 'user')
|
||||
@messages += previous_messages
|
||||
@messages << { role: role, content: input } if input.present?
|
||||
# additional_message: A single message (String) from the user that should be appended to the chat.
|
||||
# It can be an empty String or nil when you only want to supply historical messages.
|
||||
# message_history: An Array of already formatted messages that provide the previous context.
|
||||
# role: The role for the additional_message (defaults to `user`).
|
||||
#
|
||||
# NOTE: Parameters are provided as keyword arguments to improve clarity and avoid relying on
|
||||
# positional ordering.
|
||||
def generate_response(additional_message: nil, message_history: [], role: 'user')
|
||||
@messages += message_history
|
||||
@messages << { role: role, content: additional_message } if additional_message.present?
|
||||
request_chat_completion
|
||||
end
|
||||
|
||||
|
||||
@@ -0,0 +1,59 @@
|
||||
# Builds the `content` value of a chat message from a conversation message:
# its text, its image attachments, the transcriptions of its audio
# attachments, and a notice for any other kind of attachment.
class Captain::OpenAiMessageBuilderService
  # Callers construct the service with a required `message:` keyword.
  pattr_initialize [:message!]

  # Returns one of:
  #   * 'Message without content' when no part could be built;
  #   * a plain String when the only part is a text part;
  #   * an Array of part hashes (`type: 'text'` / `type: 'image_url'`) otherwise.
  def generate_content
    parts = collect_parts
    return 'Message without content' if parts.blank?

    only_part = parts.first
    # A lone text part is collapsed to its bare string.
    return only_part[:text] if parts.size == 1 && only_part[:type] == 'text'

    parts
  end

  private

  # Text part first (when the message has content), then attachment parts.
  def collect_parts
    collected = []
    collected << text_part(@message.content) if @message.content.present?
    collected.concat(attachment_parts(@message.attachments)) if @message.attachments.any?
    collected
  end

  def text_part(text)
    { type: 'text', text: text }
  end

  def image_part(image_url)
    { type: 'image_url', image_url: { url: image_url } }
  end

  # Parts derived from attachments, in this order: images, the combined audio
  # transcription (if any), and a single notice when at least one attachment
  # is neither an image nor audio.
  def attachment_parts(attachments)
    parts = image_parts(attachments.where(file_type: :image))

    transcription = extract_audio_transcriptions(attachments)
    parts << text_part(transcription) if transcription.present?

    other_files_present = attachments.where.not(file_type: %i[image audio]).exists?
    parts << text_part('User has shared an attachment') if other_files_present

    parts
  end

  # One image part per attachment that resolves to a URL; the rest are dropped.
  def image_parts(image_attachments)
    image_attachments.filter_map do |attachment|
      url = get_attachment_url(attachment)
      image_part(url) if url.present?
    end
  end

  # Prefers the attachment's external URL; otherwise uses the attached file's
  # URL, or nil when no file is attached.
  def get_attachment_url(attachment)
    external = attachment.external_url
    return external if external.present?
    return nil unless attachment.file.attached?

    attachment.file_url
  end

  # Concatenates (with no separator) the transcriptions of all audio
  # attachments; returns '' when there are none.
  def extract_audio_transcriptions(attachments)
    audio_attachments = attachments.where(file_type: :audio)
    return '' if audio_attachments.blank?

    audio_attachments.map { |attachment| transcription_for(attachment) }.join
  end

  # Transcription of a single audio attachment, or '' when the transcription
  # service does not report success.
  def transcription_for(attachment)
    result = Messages::AudioTranscriptionService.new(attachment).perform
    return '' unless result[:success]

    result[:transcriptions]
  end
end
|
||||
Reference in New Issue
Block a user