diff --git a/enterprise/app/jobs/messages/audio_transcription_job.rb b/enterprise/app/jobs/messages/audio_transcription_job.rb
index ce35405c8..5daf1e160 100644
--- a/enterprise/app/jobs/messages/audio_transcription_job.rb
+++ b/enterprise/app/jobs/messages/audio_transcription_job.rb
@@ -1,6 +1,17 @@
 class Messages::AudioTranscriptionJob < ApplicationJob
   queue_as :low
 
+  # Drop the job instead of retrying when the request is rejected as a bad
+  # request; log the attachment id, job id and status code for diagnosis.
+  discard_on Faraday::BadRequestError do |job, error|
+    log_context = {
+      attachment_id: job.arguments.first,
+      job_id: job.job_id,
+      status_code: error.response&.dig(:status)
+    }
+
+    Rails.logger.warn("Discarding audio transcription job due to bad request: #{log_context}")
+  end
   retry_on ActiveStorage::FileNotFoundError, wait: 2.seconds, attempts: 3
 
   def perform(attachment_id)
diff --git a/enterprise/app/services/messages/audio_transcription_service.rb b/enterprise/app/services/messages/audio_transcription_service.rb
index 1676dd862..4aa156f47 100644
--- a/enterprise/app/services/messages/audio_transcription_service.rb
+++ b/enterprise/app/services/messages/audio_transcription_service.rb
@@ -31,12 +31,24 @@ class Messages::AudioTranscriptionService< Llm::LegacyBaseOpenAiService
   end
 
   def fetch_audio_file
+    blob = attachment.file.blob
     temp_dir = Rails.root.join('tmp/uploads/audio-transcriptions')
     FileUtils.mkdir_p(temp_dir)
-    temp_file_path = File.join(temp_dir, "#{attachment.file.blob.key}-#{attachment.file.filename}")
+    temp_file_name = "#{blob.key}-#{blob.filename}"
+
+    # The stored filename has no extension: derive one from the content type.
+    if blob.filename.extension_without_delimiter.blank?
+      extension = extension_from_content_type(blob.content_type)
+      temp_file_name = "#{temp_file_name}.#{extension}" if extension.present?
+    end
+
+    temp_file_path = File.join(temp_dir, temp_file_name)
 
-    File.open(temp_file_path, 'wb') do |file|
-      attachment.file.blob.open do |blob_file|
+    # Open the blob before creating the temp file: if the blob cannot be
+    # opened, no file exists yet, so nothing is left behind for a caller
+    # that never receives the path and therefore cannot remove it.
+    blob.open do |blob_file|
+      File.open(temp_file_path, 'wb') do |file|
         IO.copy_stream(blob_file, file)
       end
     end
@@ -49,13 +61,12 @@ class Messages::AudioTranscriptionService< Llm::LegacyBaseOpenAiService
     return transcribed_text if transcribed_text.present?
 
     temp_file_path = fetch_audio_file
-    transcribed_text = nil
 
     File.open(temp_file_path, 'rb') do |file|
       response = @client.audio.transcribe(
         parameters: {
-          model: 'whisper-1',
+          model: WHISPER_MODEL,
           file: file,
           temperature: 0.4
         }
@@ -63,10 +74,11 @@ class Messages::AudioTranscriptionService< Llm::LegacyBaseOpenAiService
       transcribed_text = response['text']
     end
 
-    FileUtils.rm_f(temp_file_path)
-
     update_transcription(transcribed_text)
     transcribed_text
+  ensure
+    # Runs on every exit path; temp_file_path is nil when no file was fetched.
+    FileUtils.rm_f(temp_file_path) if temp_file_path.present?
   end
 
   def instrumentation_params(file_path)
@@ -90,4 +102,23 @@ class Messages::AudioTranscriptionService< Llm::LegacyBaseOpenAiService
 
     message.reindex
   end
+
+  # Derives a file extension from a MIME content type such as "audio/x-wav".
+  # Parameters after ';' and surrounding whitespace are ignored; known alias
+  # subtypes are mapped to their usual extension and any other subtype is
+  # returned as-is. Returns nil when there is no subtype to work with.
+  def extension_from_content_type(content_type)
+    mime_type = content_type.to_s.downcase.split(';').first.to_s.strip
+    subtype = mime_type.partition('/').last
+    return if subtype.blank?
+
+    {
+      'x-m4a' => 'm4a',
+      'x-wav' => 'wav',
+      'wave' => 'wav',
+      'vnd.wave' => 'wav',
+      'x-mp3' => 'mp3',
+      'x-flac' => 'flac'
+    }.fetch(subtype, subtype)
+  end
 end
diff --git a/spec/enterprise/services/messages/audio_transcription_service_spec.rb b/spec/enterprise/services/messages/audio_transcription_service_spec.rb
index 41a4cae83..7ece2540a 100644
--- a/spec/enterprise/services/messages/audio_transcription_service_spec.rb
+++ b/spec/enterprise/services/messages/audio_transcription_service_spec.rb
@@ -8,8 +8,8 @@ RSpec.describe Messages::AudioTranscriptionService, type: :service do
 
   before do
     # Create required installation configs
-    create(:installation_config, name: 'CAPTAIN_OPEN_AI_API_KEY', value: 'test-api-key')
-    create(:installation_config, name: 'CAPTAIN_OPEN_AI_MODEL', value: 'gpt-4o-mini')
+    InstallationConfig.find_or_create_by!(name: 'CAPTAIN_OPEN_AI_API_KEY') { |config| config.value = 'test-api-key' }
+    InstallationConfig.find_or_create_by!(name: 'CAPTAIN_OPEN_AI_MODEL') { |config| config.value = 'gpt-4o-mini' }
 
     # Mock usage limits for transcription to be available
     allow(account).to receive(:usage_limits).and_return({ captain: { responses: { current_available: 100 } } })
@@ -64,4 +64,24 @@ RSpec.describe Messages::AudioTranscriptionService, type: :service do
       end
     end
   end
+
+  describe '#fetch_audio_file' do
+    let(:service) { described_class.new(attachment) }
+
+    before do
+      attachment.file.attach(
+        io: File.open(Rails.public_path.join('audio/widget/ding.mp3')),
+        filename: 'speech',
+        content_type: 'audio/mpeg'
+      )
+    end
+
+    it 'adds extension from content type when filename has no extension' do
+      temp_file_path = service.send(:fetch_audio_file)
+
+      expect(File.extname(temp_file_path)).to eq('.mpeg')
+    ensure
+      FileUtils.rm_f(temp_file_path) if temp_file_path.present?
+    end
+  end
 end