fix(messages): reduce audio transcription 400 retry noise (#13487)
## Summary

This PR reduces duplicate failure noise for audio transcription jobs that fail with permanent HTTP 400 responses, and fixes a file-format edge case causing intermittent 400s.

Sentry issue: [CHATWOOT-99E / 6660541334](https://chatwoot-p3.sentry.io/issues/6660541334/)

## Confirmed root cause

For some attachments, the stored filename had no extension (example: `speech`, content type `audio/mpeg`). When the temporary transcription upload file was created without an extension, OpenAI returned: `Unrecognized file format` (HTTP 400).

## Scope of changes

1. `Messages::AudioTranscriptionJob`
   - Keeps `discard_on Faraday::BadRequestError` to avoid retry storms on permanent request errors.
   - Adds explicit Rails warning logs for discarded jobs with attachment/job/status context.
2. `Messages::AudioTranscriptionService`
   - Keeps guaranteed temp file cleanup via `ensure`.
   - Ensures temp upload files include an extension when the original filename has none, derived from blob `content_type`.
   - This addresses intermittent failures like extensionless `audio/mpeg` files.

## Reproduction

Enable audio transcription for an account and process an audio attachment whose stored filename has no extension (for example `speech`) but a valid audio content type (`audio/mpeg`). Before this fix, OpenAI transcription could return HTTP 400 `Unrecognized file format` for that attachment while similar attachments with extensions succeeded.

## Testing

Ran: `bundle exec rubocop enterprise/app/jobs/messages/audio_transcription_job.rb enterprise/app/services/messages/audio_transcription_service.rb`

Result: both modified files pass lint with no offenses.
This commit is contained in:
@@ -1,6 +1,15 @@
|
|||||||
class Messages::AudioTranscriptionJob < ApplicationJob
|
class Messages::AudioTranscriptionJob < ApplicationJob
|
||||||
queue_as :low
|
queue_as :low
|
||||||
|
|
||||||
|
# A bad-request response is treated as a permanent failure: the job is
# discarded instead of retried, and a warning is logged with the attachment id,
# the job id and the HTTP status taken from the error response (nil when the
# error carries no response).
discard_on Faraday::BadRequestError do |job, error|
  attachment_id = job.arguments.first
  details = { attachment_id: attachment_id, job_id: job.job_id, status_code: error.response&.dig(:status) }

  Rails.logger.warn("Discarding audio transcription job due to bad request: #{details}")
end
|
||||||
retry_on ActiveStorage::FileNotFoundError, wait: 2.seconds, attempts: 3
|
retry_on ActiveStorage::FileNotFoundError, wait: 2.seconds, attempts: 3
|
||||||
|
|
||||||
def perform(attachment_id)
|
def perform(attachment_id)
|
||||||
|
|||||||
@@ -31,12 +31,20 @@ class Messages::AudioTranscriptionService< Llm::LegacyBaseOpenAiService
|
|||||||
end
|
end
|
||||||
|
|
||||||
def fetch_audio_file
|
def fetch_audio_file
|
||||||
|
blob = attachment.file.blob
|
||||||
temp_dir = Rails.root.join('tmp/uploads/audio-transcriptions')
|
temp_dir = Rails.root.join('tmp/uploads/audio-transcriptions')
|
||||||
FileUtils.mkdir_p(temp_dir)
|
FileUtils.mkdir_p(temp_dir)
|
||||||
temp_file_path = File.join(temp_dir, "#{attachment.file.blob.key}-#{attachment.file.filename}")
|
temp_file_name = "#{blob.key}-#{blob.filename}"
|
||||||
|
|
||||||
|
if blob.filename.extension_without_delimiter.blank?
|
||||||
|
extension = extension_from_content_type(blob.content_type)
|
||||||
|
temp_file_name = "#{temp_file_name}.#{extension}" if extension.present?
|
||||||
|
end
|
||||||
|
|
||||||
|
temp_file_path = File.join(temp_dir, temp_file_name)
|
||||||
|
|
||||||
File.open(temp_file_path, 'wb') do |file|
|
File.open(temp_file_path, 'wb') do |file|
|
||||||
attachment.file.blob.open do |blob_file|
|
blob.open do |blob_file|
|
||||||
IO.copy_stream(blob_file, file)
|
IO.copy_stream(blob_file, file)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@@ -49,13 +57,12 @@ class Messages::AudioTranscriptionService< Llm::LegacyBaseOpenAiService
|
|||||||
return transcribed_text if transcribed_text.present?
|
return transcribed_text if transcribed_text.present?
|
||||||
|
|
||||||
temp_file_path = fetch_audio_file
|
temp_file_path = fetch_audio_file
|
||||||
|
|
||||||
transcribed_text = nil
|
transcribed_text = nil
|
||||||
|
|
||||||
File.open(temp_file_path, 'rb') do |file|
|
File.open(temp_file_path, 'rb') do |file|
|
||||||
response = @client.audio.transcribe(
|
response = @client.audio.transcribe(
|
||||||
parameters: {
|
parameters: {
|
||||||
model: 'whisper-1',
|
model: WHISPER_MODEL,
|
||||||
file: file,
|
file: file,
|
||||||
temperature: 0.4
|
temperature: 0.4
|
||||||
}
|
}
|
||||||
@@ -63,10 +70,10 @@ class Messages::AudioTranscriptionService< Llm::LegacyBaseOpenAiService
|
|||||||
transcribed_text = response['text']
|
transcribed_text = response['text']
|
||||||
end
|
end
|
||||||
|
|
||||||
FileUtils.rm_f(temp_file_path)
|
|
||||||
|
|
||||||
update_transcription(transcribed_text)
|
update_transcription(transcribed_text)
|
||||||
transcribed_text
|
transcribed_text
|
||||||
|
ensure
|
||||||
|
FileUtils.rm_f(temp_file_path) if temp_file_path.present?
|
||||||
end
|
end
|
||||||
|
|
||||||
def instrumentation_params(file_path)
|
def instrumentation_params(file_path)
|
||||||
@@ -90,4 +97,15 @@ class Messages::AudioTranscriptionService< Llm::LegacyBaseOpenAiService
|
|||||||
|
|
||||||
message.reindex
|
message.reindex
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Derives a file extension from a MIME content type.
#
# The value is lower-cased, anything after the first ";" (parameters such as
# "codecs=opus") is dropped, and the part after the last "/" is used as the
# subtype. Surrounding whitespace is stripped so values like
# "audio/x-wav ; charset=binary" still resolve. Known non-standard or aliased
# subtypes are translated to their usual extension; any other subtype is
# returned unchanged (e.g. "audio/mpeg" => "mpeg").
#
# @param content_type [String, nil] MIME type, e.g. "audio/x-m4a"
# @return [String, nil] extension without the leading dot, or nil when no
#   subtype can be determined
def extension_from_content_type(content_type)
  media_type = content_type.to_s.downcase.split(';').first.to_s
  # A limit of -1 keeps a trailing empty field, so "audio/" produces an empty
  # subtype (=> nil) instead of falling back to the top-level type "audio".
  subtype = media_type.split('/', -1).last.to_s.strip
  return if subtype.empty?

  {
    'x-m4a' => 'm4a',
    'x-wav' => 'wav',
    'wave' => 'wav',
    'vnd.wave' => 'wav',
    'x-pn-wav' => 'wav',
    'x-mp3' => 'mp3',
    'x-flac' => 'flac'
  }.fetch(subtype, subtype)
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -8,8 +8,8 @@ RSpec.describe Messages::AudioTranscriptionService, type: :service do
|
|||||||
|
|
||||||
before do
|
before do
|
||||||
# Create required installation configs
|
# Create required installation configs
|
||||||
create(:installation_config, name: 'CAPTAIN_OPEN_AI_API_KEY', value: 'test-api-key')
|
InstallationConfig.find_or_create_by!(name: 'CAPTAIN_OPEN_AI_API_KEY') { |config| config.value = 'test-api-key' }
|
||||||
create(:installation_config, name: 'CAPTAIN_OPEN_AI_MODEL', value: 'gpt-4o-mini')
|
InstallationConfig.find_or_create_by!(name: 'CAPTAIN_OPEN_AI_MODEL') { |config| config.value = 'gpt-4o-mini' }
|
||||||
|
|
||||||
# Mock usage limits for transcription to be available
|
# Mock usage limits for transcription to be available
|
||||||
allow(account).to receive(:usage_limits).and_return({ captain: { responses: { current_available: 100 } } })
|
allow(account).to receive(:usage_limits).and_return({ captain: { responses: { current_available: 100 } } })
|
||||||
@@ -64,4 +64,24 @@ RSpec.describe Messages::AudioTranscriptionService, type: :service do
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Covers the temp-file naming used for transcription uploads: an attachment
# stored as "speech" (no extension) with content type audio/mpeg must be
# written to a temp file whose name ends in ".mpeg".
describe '#fetch_audio_file' do
  let(:service) { described_class.new(attachment) }

  before do
    audio_fixture = Rails.public_path.join('audio/widget/ding.mp3')
    attachment.file.attach(io: File.open(audio_fixture), filename: 'speech', content_type: 'audio/mpeg')
  end

  it 'adds extension from content type when filename has no extension' do
    generated_path = service.send(:fetch_audio_file)

    expect(File.extname(generated_path)).to eq('.mpeg')
  ensure
    # The service writes a real file under tmp/; remove it even if the expectation fails.
    FileUtils.rm_f(generated_path) if generated_path.present?
  end
end
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user