Files
leadchat/enterprise/app/services/captain/llm/pdf_processing_service.rb
Sojan Jose cc86b8c7f1 fix: stream attachment handling in workers (#12870)
We’ve been watching Sidekiq workers climb from ~600 MB at boot to
1.4–1.5 GB after an hour whenever attachment-heavy jobs run. This PR is
an experiment to curb that growth by streaming attachments instead of
loading the whole blob into Ruby: reply-mailer inline attachments,
Telegram uploads, and audio transcriptions now read/write in chunks. If
this keeps RSS stable in production, we’ll keep it; otherwise we’ll roll
it back and keep digging.
2025-12-05 13:02:53 -08:00

45 lines
954 B
Ruby

# Uploads the PDF attached to a document to OpenAI and records the returned
# file id on that document.
#
# NOTE(review): `@client` is never assigned in this class; presumably the
# parent initializer sets it up — confirm against Llm::LegacyBaseOpenAiService.
class Captain::Llm::PdfProcessingService < Llm::LegacyBaseOpenAiService
  # @param document [Object] record that responds to `openai_file_id`,
  #   `store_openai_file_id` and `pdf_file`
  def initialize(document)
    super()
    @document = document
  end

  # Uploads the document's PDF unless it already carries an OpenAI file id.
  #
  # @return [nil, Object] nil when an id is already present; otherwise
  #   whatever `document.store_openai_file_id` returns
  # @raise [CustomExceptions::PdfUploadError] when the upload yields a blank id
  def process
    return if document.openai_file_id.present?

    uploaded_id = upload_pdf_to_openai
    if uploaded_id.blank?
      raise CustomExceptions::PdfUploadError, I18n.t('captain.documents.pdf_upload_failed')
    end

    document.store_openai_file_id(uploaded_id)
  end

  private

  attr_reader :document

  # Sends the staged PDF through the client's files endpoint.
  #
  # @return [Object] the 'id' entry of the upload response
  def upload_pdf_to_openai
    with_tempfile do |pdf_io|
      upload_params = { file: pdf_io, purpose: 'assistants' }
      @client.files.upload(parameters: upload_params)['id']
    end
  end

  # Copies the PDF blob into a binary-mode temp file named with a `.pdf`
  # suffix and yields that file, positioned at its start.
  #
  # The block form of Tempfile.create removes the file once the block exits,
  # so the yielded handle must not be used after this method returns.
  #
  # @yieldparam scratch [File] the staged PDF, rewound to offset 0
  # @return [Object] the value of the given block
  def with_tempfile
    Tempfile.create(['pdf_upload', '.pdf'], binmode: true) do |scratch|
      # IO-to-IO copy rather than reading the whole blob into a Ruby string.
      document.pdf_file.blob.open { |downloaded| IO.copy_stream(downloaded, scratch) }

      # Push buffered bytes out and reset the cursor before handing the file on.
      scratch.flush
      scratch.rewind
      yield scratch
    end
  end
end