fix: stream attachment handling in workers (#12870)
We’ve been watching Sidekiq workers climb from ~600 MB at boot to 1.4–1.5 GB after an hour whenever attachment-heavy jobs run. This PR is an experiment to curb that growth by streaming attachments instead of loading the whole blob into Ruby: reply-mailer inline attachments, Telegram uploads, and audio transcriptions now read and write in chunks. If this keeps RSS stable in production, we’ll keep it; otherwise we’ll roll it back and keep digging.
This commit is contained in:
@@ -30,21 +30,15 @@ class DataImportJob < ApplicationJob
|
||||
def parse_csv_and_build_contacts
|
||||
contacts = []
|
||||
rejected_contacts = []
|
||||
# Ensuring that importing non utf-8 characters will not throw error
|
||||
data = @data_import.import_file.download
|
||||
utf8_data = data.force_encoding('UTF-8')
|
||||
|
||||
# Ensure that the data is valid UTF-8, preserving valid characters
|
||||
clean_data = utf8_data.valid_encoding? ? utf8_data : utf8_data.encode('UTF-16le', invalid: :replace, replace: '').encode('UTF-8')
|
||||
|
||||
csv = CSV.parse(clean_data, headers: true)
|
||||
|
||||
csv.each do |row|
|
||||
current_contact = @contact_manager.build_contact(row.to_h.with_indifferent_access)
|
||||
if current_contact.valid?
|
||||
contacts << current_contact
|
||||
else
|
||||
append_rejected_contact(row, current_contact, rejected_contacts)
|
||||
with_import_file do |file|
|
||||
csv_reader(file).each do |row|
|
||||
current_contact = @contact_manager.build_contact(row.to_h.with_indifferent_access)
|
||||
if current_contact.valid?
|
||||
contacts << current_contact
|
||||
else
|
||||
append_rejected_contact(row, current_contact, rejected_contacts)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -75,7 +69,7 @@ class DataImportJob < ApplicationJob
|
||||
end
|
||||
|
||||
def generate_csv_data(rejected_contacts)
|
||||
headers = CSV.parse(@data_import.import_file.download, headers: true).headers
|
||||
headers = csv_headers
|
||||
headers << 'errors'
|
||||
return if rejected_contacts.blank?
|
||||
|
||||
@@ -99,4 +93,31 @@ class DataImportJob < ApplicationJob
|
||||
# Enqueues the "contact import failed" administrator email for the account
# that owns the current data import.
def send_import_failed_notification_to_admin
  account_mailer = AdministratorNotifications::AccountNotificationMailer.with(account: @data_import.account)
  account_mailer.contact_import_failed.deliver_later
end
|
||||
|
||||
# Returns the header names of the uploaded import CSV.
#
# The headers are taken from the parser itself rather than from the first
# data row, so a file that contains only a header line still reports its
# columns. A copy is returned so callers can append to it safely.
#
# @return [Array<String>] header names, or [] when no header line could be read
def csv_headers
  parsed_headers = nil
  with_import_file do |file|
    reader = csv_reader(file)
    reader.shift # forces the parser to consume the header line
    parsed_headers = reader.headers
  end
  # CSV#headers is `true` (not an Array) when headers were requested but never read.
  parsed_headers.is_a?(Array) ? parsed_headers.dup : []
end
|
||||
|
||||
# Builds a header-aware CSV reader over the full contents of +file+.
#
# The file is rewound and read in one go, tagged as UTF-8, and any byte
# sequences that are not valid UTF-8 are dropped, so that imports containing
# non-UTF-8 characters do not raise while parsing.
#
# @param file [#rewind, #read] readable, rewindable IO
# @return [CSV] parser configured with `headers: true`
def csv_reader(file)
  file.rewind
  # String#scrub('') removes invalid sequences directly; no UTF-16 round trip needed.
  clean_data = file.read.force_encoding('UTF-8').scrub('')

  CSV.new(StringIO.new(clean_data), headers: true)
end
|
||||
|
||||
# Opens @data_import.import_file as a local file and yields it in binary mode.
# tmp/imports under the Rails root is created if missing and passed as the
# tmpdir for the opened file.
def with_import_file
  imports_dir = Rails.root.join('tmp/imports').tap { |dir| FileUtils.mkdir_p(dir) }

  @data_import.import_file.open(tmpdir: imports_dir) do |local_copy|
    local_copy.binmode
    yield local_copy
  end
end
|
||||
end
|
||||
|
||||
51
app/mailers/conversation_reply_mailer_attachment_helper.rb
Normal file
51
app/mailers/conversation_reply_mailer_attachment_helper.rb
Normal file
@@ -0,0 +1,51 @@
|
||||
# Handles attachment processing for ConversationReplyMailer flows.
#
# The including class must provide `mail`, `@message` and `@options`.
module ConversationReplyMailerAttachmentHelper
  private

  # Attachment processing for direct email replies (when replying to a single message).
  #
  # How attachments are handled:
  # 1. While the running total stays at or below 20MB: added directly to the email as proper attachments
  # 2. When a file would push the total above 20MB: added to @large_attachments to be displayed as links
  def process_attachments_as_files_for_email_reply
    @options[:attachments] = []
    @large_attachments = []
    current_total_size = 0

    @message.attachments.each do |attachment|
      current_total_size = handle_attachment_inline(current_total_size, attachment)
    end
  end

  # Reads the blob in 64KB chunks and returns its bytes as one binary string.
  def read_blob_content(blob)
    # Pre-sized binary buffer: appending never has to grow the string, and the
    # result is always ASCII-8BIT instead of depending on the bytes read.
    buffer = String.new(capacity: blob.byte_size.to_i)
    blob.open do |file|
      while (chunk = file.read(64.kilobytes))
        buffer << chunk
      end
    end
    buffer
  end

  # Attaches one file inline if it still fits the 20MB budget, otherwise
  # records it in @large_attachments. Attachments without a blob are ignored.
  #
  # Returns the running total of inlined bytes after handling this attachment.
  def handle_attachment_inline(current_total_size, attachment)
    blob = attachment.file.blob
    return current_total_size if blob.blank?

    file_size = blob.byte_size
    attachment_name = attachment.file.filename.to_s

    if current_total_size + file_size <= 20.megabytes
      content = read_blob_content(blob)
      mail.attachments[attachment_name] = {
        mime_type: attachment.file.content_type || 'application/octet-stream',
        content: content
      }
      @options[:attachments] << { name: attachment_name }
      current_total_size + file_size
    else
      @large_attachments << attachment
      current_total_size
    end
  end
end
|
||||
@@ -1,4 +1,6 @@
|
||||
module ConversationReplyMailerHelper
|
||||
include ConversationReplyMailerAttachmentHelper
|
||||
|
||||
def prepare_mail(cc_bcc_enabled)
|
||||
@options = {
|
||||
to: to_emails,
|
||||
@@ -27,34 +29,6 @@ module ConversationReplyMailerHelper
|
||||
mail(@options)
|
||||
end
|
||||
|
||||
# Attachment processing for direct email replies (when replying to a single message).
#
# How attachments are handled:
# 1. While the running total stays at or below 20MB: added directly to the email as proper attachments
# 2. When a file would push the total above 20MB: added to @large_attachments to be displayed as links
#
# The size check uses the blob's recorded byte size, so only files that will
# actually be attached are downloaded. Attachments without a blob are skipped.
def process_attachments_as_files_for_email_reply
  @options[:attachments] = []
  @large_attachments = []
  current_total_size = 0

  @message.attachments.each do |attachment|
    blob = attachment.file.blob
    next if blob.nil?

    file_size = blob.byte_size

    # Attach files directly while they fit the 20MB budget;
    # anything that would exceed it is sent as a link instead.
    if current_total_size + file_size <= 20.megabytes
      attachment_name = attachment.file.filename.to_s
      mail.attachments[attachment_name] = attachment.file.download
      @options[:attachments] << { name: attachment_name }
      current_total_size += file_size
    else
      @large_attachments << attachment
    end
  end
end
|
||||
|
||||
private
|
||||
|
||||
def oauth_smtp_settings
|
||||
|
||||
@@ -96,11 +96,16 @@ class Telegram::SendAttachmentsService
|
||||
# Telegram picks up the file name from original field name, so we need to save the file with the original name.
# Hence not using Tempfile here.
#
# The blob is streamed straight to disk with IO.copy_stream instead of being
# loaded into a Ruby string first. Files are written under
# tmp/uploads/telegram-<message_id>/ beneath the Rails root.
#
# Returns the path of the written file as a String.
def save_attachment_to_tempfile(attachment)
  temp_dir = Rails.root.join('tmp/uploads', "telegram-#{attachment.message_id}")
  FileUtils.mkdir_p(temp_dir)
  temp_file_path = File.join(temp_dir, attachment.file.filename.to_s)

  File.open(temp_file_path, 'wb') do |file|
    attachment.file.blob.open do |blob_file|
      IO.copy_stream(blob_file, file)
    end
  end

  temp_file_path
end
|
||||
|
||||
|
||||
Reference in New Issue
Block a user