Chore: Parse quoted text in incoming emails (#883) (#884)

* Chore: Parse quoted text in incoming emails (#883)
* Parsed the quoted text and the reply in incoming emails and stored them separately
* Did this parsing for both the plain-text and HTML parts of emails
* In the chat window, we now show only the parsed reply

* Conversation mailbox test fixes (#883)
This commit is contained in:
Sony Mathew
2020-05-22 18:07:06 +05:30
committed by GitHub
parent 00093fa408
commit 11b4b4ea3f
4 changed files with 59 additions and 13 deletions

View File

@@ -21,7 +21,7 @@ class ConversationMailbox < ApplicationMailbox
@message = @conversation.messages.create( @message = @conversation.messages.create(
account_id: @conversation.account_id, account_id: @conversation.account_id,
contact_id: @conversation.contact_id, contact_id: @conversation.contact_id,
content: processed_mail.content, content: processed_mail.text_content[:reply],
inbox_id: @conversation.inbox_id, inbox_id: @conversation.inbox_id,
message_type: 'incoming', message_type: 'incoming',
content_type: 'incoming_email', content_type: 'incoming_email',
@@ -71,6 +71,6 @@ class ConversationMailbox < ApplicationMailbox
end end
def decorate_mail def decorate_mail
@processed_mail = MailPresenter.new(mail) @processed_mail = MailPresenter.new(mail, @conversation.account)
end end
end end

View File

@@ -1,21 +1,32 @@
class MailPresenter < SimpleDelegator class MailPresenter < SimpleDelegator
attr_accessor :mail attr_accessor :mail
def initialize(mail) def initialize(mail, account = nil)
super(mail) super(mail)
@mail = mail @mail = mail
@account = account
end end
def subject def subject
encode_to_unicode(@mail.subject) encode_to_unicode(@mail.subject)
end end
def content def text_content
return @decoded_content if @decoded_content @decoded_text_content ||= encode_to_unicode(text_part&.body&.decoded || '')
@text_content ||= {
full: @decoded_text_content,
reply: extract_reply(@decoded_text_content)[:reply],
quoted: extract_reply(@decoded_text_content)[:quoted_text]
}
end
@decoded_content = parts.present? ? parts[0].body.decoded : decoded def html_content
@decoded_content = encode_to_unicode(@decoded_content) @decoded_html_content ||= encode_to_unicode(html_part&.body&.decoded || '')
@decoded_content @html_content ||= {
full: @decoded_html_content,
reply: extract_reply(@decoded_html_content)[:reply],
quoted: extract_reply(@decoded_html_content)[:quoted_text]
}
end end
def attachments def attachments
@@ -36,7 +47,8 @@ class MailPresenter < SimpleDelegator
def serialized_data def serialized_data
{ {
content: content, text_content: text_content,
html_content: html_content,
number_of_attachments: number_of_attachments, number_of_attachments: number_of_attachments,
subject: subject, subject: subject,
date: date, date: date,
@@ -56,4 +68,35 @@ class MailPresenter < SimpleDelegator
current_encoding = str.encoding.name current_encoding = str.encoding.name
str.encode(current_encoding, 'UTF-8', invalid: :replace, undef: :replace, replace: '?') str.encode(current_encoding, 'UTF-8', invalid: :replace, undef: :replace, replace: '?')
end end
# Splits a decoded email body into the sender's new reply and the quoted
# text of the earlier message(s) that follows it.
#
# The split point is the earliest offset at which any pattern returned by
# quoted_text_regexes matches; when nothing matches, the whole body is the
# reply and the quoted text is empty.
#
# @param content [String] decoded body of a text or HTML part
# @return [Hash] { reply: String, quoted_text: String }, both stripped
def extract_reply(content)
  # The pattern list is built once per presenter and reused for every part.
  @regex_arr ||= quoted_text_regexes

  # Offset of the quote marker closest to the top; content.length if none.
  index = @regex_arr.map { |regex| content.index(regex) }.compact.min || content.length

  {
    # Exclusive range on purpose: with an inclusive `0..(index - 1)` a marker
    # at offset 0 turns into `0..-1` and the entire quoted body would be
    # returned as the reply.
    reply: content[0...index].strip,
    quoted_text: content[index..-1].strip
  }
end
# Builds the list of patterns that mark where quoted text starts in a reply.
#
# The sender-agnostic patterns are always included. When @account has a
# non-blank support_email, patterns that mention that address are placed in
# front of them.
#
# Regex literals are used instead of Regexp.new with double-quoted strings:
# inside a double-quoted string "\s" is a plain space, so the whitespace
# classes would silently degrade to "space only" and never match tabs or the
# "\r" of CRLF line endings.
#
# @return [Array<Regexp>] case-insensitive patterns
def quoted_text_regexes
  sender_agnostic_regexes = [
    # "On <date>, <someone> wrote:", optionally wrapped onto a second line;
    # \r? lets the line end with CRLF as well as LF.
    /^.*On.*(\n)?wrote:\r?$/i,
    /-+original\s+message-+\s*$/i,
    /from:\s*$/i
  ]
  return sender_agnostic_regexes if @account.nil? || @account.support_email.blank?

  # Escaped once so characters such as "." and "+" in the address are literal.
  support_email = Regexp.escape(@account.support_email)

  [
    /From:\s*#{support_email}/i,
    /<#{support_email}>/i,
    /#{support_email}\s+wrote:/i,
    /On(.*)#{support_email}(.*)wrote:/i
  ] + sender_agnostic_regexes
end
end end

View File

@@ -8,7 +8,7 @@ RSpec.describe ConversationMailbox, type: :mailbox do
let(:reply_mail) { create_inbound_email_from_fixture('reply.eml') } let(:reply_mail) { create_inbound_email_from_fixture('reply.eml') }
let(:conversation) { create(:conversation, assignee: agent) } let(:conversation) { create(:conversation, assignee: agent) }
let(:described_subject) { described_class.receive reply_mail } let(:described_subject) { described_class.receive reply_mail }
let(:serialized_attributes) { %w[content number_of_attachments subject date to from in_reply_to cc bcc message_id] } let(:serialized_attributes) { %w[text_content html_content number_of_attachments subject date to from in_reply_to cc bcc message_id] }
before do before do
# this UUID is hardcoded in the reply.eml, that's why we are updating this # this UUID is hardcoded in the reply.eml, that's why we are updating this
@@ -19,7 +19,7 @@ RSpec.describe ConversationMailbox, type: :mailbox do
end end
it 'add the mail content as new message on the conversation' do it 'add the mail content as new message on the conversation' do
expect(conversation.messages.last.content).to eq("Let's talk about these images:\r\n\r\n") expect(conversation.messages.last.content).to eq("Let's talk about these images:")
end end
it 'add the attachments' do it 'add the attachments' do

View File

@@ -8,7 +8,7 @@ RSpec.describe MailPresenter do
it 'give utf8 encoded content' do it 'give utf8 encoded content' do
expect(decorated_mail.subject).to eq("Discussion: Let's debate these attachments") expect(decorated_mail.subject).to eq("Discussion: Let's debate these attachments")
expect(decorated_mail.content).to eq("Let's talk about these images:\r\n\r\n") expect(decorated_mail.text_content[:full]).to eq("Let's talk about these images:\r\n\r\n")
end end
it 'give decoded blob attachments' do it 'give decoded blob attachments' do
@@ -24,7 +24,10 @@ RSpec.describe MailPresenter do
it 'give the serialized data of the email to be stored in the message' do it 'give the serialized data of the email to be stored in the message' do
data = decorated_mail.serialized_data data = decorated_mail.serialized_data
expect(data.keys).to eq([:content, :number_of_attachments, :subject, :date, :to, :from, :in_reply_to, :cc, :bcc, :message_id]) expect(data.keys).to eq([
:text_content, :html_content, :number_of_attachments, :subject, :date, :to,
:from, :in_reply_to, :cc, :bcc, :message_id
])
expect(data[:subject]).to eq(decorated_mail.subject) expect(data[:subject]).to eq(decorated_mail.subject)
expect(data[:date].to_s).to eq('2020-04-20T04:20:20-04:00') expect(data[:date].to_s).to eq('2020-04-20T04:20:20-04:00')
expect(data[:message_id]).to eq(mail.message_id) expect(data[:message_id]).to eq(mail.message_id)