Chore: Parse quoted text in incoming emails (#883) (#884)

* Chore: Parse quoted text in incoming emails (#883)
* Parse the quoted text and the reply in incoming emails and store them separately
* Apply this parsing to both the plain-text and HTML parts of emails
* In the chat window, show only the parsed reply

* Conversation mailbox test fixes (#883)
This commit is contained in:
Sony Mathew
2020-05-22 18:07:06 +05:30
committed by GitHub
parent 00093fa408
commit 11b4b4ea3f
4 changed files with 59 additions and 13 deletions

View File

@@ -21,7 +21,7 @@ class ConversationMailbox < ApplicationMailbox
@message = @conversation.messages.create(
account_id: @conversation.account_id,
contact_id: @conversation.contact_id,
content: processed_mail.content,
content: processed_mail.text_content[:reply],
inbox_id: @conversation.inbox_id,
message_type: 'incoming',
content_type: 'incoming_email',
@@ -71,6 +71,6 @@ class ConversationMailbox < ApplicationMailbox
end
def decorate_mail
@processed_mail = MailPresenter.new(mail)
@processed_mail = MailPresenter.new(mail, @conversation.account)
end
end

View File

@@ -1,21 +1,32 @@
class MailPresenter < SimpleDelegator
attr_accessor :mail
def initialize(mail)
def initialize(mail, account = nil)
super(mail)
@mail = mail
@account = account
end
# Returns the wrapped mail's subject after passing it through encode_to_unicode.
def subject
  raw_subject = @mail.subject
  encode_to_unicode(raw_subject)
end
def content
return @decoded_content if @decoded_content
# Returns the plain-text part of the mail split into its pieces.
#
# The decoded text part (empty string when the mail has no text part) is
# passed through encode_to_unicode and then through extract_reply.
# The resulting hash is memoized.
#
# @return [Hash] +:full+ (entire decoded text), +:reply+ and +:quoted+
def text_content
  return @text_content if @text_content

  @decoded_text_content ||= encode_to_unicode(text_part&.body&.decoded || '')
  # parse once and reuse the result for both keys instead of scanning the body twice
  parsed = extract_reply(@decoded_text_content)
  @text_content = {
    full: @decoded_text_content,
    reply: parsed[:reply],
    quoted: parsed[:quoted_text]
  }
end
@decoded_content = parts.present? ? parts[0].body.decoded : decoded
@decoded_content = encode_to_unicode(@decoded_content)
@decoded_content
# Returns the HTML part of the mail split into its pieces.
#
# The decoded HTML part (empty string when the mail has no HTML part) is
# passed through encode_to_unicode and then through extract_reply.
# The resulting hash is memoized.
#
# @return [Hash] +:full+ (entire decoded HTML), +:reply+ and +:quoted+
def html_content
  return @html_content if @html_content

  @decoded_html_content ||= encode_to_unicode(html_part&.body&.decoded || '')
  # parse once and reuse the result for both keys instead of scanning the body twice
  parsed = extract_reply(@decoded_html_content)
  @html_content = {
    full: @decoded_html_content,
    reply: parsed[:reply],
    quoted: parsed[:quoted_text]
  }
end
def attachments
@@ -36,7 +47,8 @@ class MailPresenter < SimpleDelegator
def serialized_data
{
content: content,
text_content: text_content,
html_content: html_content,
number_of_attachments: number_of_attachments,
subject: subject,
date: date,
@@ -56,4 +68,35 @@ class MailPresenter < SimpleDelegator
current_encoding = str.encoding.name
str.encode(current_encoding, 'UTF-8', invalid: :replace, undef: :replace, replace: '?')
end
# Splits decoded mail content into the new reply and the quoted history.
#
# The split point is the earliest position at which any of the quoted-text
# marker regexes matches. When no marker matches, the whole content is
# treated as the reply and the quoted text is empty.
#
# @param content [String] decoded body of the mail
# @return [Hash] +:reply+ and +:quoted_text+, both stripped of surrounding whitespace
def extract_reply(content)
  @regex_arr ||= quoted_text_regexes
  # the marker closest to the top of the mail wins; no match at all means no quoted text
  index = @regex_arr.map { |regex| content.index(regex) }.compact.min || content.length

  {
    # exclusive range: an inclusive 0..(index - 1) turns into 0..-1 when a marker
    # sits at position 0 and would return the entire quoted mail as the reply
    reply: content[0...index].strip,
    quoted_text: content[index..-1].strip
  }
end
# Builds the list of regexes that mark the beginning of quoted text in a mail.
#
# Patterns are written as regex literals on purpose: inside a double-quoted
# string "\s" is the escape for a single space character, so
# Regexp.new("...\s...") silently loses the whitespace class and no longer
# matches tabs or the "\r" of CRLF line endings.
#
# @return [Array<Regexp>] the sender-agnostic markers, preceded by markers
#   built from the account's support email when @account has one
def quoted_text_regexes
  sender_agnostic_regexes = [
    # "$" only matches before "\n", so allow trailing whitespace (e.g. "\r") ahead of it
    /^.*On.*(\n)?wrote:\s*$/i,
    /-+original\s+message-+\s*$/i,
    /from:\s*$/i
  ]
  return sender_agnostic_regexes if @account.nil? || @account.support_email.blank?

  support_email = Regexp.escape(@account.support_email)
  [
    /From:\s*#{support_email}/i,
    /<#{support_email}>/i,
    /#{support_email}\s+wrote:/i,
    /On(.*)#{support_email}(.*)wrote:/i
  ] + sender_agnostic_regexes
end
end

View File

@@ -8,7 +8,7 @@ RSpec.describe ConversationMailbox, type: :mailbox do
let(:reply_mail) { create_inbound_email_from_fixture('reply.eml') }
let(:conversation) { create(:conversation, assignee: agent) }
let(:described_subject) { described_class.receive reply_mail }
let(:serialized_attributes) { %w[content number_of_attachments subject date to from in_reply_to cc bcc message_id] }
let(:serialized_attributes) { %w[text_content html_content number_of_attachments subject date to from in_reply_to cc bcc message_id] }
before do
# this UUID is hardcoded in the reply.eml, that's why we are updating this
@@ -19,7 +19,7 @@ RSpec.describe ConversationMailbox, type: :mailbox do
end
it 'add the mail content as new message on the conversation' do
expect(conversation.messages.last.content).to eq("Let's talk about these images:\r\n\r\n")
expect(conversation.messages.last.content).to eq("Let's talk about these images:")
end
it 'add the attachments' do

View File

@@ -8,7 +8,7 @@ RSpec.describe MailPresenter do
it 'give utf8 encoded content' do
expect(decorated_mail.subject).to eq("Discussion: Let's debate these attachments")
expect(decorated_mail.content).to eq("Let's talk about these images:\r\n\r\n")
expect(decorated_mail.text_content[:full]).to eq("Let's talk about these images:\r\n\r\n")
end
it 'give decoded blob attachments' do
@@ -24,7 +24,10 @@ RSpec.describe MailPresenter do
it 'give the serialized data of the email to be stored in the message' do
data = decorated_mail.serialized_data
expect(data.keys).to eq([:content, :number_of_attachments, :subject, :date, :to, :from, :in_reply_to, :cc, :bcc, :message_id])
expect(data.keys).to eq([
:text_content, :html_content, :number_of_attachments, :subject, :date, :to,
:from, :in_reply_to, :cc, :bcc, :message_id
])
expect(data[:subject]).to eq(decorated_mail.subject)
expect(data[:date].to_s).to eq('2020-04-20T04:20:20-04:00')
expect(data[:message_id]).to eq(mail.message_id)