Chore: Parse quoted text in incoming emails (#883) (#884)

* Chore: Parse quoted text in incoming emails (#883)
* Parse the quoted text and the reply in incoming emails and store them separately
* Apply this parsing to both the plain-text and HTML parts of emails
* In the chat window, show only the parsed reply

* Conversation mailbox test fixes (#883)
This commit is contained in:
Sony Mathew
2020-05-22 18:07:06 +05:30
committed by GitHub
parent 00093fa408
commit 11b4b4ea3f
4 changed files with 59 additions and 13 deletions

View File

@@ -21,7 +21,7 @@ class ConversationMailbox < ApplicationMailbox
@message = @conversation.messages.create(
account_id: @conversation.account_id,
contact_id: @conversation.contact_id,
content: processed_mail.content,
content: processed_mail.text_content[:reply],
inbox_id: @conversation.inbox_id,
message_type: 'incoming',
content_type: 'incoming_email',
@@ -71,6 +71,6 @@ class ConversationMailbox < ApplicationMailbox
end
def decorate_mail
@processed_mail = MailPresenter.new(mail)
@processed_mail = MailPresenter.new(mail, @conversation.account)
end
end

View File

@@ -1,21 +1,32 @@
class MailPresenter < SimpleDelegator
attr_accessor :mail
def initialize(mail)
# Wraps +mail+ in the presenter and remembers the optional +account+.
#
# @param mail    the mail object handed to the delegator via +super+
# @param account optional account; read later when building the
#                quoted-text regexes (may be nil)
def initialize(mail, account = nil)
  @account = account
  @mail = mail
  super(mail)
end
# Returns the wrapped mail's subject after running it through
# +encode_to_unicode+.
def subject
  raw_subject = @mail.subject
  encode_to_unicode(raw_subject)
end
def content
return @decoded_content if @decoded_content
# Returns the plain-text body of the email split into its parts.
#
# The decoded text part (empty string when there is no text part) is passed
# through +encode_to_unicode+ and then through +extract_reply+. Both the
# decoded string and the resulting hash are memoized.
# NOTE(review): +text_part+ is presumably delegated to the wrapped mail
# object - confirm.
#
# @return [Hash] +:full+ (entire decoded text), +:reply+ and +:quoted+
def text_content
  @decoded_text_content ||= encode_to_unicode(text_part&.body&.decoded || '')
  @text_content ||= begin
    # Parse once and reuse the result for both keys instead of scanning the
    # body twice.
    parsed = extract_reply(@decoded_text_content)
    {
      full: @decoded_text_content,
      reply: parsed[:reply],
      quoted: parsed[:quoted_text]
    }
  end
end
@decoded_content = parts.present? ? parts[0].body.decoded : decoded
@decoded_content = encode_to_unicode(@decoded_content)
@decoded_content
# Returns the HTML body of the email split into its parts.
#
# The decoded HTML part (empty string when there is no HTML part) is passed
# through +encode_to_unicode+ and then through +extract_reply+. Both the
# decoded string and the resulting hash are memoized.
# NOTE(review): +html_part+ is presumably delegated to the wrapped mail
# object - confirm.
#
# @return [Hash] +:full+ (entire decoded HTML), +:reply+ and +:quoted+
def html_content
  @decoded_html_content ||= encode_to_unicode(html_part&.body&.decoded || '')
  @html_content ||= begin
    # Parse once and reuse the result for both keys instead of scanning the
    # body twice.
    parsed = extract_reply(@decoded_html_content)
    {
      full: @decoded_html_content,
      reply: parsed[:reply],
      quoted: parsed[:quoted_text]
    }
  end
end
def attachments
@@ -36,7 +47,8 @@ class MailPresenter < SimpleDelegator
def serialized_data
{
content: content,
text_content: text_content,
html_content: html_content,
number_of_attachments: number_of_attachments,
subject: subject,
date: date,
@@ -56,4 +68,35 @@ class MailPresenter < SimpleDelegator
current_encoding = str.encoding.name
str.encode(current_encoding, 'UTF-8', invalid: :replace, undef: :replace, replace: '?')
end
# Splits decoded email content into the sender's reply and the quoted text
# that follows it.
#
# The split point is the earliest offset at which any of the quoted-text
# regexes matches. When none matches, the whole content is the reply and the
# quoted text is empty.
#
# @param content [String] decoded email body
# @return [Hash] +:reply+ and +:quoted_text+, both stripped strings
def extract_reply(content)
  @regex_arr ||= quoted_text_regexes

  # Offset of the match closest to the top of the content, or the content
  # length when no regex matches.
  split_at = @regex_arr.map { |regex| content.index(regex) }.compact.min || content.length

  {
    # Exclusive range: an offset of 0 must give an empty reply. The previous
    # `0..(index - 1)` turned into `0..-1` there and returned everything.
    reply: content[0...split_at].strip,
    quoted_text: content[split_at..-1].strip
  }
end
# Builds the list of regexes that mark where quoted text starts in an email.
#
# Three sender-agnostic patterns are always included. When an account with a
# non-blank +support_email+ is present, four patterns keyed on that address
# are placed in front of them.
#
# Regex literals are used on purpose: in a double-quoted Ruby string "\s" is
# a plain space, so the earlier string-built patterns never matched tabs or
# a carriage return before the line end.
#
# @return [Array<Regexp>] case-insensitive quoted-text markers
def quoted_text_regexes
  sender_agnostic_regexes = [
    /^.*On.*(\n)?wrote:$/i,
    /-+original\s+message-+\s*$/i,
    /from:\s*$/i
  ]
  return sender_agnostic_regexes if @account.nil? || @account.support_email.blank?

  # Escaped once so the address is matched literally ('.' is not a wildcard).
  support_email = Regexp.escape(@account.support_email)
  [
    /From:\s*#{support_email}/i,
    /<#{support_email}>/i,
    /#{support_email}\s+wrote:/i,
    /On(.*)#{support_email}(.*)wrote:/i
  ] + sender_agnostic_regexes
end
end