chore: Improve email parsing using email trimmer gem (#3611)

Email parsing using email_trimmer gem

Fixes: #3539, #2954, #3572
This commit is contained in:
Tejaswini Chile
2021-12-22 18:16:40 +05:30
committed by GitHub
parent 009abc1948
commit 44486fc8e1
9 changed files with 1232 additions and 32 deletions

View File

@@ -8,30 +8,48 @@ class MailPresenter < SimpleDelegator
end
# Returns the mail's subject passed through encode_to_unicode.
#
# NOTE(review): the two statements below look like the before/after sides of
# a diff whose +/- markers were lost. As written, only the last statement
# determines the return value, and it passes @mail.subject straight through
# without the `|| ''` fallback used by the first one, so a nil subject reaches
# encode_to_unicode unchanged.
def subject
encode_to_unicode(@mail.subject || '')
encode_to_unicode(@mail.subject)
end
# Builds a hash describing the mail body with :full, :reply and :quoted keys,
# or returns {} when the selected body is blank.
#
# NOTE(review): this span appears to interleave the removed and added sides of
# a diff (the lines calling decoded_message / extract_reply look like the
# removed side). The hash literal below is not valid Ruby as shown because both
# sets of keys are present; confirm which side is current before running it.
def text_content
@decoded_text_content ||= encode_to_unicode(text_part&.decoded || decoded_message || '')
# Unconditional assignment: any value memoized by the line above is overwritten.
@decoded_text_content = select_body || ''
# Remember the body's encoding so the trimmed text can be re-tagged with it below.
encoding = @decoded_text_content.encoding
# The trim runs before the blank check, so it is also invoked on an empty body.
body = EmailReplyTrimmer.trim(@decoded_text_content)
return {} if @decoded_text_content.blank?
@text_content ||= {
full: @decoded_text_content,
reply: extract_reply(@decoded_text_content)[:reply],
quoted: extract_reply(@decoded_text_content)[:quoted_text]
# select_body is called a second time here rather than reusing @decoded_text_content.
# NOTE(review): :reply receives the untrimmed body while :quoted receives the
# EmailReplyTrimmer.trim output. Presumably trim returns the reply with quoted
# text removed, in which case these two keys look swapped; confirm.
full: select_body,
reply: @decoded_text_content,
quoted: body.force_encoding(encoding).encode('UTF-8')
}
end
# Chooses the body of the mail and returns it as a string.
#
# The text part is preferred, then the HTML part, then the mail itself. The
# decoded content goes through encode_to_unicode. An empty string is returned
# when the decoded content contains a phrase that signals a parsing failure.
# When the mail's content type includes 'text/html', the content is handed to
# ::HtmlParser.parse_reply; otherwise it is returned as is.
def select_body
  part = mail.text_part || mail.html_part || mail
  body = encode_to_unicode(part.decoded)

  # Certain trigger phrases that mean we didn't parse correctly
  parse_failure = %r{(Content-Type: multipart/alternative|text/plain)}
  return '' if parse_failure.match?(body)

  html_mail = (mail.content_type || '').include?('text/html')
  html_mail ? ::HtmlParser.parse_reply(body) : body
end
# Builds a hash describing the mail body with :full, :reply and :quoted keys,
# or returns {} when the selected body is blank.
#
# NOTE(review): this span appears to interleave the removed and added sides of
# a diff (the lines calling html_part&.decoded / extract_reply look like the
# removed side). The hash literal below is not valid Ruby as shown because both
# sets of keys are present; confirm which side is current before running it.
def html_content
@decoded_html_content ||= encode_to_unicode(html_part&.decoded)
# Unconditional assignment: any value memoized by the line above is overwritten.
# NOTE(review): the body comes from select_body, the same source text_content
# uses, not specifically from the HTML part; confirm this is intended.
@decoded_html_content = select_body || ''
return {} if @decoded_html_content.blank?
body = EmailReplyTrimmer.trim(@decoded_html_content)
@html_content ||= {
full: @decoded_html_content,
reply: extract_reply(@decoded_html_content)[:reply],
quoted: extract_reply(@decoded_html_content)[:quoted_text]
# select_body is called a second time here rather than reusing @decoded_html_content.
# NOTE(review): :reply receives the untrimmed body while :quoted receives the
# EmailReplyTrimmer.trim output. Presumably trim returns the reply with quoted
# text removed, in which case these two keys look swapped; confirm.
full: select_body,
reply: @decoded_html_content,
quoted: body
}
end
@@ -47,14 +65,6 @@ class MailPresenter < SimpleDelegator
end
end
# Returns the decoded body of the mail.
#
# For a non-multipart mail this is the decoded mail itself. For a multipart
# mail it is the decoded text part, or nil when there is no text part.
def decoded_message
  return mail.decoded unless mail.multipart?

  text = mail.text_part
  text.decoded if text
end
# Returns how many attachments the mail carries.
def number_of_attachments
  attachments = mail.attachments
  attachments.count
end
@@ -114,21 +124,8 @@ class MailPresenter < SimpleDelegator
return str if current_encoding == 'UTF-8'
str.encode(current_encoding, 'UTF-8', invalid: :replace, undef: :replace, replace: '?')
end
# Splits an email body into the fresh reply and the quoted text beneath it.
#
# The split point is the earliest offset at which any pattern returned by
# quoted_text_regexes matches. When no pattern matches, the whole body is
# treated as the reply and the quoted text is empty.
#
# @param content [String] the body text to split
# @return [Hash] :reply and :quoted_text, both stripped of surrounding
#   whitespace; '' is returned instead when a StandardError is raised
def extract_reply(content)
  @regex_arr ||= quoted_text_regexes

  # Earliest match offset across all patterns; end of content when none match.
  index = @regex_arr.map { |regex| content.index(regex) }.compact.min || content.length

  {
    # Exclusive range on purpose: with a quote marker at offset 0 the previous
    # `0..(index - 1)` became `0..-1` and returned the entire body as the reply.
    reply: content[0...index].strip,
    quoted_text: content[index..].strip
  }
rescue StandardError
  # NOTE(review): callers that index the result with [:reply] will raise on
  # this String; kept as is so existing callers see the same error-path value.
  ''
end
def quoted_text_regexes