From 5f6e17f3071bb1238d5ac04f8bf7d62fd256a23c Mon Sep 17 00:00:00 2001 From: Sojan Jose Date: Thu, 18 Jan 2024 15:36:36 +0400 Subject: [PATCH] feat: Use Telegram HTML Parsemode (#8731) - this ensures that the markdown formatted messages from the Chatwoot dashboard will render consistently in telegram UI for the supported types like bold, italics, links etc --- app/models/channel/telegram.rb | 28 +++++++----- spec/models/channel/telegram_spec.rb | 64 +++++++++++++++++++++++++--- 2 files changed, 77 insertions(+), 15 deletions(-) diff --git a/app/models/channel/telegram.rb b/app/models/channel/telegram.rb index 365ca59c8..f6b97f2d2 100644 --- a/app/models/channel/telegram.rb +++ b/app/models/channel/telegram.rb @@ -149,24 +149,32 @@ class Channel::Telegram < ApplicationRecord }) end - def convert_markdown_to_telegram(text) - ## supported characters : https://core.telegram.org/bots/api#markdown-style - ## To implement MarkdownV2, we will need to do a lot of escaping + def convert_markdown_to_telegram_html(text) + # ref: https://core.telegram.org/bots/api#html-style - # Convert bold - double asterisks to single asterisk in Telegram - # Chatwoot uses double asterisks for bold, while telegram used single asterisk - text.gsub!(/\*\*(.*?)\*\*/, '*\1*') - text + # escape html tags in text. We are subbing \n to
since commonmark will strip extra '\n' + text = CGI.escapeHTML(text.gsub("\n", '
')) + + # convert markdown to html + html = CommonMarker.render_html(text).strip + + # remove all html tags except b, strong, i, em, u, ins, s, strike, del, a, code, pre, blockquote + stripped_html = Rails::HTML5::SafeListSanitizer.new.sanitize(html, tags: %w[b strong i em u ins s strike del a code pre blockquote], + attributes: %w[href]) + + # converted escaped br tags to \n + stripped_html.gsub('<br>', "\n") end def message_request(chat_id, text, reply_markup = nil, reply_to_message_id = nil) - text_to_md = convert_markdown_to_telegram(text) + text_payload = convert_markdown_to_telegram_html(text) + HTTParty.post("#{telegram_api_url}/sendMessage", body: { chat_id: chat_id, - text: text_to_md, + text: text_payload, reply_markup: reply_markup, - parse_mode: 'Markdown', + parse_mode: 'HTML', reply_to_message_id: reply_to_message_id }) end diff --git a/spec/models/channel/telegram_spec.rb b/spec/models/channel/telegram_spec.rb index 248e7b43e..2cc927977 100644 --- a/spec/models/channel/telegram_spec.rb +++ b/spec/models/channel/telegram_spec.rb @@ -3,6 +3,58 @@ require 'rails_helper' RSpec.describe Channel::Telegram do let(:telegram_channel) { create(:channel_telegram) } + describe '#convert_markdown_to_telegram_html' do + subject { telegram_channel.send(:convert_markdown_to_telegram_html, text) } + + context 'when text contains multiple newline characters' do + let(:text) { "Line one\nLine two\n\nLine four" } + + it 'preserves multiple newline characters' do + expect(subject).to eq("Line one\nLine two\n\nLine four") + end + end + + context 'when text contains broken markdown' do + let(:text) { 'This is a **broken markdown with HTML tags.' } + + it 'does not break and properly converts to Telegram HTML format and escapes html tags' do + expect(subject).to eq('This is a **broken markdown with <b>HTML</b> tags.') + end + end + + context 'when text contains markdown and HTML elements' do + let(:text) { "Hello *world*! 
This is bold and this is italic.\nThis is a new line." } + + it 'converts markdown to Telegram HTML format and escapes other html' do + expect(subject).to eq("Hello world! This is <b>bold</b> and this is <i>italic</i>.\nThis is a new line.") + end + end + + context 'when text contains unsupported HTML tags' do + let(:text) { 'This is a test with unsupported tags.' } + + it 'removes unsupported HTML tags' do + expect(subject).to eq('This is a <span>test</span> with unsupported tags.') + end + end + + context 'when text contains special characters' do + let(:text) { 'Special characters: & < >' } + + it 'escapes special characters' do + expect(subject).to eq('Special characters: & < >') + end + end + + context 'when text contains markdown links' do + let(:text) { 'Check this [link](http://example.com) out!' } + + it 'converts markdown links to Telegram HTML format' do + expect(subject).to eq('Check this link out!') + end + end + end + context 'when a valid message and empty attachments' do it 'send message' do message = create(:message, message_type: :outgoing, content: 'test', @@ -10,7 +62,7 @@ RSpec.describe Channel::Telegram do stub_request(:post, "https://api.telegram.org/bot#{telegram_channel.bot_token}/sendMessage") .with( - body: 'chat_id=123&text=test&reply_markup=&parse_mode=Markdown&reply_to_message_id=' + body: 'chat_id=123&text=test&reply_markup=&parse_mode=HTML&reply_to_message_id=' ) .to_return( status: 200, @@ -21,13 +73,15 @@ RSpec.describe Channel::Telegram do expect(telegram_channel.send_message_on_telegram(message)).to eq('telegram_123') end - it 'send message with markdown converted to telegram markdown' do + it 'send message with markdown converted to telegram HTML' do message = create(:message, message_type: :outgoing, content: '**test** *test* ~test~', conversation: create(:conversation, inbox: telegram_channel.inbox, additional_attributes: { 'chat_id' => '123' })) stub_request(:post, 
"https://api.telegram.org/bot#{telegram_channel.bot_token}/sendMessage") .with( - body: "chat_id=123&text=#{ERB::Util.url_encode('*test* *test* ~test~')}&reply_markup=&parse_mode=Markdown&reply_to_message_id=" + body: "chat_id=123&text=#{ + ERB::Util.url_encode('test test ~test~') + }&reply_markup=&parse_mode=HTML&reply_to_message_id=" ) .to_return( status: 200, @@ -49,7 +103,7 @@ RSpec.describe Channel::Telegram do .with( body: 'chat_id=123&text=test' \ '&reply_markup=%7B%22one_time_keyboard%22%3Atrue%2C%22inline_keyboard%22%3A%5B%5B%7B%22text%22%3A%22test%22%2C%22' \ - 'callback_data%22%3A%22test%22%7D%5D%5D%7D&parse_mode=Markdown&reply_to_message_id=' + 'callback_data%22%3A%22test%22%7D%5D%5D%7D&parse_mode=HTML&reply_to_message_id=' ) .to_return( status: 200, @@ -66,7 +120,7 @@ RSpec.describe Channel::Telegram do stub_request(:post, "https://api.telegram.org/bot#{telegram_channel.bot_token}/sendMessage") .with( - body: 'chat_id=123&text=test&reply_markup=&parse_mode=Markdown&reply_to_message_id=' + body: 'chat_id=123&text=test&reply_markup=&parse_mode=HTML&reply_to_message_id=' ) .to_return( status: 403,