feat: firecrawl branding api [UPM-15] (#13903)
Adds `WebsiteBrandingService` (OSS) with an Enterprise override using
Firecrawl v2 to extract branding and business data from a URL for
onboarding auto-fill.
OSS version uses HTTParty + Nokogiri to extract:
- Business name (og:site_name or title)
- Language (html lang)
- Favicon
- Social links from `<a>` tags
Enterprise version makes a single Firecrawl call to fetch:
- Structured JSON (name, language, industry via LLM)
- Branding (favicon, primary color)
- Page links
Falls back to OSS if Firecrawl is unavailable or fails.
Social handles (WhatsApp, Facebook, Instagram, Telegram, TikTok, LINE)
are parsed deterministically via a shared `SocialLinkParser`.
> We use links for socials because LLM extraction was unreliable: it mostly
returned empty results and, in rare cases, hallucinated handles.
## How to test
```ruby
# OSS (no Firecrawl key needed)
WebsiteBrandingService.new('chatwoot.com').perform
# Enterprise (requires CAPTAIN_FIRECRAWL_API_KEY)
WebsiteBrandingService.new('notion.so').perform
WebsiteBrandingService.new('postman.com').perform
```
Verify the returned hash includes business_name, language,
industry_category, social_handles, and branding with
favicon/primary_color.
<img width="908" height="393" alt="image"
src="https://github.com/user-attachments/assets/e3696887-d366-485a-89a0-8e1a9698a788"
/>
This commit is contained in:
65
app/services/concerns/social_link_parser.rb
Normal file
65
app/services/concerns/social_link_parser.rb
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
# Deterministically derives social-media handles from a list of page links.
#
# Each link's host is compared against SOCIAL_DOMAIN_MAP. For every platform
# the first matching link that yields a non-blank handle wins. WhatsApp links
# produce a digits-only phone number; every other platform produces the URL
# path with its leading and trailing slash removed.
module SocialLinkParser
  extend ActiveSupport::Concern

  # Hosts recognised for each supported platform (subdomains match as well).
  SOCIAL_DOMAIN_MAP = {
    whatsapp: %w[wa.me api.whatsapp.com],
    line: %w[line.me],
    facebook: %w[facebook.com fb.com fb.me],
    instagram: %w[instagram.com],
    telegram: %w[t.me telegram.me],
    tiktok: %w[tiktok.com]
  }.freeze

  # Leading path segments that identify share/intent widgets, not profiles.
  SHARE_PATH_PREFIXES = %w[sharer share intent dialog].freeze

  # A phone candidate: optional "+", then digits and common separators, with
  # at least one digit present.
  WHATSAPP_PHONE_PATTERN = /\A\s*\+?[\s().-]*\d[\d\s().-]*\z/

  private

  # Builds a hash with one entry per supported platform.
  #
  # @param links [Array<String>, nil] URLs collected from the page
  # @return [Hash{Symbol => String, nil}] handle per platform, nil when none was found
  def extract_social_from_links(links)
    SOCIAL_DOMAIN_MAP.each_with_object({}) do |(platform, domains), handles|
      handles[platform] = find_social_handle(links, platform, domains)
    end
  end

  # Returns the first usable handle among the links that point at one of +domains+.
  #
  # @param links [Array<String>, nil] candidate URLs (nil is treated as empty)
  # @param platform [Symbol] key of SOCIAL_DOMAIN_MAP being resolved
  # @param domains [Array<String>] hosts belonging to that platform
  # @return [String, nil]
  def find_social_handle(links, platform, domains)
    Array(links).each do |link|
      next unless social_link?(link, domains)

      handle = parse_social_handle(platform, link)
      return handle if handle.present?
    end
    nil
  end

  # True when +link+ parses as a URI whose host belongs to one of +domains+.
  # Unparseable links are simply not a match.
  def social_link?(link, domains)
    host = URI.parse(link).host
    domains.any? { |domain| match_social_domain?(host, domain) }
  rescue URI::InvalidURIError
    false
  end

  # Exact-or-subdomain host match. The comparison is case-insensitive because
  # URI.parse keeps the host exactly as written in the link, and lookalike
  # hosts such as "notfacebook.com" must not match "facebook.com".
  def match_social_domain?(host, domain)
    return false if host.blank?

    normalized_host = host.downcase
    normalized_host == domain || normalized_host.end_with?(".#{domain}")
  end

  # Extracts the handle for +platform+ from +link+.
  #
  # @return [String, nil] phone digits for WhatsApp, otherwise the URL path;
  #   nil for empty paths, share/intent widget URLs and unparseable links
  def parse_social_handle(platform, link)
    uri = URI.parse(link)
    return extract_whatsapp_phone(uri) if platform == :whatsapp

    handle = uri.path.to_s.delete_prefix('/').delete_suffix('/')
    return nil if handle.blank?

    # Compare the whole first segment (ignoring a ".php" suffix, as in
    # "sharer.php") so that real handles which merely start with
    # "share"/"dialog"/... are not discarded.
    first_segment = handle.split('/').first.to_s.downcase.delete_suffix('.php')
    return nil if SHARE_PATH_PREFIXES.include?(first_segment)

    handle
  rescue URI::InvalidURIError
    nil
  end

  # wa.me/1234567890 or api.whatsapp.com/send?phone=1234567890
  #
  # The "phone" query parameter wins; otherwise the first path segment that
  # looks like a phone number is used. Candidates that do not look like a
  # phone number (e.g. "send", "message/ABC123") yield nil rather than
  # whatever digits they happen to contain.
  #
  # @param uri [URI::Generic]
  # @return [String, nil] digits only
  def extract_whatsapp_phone(uri)
    candidate = CGI.parse(uri.query.to_s)['phone']&.first
    candidate = whatsapp_phone_from_path(uri.path.to_s) if candidate.blank?
    return nil unless candidate.to_s.match?(WHATSAPP_PHONE_PATTERN)

    candidate.gsub(/\D/, '').presence
  end

  # Picks the first path segment shaped like a phone number. A percent-encoded
  # leading "+" ("%2B") is decoded first so its hex digits are not mistaken
  # for part of the number.
  def whatsapp_phone_from_path(path)
    path.split('/')
        .map { |segment| segment.sub(/\A%2B/i, '+') }
        .find { |segment| segment.match?(WHATSAPP_PHONE_PATTERN) }
  end
end
|
||||||
93
app/services/website_branding_service.rb
Normal file
93
app/services/website_branding_service.rb
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
# Fetches a website with HTTParty, parses it with Nokogiri and extracts basic
# business and branding data: business name, language, social handles,
# favicon and theme colour.
#
# #perform returns a Hash with the keys :business_name, :language,
# :industry_category (always nil here), :social_handles and :branding, or nil
# when the page cannot be fetched or any step raises.
class WebsiteBrandingService
  include SocialLinkParser

  # Matches URLs that already carry an http(s) scheme.
  ABSOLUTE_HTTP_URL = %r{\Ahttps?://}i

  # Separators between the site name and the rest of a <title>. An ASCII
  # hyphen only counts when surrounded by whitespace, so hyphenated names
  # ("Coca-Cola") stay intact.
  TITLE_SEPARATOR = /\s+-+\s+|\s*[|–—·:]+\s*/

  # href prefixes that never lead to another page.
  SKIPPED_HREF_PREFIXES = %w[# javascript: mailto: tel:].freeze

  # @param url [String] site address, with or without a scheme
  def initialize(url)
    @url = normalize_url(url)
  end

  # @return [Hash, nil] extracted data, or nil on fetch failure / any error
  def perform
    doc = fetch_page
    return nil if doc.nil?

    links = extract_links(doc)

    {
      business_name: extract_business_name(doc),
      language: extract_language(doc),
      industry_category: nil,
      social_handles: extract_social_from_links(links),
      branding: extract_branding(doc)
    }
  rescue StandardError => e
    Rails.logger.error "[WebsiteBranding] #{e.message}"
    nil
  end

  private

  # Trims the input and prepends https:// when no http(s) scheme is present.
  # nil is treated as an empty string so construction never raises.
  def normalize_url(url)
    candidate = url.to_s.strip
    candidate.match?(ABSOLUTE_HTTP_URL) ? candidate : "https://#{candidate}"
  end

  # Downloads and parses the page.
  #
  # NOTE(review): @url comes from the caller and is fetched server-side with
  # redirects followed — confirm that SSRF protections exist upstream.
  #
  # @return [Nokogiri::HTML::Document, nil] nil on non-success status or error
  def fetch_page
    response = HTTParty.get(@url, follow_redirects: true, timeout: 15)
    return nil unless response.success?

    Nokogiri::HTML(response.body)
  rescue StandardError => e
    Rails.logger.error "[WebsiteBranding] Failed to fetch #{@url}: #{e.message}"
    nil
  end

  # Prefers og:site_name; otherwise the first non-blank segment of <title>.
  def extract_business_name(doc)
    og_site_name = doc.at_css('meta[property="og:site_name"]')&.[]('content')
    return og_site_name.strip if og_site_name.present?

    title = doc.at_xpath('//title')&.text
    title&.strip&.split(TITLE_SEPARATOR)&.find(&:present?)
  end

  # Primary language subtag of <html lang>, lower-cased ("en-US"/"en_US" => "en").
  def extract_language(doc)
    lang = doc.at_css('html')&.[]('lang').to_s.strip
    lang.split(/[-_]/).first&.downcase.presence
  end

  # Unique absolute URLs of every <a href>, skipping fragments and
  # javascript:/mailto:/tel: links; unresolvable hrefs are dropped.
  def extract_links(doc)
    doc.css('a[href]').filter_map do |anchor|
      href = anchor['href']&.strip
      next if href.blank? || href.downcase.start_with?(*SKIPPED_HREF_PREFIXES)

      resolve_url(href)
    end.uniq
  end

  def extract_branding(doc)
    {
      favicon: extract_favicon(doc),
      primary_color: extract_theme_color(doc)
    }
  end

  # Absolute URL of the first <link> whose rel contains "icon", if any.
  def extract_favicon(doc)
    resolve_url(doc.at_css('link[rel*="icon"]')&.[]('href')&.strip)
  end

  def extract_theme_color(doc)
    doc.at_css('meta[name="theme-color"]')&.[]('content')
  end

  # Returns +url+ unchanged when it already has an http(s) scheme, otherwise
  # resolves it against the page URL. nil for blank or invalid input.
  def resolve_url(url)
    return nil if url.blank?
    return url if url.match?(ABSOLUTE_HTTP_URL)

    URI.join(@url, url).to_s
  rescue URI::InvalidURIError
    nil
  end
end
|
||||||
|
|
||||||
|
# Hook for the Enterprise override (Enterprise::WebsiteBrandingService).
# NOTE(review): presumably a no-op when the Enterprise module is absent — confirm.
WebsiteBrandingService.prepend_mod_with('WebsiteBrandingService')
|
||||||
112
enterprise/app/services/enterprise/website_branding_service.rb
Normal file
112
enterprise/app/services/enterprise/website_branding_service.rb
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
# Enterprise override for WebsiteBrandingService#perform.
#
# When a CAPTAIN_FIRECRAWL_API_KEY installation config is present, a single
# Firecrawl scrape request supplies structured JSON (name, language,
# industry), branding and page links. Any failure — exception, non-success
# HTTP status, or an unusable response body — is logged and falls back to the
# base implementation via +super+.
module Enterprise::WebsiteBrandingService
  FIRECRAWL_SCRAPE_ENDPOINT = 'https://api.firecrawl.dev/v2/scrape'.freeze

  # Allowed values for the industry_category field of the extraction schema.
  INDUSTRY_CATEGORIES = [
    'Technology',
    'E-commerce',
    'Healthcare',
    'Education',
    'Finance',
    'Real Estate',
    'Marketing',
    'Travel & Hospitality',
    'Food & Beverage',
    'Media & Entertainment',
    'Professional Services',
    'Non-profit',
    'Other'
  ].freeze

  # @return [Hash, nil] same shape as the base #perform result
  def perform
    return super unless firecrawl_enabled?

    response = perform_firecrawl_request
    process_firecrawl_response(response)
  rescue StandardError => e
    Rails.logger.error "[WebsiteBranding] Firecrawl failed: #{e.message}, falling back to basic scrape"
    super
  end

  private

  def firecrawl_enabled?
    firecrawl_api_key.present?
  end

  # Memoised so the key is not looked up again when building request headers.
  def firecrawl_api_key
    @firecrawl_api_key ||= InstallationConfig.find_by(name: 'CAPTAIN_FIRECRAWL_API_KEY')&.value
  end

  def perform_firecrawl_request
    HTTParty.post(
      FIRECRAWL_SCRAPE_ENDPOINT,
      body: scrape_payload.to_json,
      headers: {
        'Authorization' => "Bearer #{firecrawl_api_key}",
        'Content-Type' => 'application/json'
      }
    )
  end

  # Request body: one scrape of @url asking for the json, branding and links formats.
  def scrape_payload
    {
      url: @url,
      onlyMainContent: false,
      formats: [
        {
          type: 'json',
          schema: extract_schema,
          prompt: 'Extract the business name, primary language, and industry category from this website.'
        },
        'branding',
        'links'
      ]
    }
  end

  # JSON schema handed to Firecrawl for the structured extraction.
  def extract_schema
    {
      type: 'object',
      properties: {
        business_name: { type: 'string', description: 'The name of the business or company' },
        language: { type: 'string', description: 'Primary language as ISO 639-1 code (e.g., en, es, fr)' },
        industry_category: { type: 'string', enum: INDUSTRY_CATEGORIES, description: 'Industry category for this business' }
      },
      required: %w[business_name]
    }
  end

  # @raise [RuntimeError] on a non-success HTTP status or unusable body, so
  #   that #perform falls back to the basic scrape
  def process_firecrawl_response(response)
    raise "API Error: #{response.message} (Status: #{response.code})" unless response.success?

    format_firecrawl_response(response)
  end

  def format_firecrawl_response(response)
    data = firecrawl_data(response.parsed_response)
    extract = data['json'] || {}
    brand = data['branding'] || {}
    links = data['links'] || []

    {
      business_name: extract['business_name'],
      language: extract['language'],
      industry_category: extract['industry_category'],
      social_handles: extract_social_from_links(links),
      branding: extract_firecrawl_branding(brand)
    }
  end

  # Returns the "data" object of a parsed Firecrawl response.
  #
  # Raises instead of returning an empty hash so that a 200 response carrying
  # `success: false`, or no data at all, triggers the fallback rather than
  # producing an all-nil result.
  def firecrawl_data(body)
    raise 'API Error: unexpected response body' unless body.is_a?(Hash)
    raise "API Error: #{body['error'] || 'scrape unsuccessful'}" if body['success'] == false

    data = body['data']
    raise 'API Error: response contains no data' unless data.is_a?(Hash)

    data
  end

  def extract_firecrawl_branding(brand)
    {
      favicon: url_or_nil(brand.dig('images', 'favicon')),
      primary_color: brand.dig('colors', 'primary')
    }
  end

  # Passes through http(s) URLs only; anything else (blank, non-string,
  # other schemes) becomes nil.
  def url_or_nil(value)
    return nil unless value.is_a?(String) && value.match?(%r{\Ahttps?://}i)

    value
  end
end
|
||||||
@@ -0,0 +1,171 @@
|
|||||||
|
require 'rails_helper'
|
||||||
|
|
||||||
|
# Stand-in for prepend_mod_with: an anonymous subclass of the base service
# with the Enterprise module prepended, used as the class under test.
test_klass = Class.new(WebsiteBrandingService) { prepend Enterprise::WebsiteBrandingService }
|
||||||
|
|
||||||
|
RSpec.describe Enterprise::WebsiteBrandingService do
  describe '#perform' do
    subject(:service) { test_klass.new(url) }

    let(:url) { 'https://example.com' }
    let(:api_key) { 'test-firecrawl-api-key' }
    let(:scrape_endpoint) { described_class::FIRECRAWL_SCRAPE_ENDPOINT }
    let(:fallback_html) { '<html lang="en"><head><title>Fallback</title></head><body></body></html>' }
    let(:success_response_body) do
      scraped_links = [
        'https://example.com/about',
        'https://facebook.com/acmecorp',
        'https://instagram.com/acme_corp',
        'https://wa.me/1234567890',
        'https://t.me/acmecorp',
        'https://tiktok.com/@acmetok'
      ]
      extracted = { business_name: 'Acme Corp', language: 'en', industry_category: 'Technology' }
      branding = {
        images: { logo: 'https://example.com/logo.png', favicon: 'https://example.com/favicon.png' },
        colors: { primary: '#FF5733' }
      }

      { success: true, data: { json: extracted, branding: branding, links: scraped_links } }.to_json
    end

    # Stores the Firecrawl API key so the Enterprise code path is taken.
    def configure_firecrawl_key
      create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: api_key)
    end

    # Stubs a successful scrape whose payload carries only a business name and +links+.
    def stub_scrape_with_links(links)
      payload = { success: true, data: { json: { business_name: 'Acme Corp' }, links: links } }.to_json
      stub_request(:post, scrape_endpoint)
        .to_return(status: 200, body: payload, headers: { 'content-type' => 'application/json' })
    end

    # The plain page is always reachable so fallbacks have something to scrape.
    before do
      stub_request(:get, url).to_return(status: 200, body: fallback_html, headers: { 'content-type' => 'text/html' })
    end

    context 'when firecrawl is configured and API returns success' do
      before do
        configure_firecrawl_key
        stub_request(:post, scrape_endpoint)
          .with(headers: { 'Authorization' => "Bearer #{api_key}", 'Content-Type' => 'application/json' })
          .to_return(status: 200, body: success_response_body, headers: { 'content-type' => 'application/json' })
      end

      it 'returns business info and branding from firecrawl' do
        expected_handles = {
          whatsapp: '1234567890',
          line: nil,
          facebook: 'acmecorp',
          instagram: 'acme_corp',
          telegram: 'acmecorp',
          tiktok: '@acmetok'
        }
        expected = {
          business_name: 'Acme Corp',
          language: 'en',
          industry_category: 'Technology',
          social_handles: expected_handles,
          branding: { favicon: 'https://example.com/favicon.png', primary_color: '#FF5733' }
        }

        expect(service.perform).to eq(expected)
      end
    end

    context 'when firecrawl API returns an error' do
      before do
        configure_firecrawl_key
        stub_request(:post, scrape_endpoint)
          .to_return(status: 422, body: '{"error": "Invalid URL"}', headers: {})
      end

      it 'falls back to basic scrape' do
        outcome = service.perform

        expect(outcome[:business_name]).to eq('Fallback')
        expect(outcome[:industry_category]).to be_nil
      end
    end

    context 'when firecrawl raises an exception' do
      before do
        configure_firecrawl_key
        stub_request(:post, scrape_endpoint).to_raise(StandardError.new('connection refused'))
      end

      it 'falls back to basic scrape' do
        outcome = service.perform

        expect(outcome[:business_name]).to eq('Fallback')
      end
    end

    context 'when firecrawl is not configured' do
      it 'uses basic scrape' do
        expect(HTTParty).not_to receive(:post)

        outcome = service.perform

        expect(outcome[:business_name]).to eq('Fallback')
      end
    end

    context 'when WhatsApp link uses api.whatsapp.com format' do
      before do
        configure_firecrawl_key
        stub_scrape_with_links(['https://api.whatsapp.com/send?phone=5511999999999&text=Hello'])
      end

      it 'extracts phone number from query param' do
        handles = service.perform[:social_handles]

        expect(handles[:whatsapp]).to eq('5511999999999')
      end
    end

    context 'when WhatsApp link uses wa.me format' do
      before do
        configure_firecrawl_key
        stub_scrape_with_links(['https://wa.me/+5511999999999'])
      end

      it 'extracts phone number from path' do
        handles = service.perform[:social_handles]

        expect(handles[:whatsapp]).to eq('5511999999999')
      end
    end

    context 'when links contain lookalike domains' do
      before do
        configure_firecrawl_key
        stub_scrape_with_links(['https://notfacebook.com/page', 'https://fakeinstagram.com/user'])
      end

      it 'does not match lookalike domains' do
        handles = service.perform[:social_handles]

        expect(handles[:facebook]).to be_nil
        expect(handles[:instagram]).to be_nil
      end
    end
  end
end
|
||||||
156
spec/services/website_branding_service_spec.rb
Normal file
156
spec/services/website_branding_service_spec.rb
Normal file
@@ -0,0 +1,156 @@
|
|||||||
|
require 'rails_helper'
|
||||||
|
|
||||||
|
RSpec.describe WebsiteBrandingService do
  describe '#perform' do
    # Result of scraping the default URL; evaluated lazily on first use.
    subject(:result) { described_class.new(url).perform }

    let(:url) { 'https://example.com' }
    let(:html_body) do
      <<~HTML
        <html lang="en">
          <head>
            <title>Acme Corp | Home</title>
            <meta property="og:site_name" content="Acme Corp" />
            <meta property="og:image" content="https://example.com/og-image.png" />
            <meta name="theme-color" content="#FF5733" />
            <link rel="icon" href="/favicon.ico" />
          </head>
          <body>
            <header><a href="/">Home</a></header>
            <footer>
              <a href="https://facebook.com/acmecorp">Facebook</a>
              <a href="https://instagram.com/acme_corp">Instagram</a>
              <a href="https://wa.me/1234567890">WhatsApp</a>
              <a href="https://t.me/acmecorp">Telegram</a>
              <a href="https://tiktok.com/@acmetok">TikTok</a>
            </footer>
          </body>
        </html>
      HTML
    end

    before do
      stub_request(:get, url).to_return(status: 200, body: html_body, headers: { 'content-type' => 'text/html' })
    end

    it 'extracts business info, branding, and social handles' do
      expected_handles = {
        whatsapp: '1234567890',
        line: nil,
        facebook: 'acmecorp',
        instagram: 'acme_corp',
        telegram: 'acmecorp',
        tiktok: '@acmetok'
      }
      expected = {
        business_name: 'Acme Corp',
        language: 'en',
        industry_category: nil,
        social_handles: expected_handles,
        branding: { favicon: 'https://example.com/favicon.ico', primary_color: '#FF5733' }
      }

      expect(result).to eq(expected)
    end

    context 'when og:site_name is missing' do
      let(:html_body) do
        <<~HTML
          <html lang="fr">
            <head><title>Mon Entreprise - Bienvenue</title></head>
            <body></body>
          </html>
        HTML
      end

      it 'falls back to the first segment of the title' do
        expect(result[:business_name]).to eq('Mon Entreprise')
        expect(result[:language]).to eq('fr')
      end
    end

    context 'when the page fails to load' do
      before { stub_request(:get, url).to_return(status: 500, body: '') }

      it 'returns nil' do
        expect(result).to be_nil
      end
    end

    context 'when a network error occurs' do
      before { stub_request(:get, url).to_raise(StandardError.new('connection refused')) }

      it 'logs the error and returns nil' do
        # The expectation must be registered before the lazy subject runs.
        expect(Rails.logger).to receive(:error).with(/connection refused/)
        expect(result).to be_nil
      end
    end

    context 'when URL has no scheme' do
      before do
        stub_request(:get, 'https://example.com').to_return(status: 200, body: html_body, headers: { 'content-type' => 'text/html' })
      end

      it 'prepends https://' do
        scheme_less = described_class.new('example.com').perform

        expect(scheme_less[:business_name]).to eq('Acme Corp')
      end
    end

    context 'when WhatsApp link uses api.whatsapp.com format' do
      let(:html_body) do
        <<~HTML
          <html lang="en">
            <head><title>Test</title></head>
            <body><a href="https://api.whatsapp.com/send?phone=5511999999999&text=Hello">Chat</a></body>
          </html>
        HTML
      end

      it 'extracts phone from query param' do
        expect(result[:social_handles][:whatsapp]).to eq('5511999999999')
      end
    end

    context 'when links contain lookalike domains' do
      let(:html_body) do
        <<~HTML
          <html lang="en">
            <head><title>Test</title></head>
            <body>
              <a href="https://notfacebook.com/page">Not FB</a>
              <a href="https://fakeinstagram.com/user">Not IG</a>
            </body>
          </html>
        HTML
      end

      it 'does not match lookalike domains' do
        handles = result[:social_handles]

        expect(handles[:facebook]).to be_nil
        expect(handles[:instagram]).to be_nil
      end
    end

    context 'when favicon uses a relative path without leading slash' do
      let(:html_body) do
        <<~HTML
          <html lang="en">
            <head>
              <title>Test</title>
              <link rel="icon" href="favicon.ico" />
            </head>
            <body></body>
          </html>
        HTML
      end

      it 'resolves the relative favicon URL' do
        expect(result[:branding][:favicon]).to eq('https://example.com/favicon.ico')
      end
    end
  end
end
|
||||||
Reference in New Issue
Block a user