feat: account enrichment using context.dev [UPM-27] (#13978)
## Account branding enrichment during signup This PR does the following ### Replace Firecrawl with Context.dev Switches the enterprise brand lookup from Firecrawl to Context.dev for better data quality, built-in caching, and automatic filtering of free/disposable email providers. The service interface changes from URL to email input to match Context.dev's email endpoint. OSS still falls back to basic HTML scraping with a normalized output shape across both paths. The enterprise path intentionally does not fall back to HTML scraping on failure — speed matters more than completeness. We want the user on the editable onboarding form fast, and a slow fallback scrape is worse than letting them fill it in. Requires `CONTEXT_DEV_API_KEY` in Super Admin → App Config. Without it, falls back to OSS HTML scraping. ### Add job to enrich account details After account creation, `Account::BrandingEnrichmentJob` looks up the signup email and pre-fills the account name, colors, logos, social links, and industry into `custom_attributes['brand_info']`. The job signals completion via a short-lived Redis key (30s TTL) + an ActionCable broadcast (`account.enrichment_completed`). The Redis key lets the frontend distinguish "still running" from "finished with no results."
This commit is contained in:
@@ -30,6 +30,7 @@ class Api::V1::AccountsController < Api::BaseController
|
||||
locale: account_params[:locale],
|
||||
user: current_user
|
||||
).perform
|
||||
enqueue_branding_enrichment
|
||||
if @user
|
||||
# Authenticated users (dashboard "add account") and api_only signups
|
||||
# need the full response with account_id. API-only deployments have no
|
||||
@@ -69,6 +70,16 @@ class Api::V1::AccountsController < Api::BaseController
|
||||
|
||||
private
|
||||
|
||||
# Starts best-effort branding enrichment for the account that was just created.
#
# Writes a 30-second Redis flag marking enrichment as in flight, then enqueues
# Account::BrandingEnrichmentJob with the signup email. Does nothing when the
# request carries no email. Any failure is reported to the exception tracker
# and swallowed so that signup itself is never aborted.
def enqueue_branding_enrichment
  email = account_params[:email]
  return if email.blank?

  flag_key = format(Redis::Alfred::ACCOUNT_ONBOARDING_ENRICHMENT, account_id: @account.id)
  # The flag must exist BEFORE the job is enqueued. The job deletes this key
  # when it finishes; if it completed first (always the case with an inline
  # queue adapter) a flag written afterwards would linger for the full TTL and
  # wrongly report enrichment as still running.
  Redis::Alfred.set(flag_key, '1', ex: 30)
  Account::BrandingEnrichmentJob.perform_later(@account.id, email)
rescue StandardError => e
  # Enrichment is optional — never let queue/Redis failures abort signup
  ChatwootExceptionTracker.new(e).capture_exception
  clear_enrichment_flag(flag_key)
end

# Removes the in-flight flag after a failed enqueue so no job-less flag is
# left behind. Best effort: a failure here is reported, never raised.
def clear_enrichment_flag(flag_key)
  Redis::Alfred.delete(flag_key) if flag_key
rescue StandardError => e
  ChatwootExceptionTracker.new(e).capture_exception
end
|
||||
|
||||
def ensure_account_name
|
||||
# ensure that account_name and user_full_name are present
|
||||
# this is because the account builder and the model validations are not triggered
|
||||
|
||||
32
app/jobs/account/branding_enrichment_job.rb
Normal file
32
app/jobs/account/branding_enrichment_job.rb
Normal file
@@ -0,0 +1,32 @@
|
||||
# Pre-fills a newly created account with brand data looked up from the signup
# email, then signals completion to the onboarding flow.
#
# On a non-blank lookup result the account name is replaced by the brand title
# (when present) and the full result is stored under
# custom_attributes['brand_info'] unless that key already holds a value.
#
# Regardless of the outcome (no result, error, success) the completion steps in
# #finish_enrichment always run.
class Account::BrandingEnrichmentJob < ApplicationJob
  queue_as :low

  # @param account_id [Integer] id of the account to enrich
  # @param email [String] signup email handed to WebsiteBrandingService
  def perform(account_id, email)
    result = WebsiteBrandingService.new(email).perform
    return if result.blank?

    # find_by instead of find: an account that no longer exists means there is
    # nothing to enrich, so exit quietly rather than raising RecordNotFound.
    account = Account.find_by(id: account_id)
    return if account.nil?

    account.name = result[:title] if result[:title].present?
    account.custom_attributes['brand_info'] = result if account.custom_attributes['brand_info'].blank?
    account.save! if account.changed?
  ensure
    finish_enrichment(account_id)
  end

  private

  # Clears the in-flight Redis flag, moves the account's onboarding step from
  # 'enrichment' to 'account_details' when applicable, and broadcasts
  # 'account.enrichment_completed' to the first administrator of the account.
  def finish_enrichment(account_id)
    Redis::Alfred.delete(format(Redis::Alfred::ACCOUNT_ONBOARDING_ENRICHMENT, account_id: account_id))

    # Runs from an ensure block: raising here would replace whatever error
    # perform was already propagating, so a missing account is a silent no-op.
    account = Account.find_by(id: account_id)
    return if account.nil?

    if account.custom_attributes['onboarding_step'] == 'enrichment'
      account.custom_attributes['onboarding_step'] = 'account_details'
      account.save!
    end

    user = account.administrators.first
    return unless user

    ActionCableBroadcastJob.perform_later([user.pubsub_token], 'account.enrichment_completed', { account_id: account_id })
  end
end
|
||||
@@ -1,8 +1,15 @@
|
||||
class WebsiteBrandingService
|
||||
include SocialLinkParser
|
||||
|
||||
def initialize(url)
|
||||
@url = normalize_url(url)
|
||||
attr_reader :http_status
|
||||
|
||||
DATA_DEFAULTS = { description: nil, slogan: nil, phone: nil, address: nil, links: nil, stock: nil, industries: [], is_nsfw: false }.freeze
|
||||
|
||||
# Builds a branding lookup for the given email address.
#
# The part after the last "@" (lower-cased, surrounding whitespace removed)
# becomes the domain, and the page to fetch is "https://<domain>". The email
# itself is stored unmodified.
#
# @param email [String, nil] email address to derive the domain from
def initialize(email)
  @email = email
  # to_s keeps a nil email from raising NoMethodError here; it simply yields
  # a nil domain.
  @domain = email.to_s.split('@').last&.downcase&.strip
  @url = "https://#{@domain}"
  @http_status = nil
end
|
||||
|
||||
def perform
|
||||
@@ -11,13 +18,14 @@ class WebsiteBrandingService
|
||||
|
||||
links = extract_links(doc)
|
||||
|
||||
{
|
||||
business_name: extract_business_name(doc),
|
||||
language: extract_language(doc),
|
||||
industry_category: nil,
|
||||
social_handles: extract_social_from_links(links),
|
||||
branding: extract_branding(doc)
|
||||
}
|
||||
DATA_DEFAULTS.merge({
|
||||
domain: @domain,
|
||||
title: extract_title(doc),
|
||||
colors: extract_colors(doc),
|
||||
logos: extract_logos(doc),
|
||||
socials: build_socials(links),
|
||||
email: @email
|
||||
})
|
||||
rescue StandardError => e
|
||||
Rails.logger.error "[WebsiteBranding] #{e.message}"
|
||||
nil
|
||||
@@ -25,12 +33,9 @@ class WebsiteBrandingService
|
||||
|
||||
private
|
||||
|
||||
def normalize_url(url)
|
||||
url.match?(%r{\Ahttps?://}) ? url : "https://#{url}"
|
||||
end
|
||||
|
||||
def fetch_page
|
||||
response = HTTParty.get(@url, follow_redirects: true, timeout: 15)
|
||||
@http_status = response.code
|
||||
return nil unless response.success?
|
||||
|
||||
Nokogiri::HTML(response.body)
|
||||
@@ -39,7 +44,7 @@ class WebsiteBrandingService
|
||||
nil
|
||||
end
|
||||
|
||||
def extract_business_name(doc)
|
||||
def extract_title(doc)
|
||||
og_site_name = doc.at_css('meta[property="og:site_name"]')&.[]('content')
|
||||
return og_site_name.strip if og_site_name.present?
|
||||
|
||||
@@ -47,8 +52,37 @@ class WebsiteBrandingService
|
||||
title&.strip&.split(/\s*[|\-–—·:]+\s*/)&.first
|
||||
end
|
||||
|
||||
def extract_language(doc)
|
||||
doc.at_css('html')&.[]('lang')&.split('-')&.first&.downcase
|
||||
# Reads the page's <meta name="theme-color"> tag.
#
# @param doc [#at_css] parsed HTML document
# @return [Array<Hash>] a single { hex:, name: nil } entry, or [] when the tag
#   is missing or its content is blank
def extract_colors(doc)
  theme_meta = doc.at_css('meta[name="theme-color"]')
  theme_color = theme_meta.nil? ? nil : theme_meta['content']

  theme_color.blank? ? [] : [{ hex: theme_color, name: nil }]
end
|
||||
|
||||
# Builds the logo list from the first <link> whose rel contains "icon".
#
# @param doc [#at_css] parsed HTML document
# @return [Array<Hash>] one logo entry carrying the resolved URL, or [] when
#   the link, its href, or the resolved URL is blank
def extract_logos(doc)
  icon_link = doc.at_css('link[rel*="icon"]')
  href = icon_link.nil? ? nil : icon_link['href']
  absolute_url = href.blank? ? nil : resolve_url(href)
  return [] if absolute_url.blank?

  logo = { url: absolute_url, type: nil, mode: nil, colors: [], resolution: { aspect_ratio: 1 } }
  [logo]
end
|
||||
|
||||
# Converts the social handles found in the page links into a list of
# { type:, url: } entries, skipping platforms with a blank handle.
#
# @param links [Array<String>] links passed on to extract_social_from_links
# @return [Array<Hash>] one entry per platform that has a handle
def build_socials(links)
  extract_social_from_links(links)
    .reject { |_platform, handle| handle.blank? }
    .map { |platform, handle| { type: platform.to_s, url: reconstruct_social_url(platform, handle) } }
end
|
||||
|
||||
# Rebuilds a profile URL by prefixing the handle with the platform's base URL.
# A platform without a known base URL yields the handle on its own.
#
# @param platform [Symbol] one of :whatsapp, :line, :facebook, :instagram,
#   :telegram, :tiktok
# @param handle [String] handle or number on that platform
# @return [String]
def reconstruct_social_url(platform, handle)
  prefix =
    case platform
    when :whatsapp then 'https://wa.me/'
    when :line then 'https://line.me/'
    when :facebook then 'https://facebook.com/'
    when :instagram then 'https://instagram.com/'
    when :telegram then 'https://t.me/'
    when :tiktok then 'https://tiktok.com/'
    end

  "#{prefix}#{handle}"
end
|
||||
|
||||
def extract_links(doc)
|
||||
@@ -62,24 +96,6 @@ class WebsiteBrandingService
|
||||
end.uniq
|
||||
end
|
||||
|
||||
def extract_branding(doc)
|
||||
{
|
||||
favicon: extract_favicon(doc),
|
||||
primary_color: extract_theme_color(doc)
|
||||
}
|
||||
end
|
||||
|
||||
def extract_favicon(doc)
|
||||
favicon = doc.at_css('link[rel*="icon"]')&.[]('href')
|
||||
return nil if favicon.blank?
|
||||
|
||||
resolve_url(favicon)
|
||||
end
|
||||
|
||||
def extract_theme_color(doc)
|
||||
doc.at_css('meta[name="theme-color"]')&.[]('content')
|
||||
end
|
||||
|
||||
def resolve_url(url)
|
||||
return nil if url.blank?
|
||||
return url if url.start_with?('http')
|
||||
|
||||
@@ -211,6 +211,13 @@
|
||||
type: code
|
||||
# End of Captain Config
|
||||
|
||||
# ------- Context.dev Config ------- #
|
||||
- name: CONTEXT_DEV_API_KEY
|
||||
display_title: 'Context.dev API Key'
|
||||
description: 'API key for Context.dev branding service used during account onboarding'
|
||||
type: secret
|
||||
# ------- End of Context.dev Config ------- #
|
||||
|
||||
# ------- Chatwoot Internal Config for Cloud ----#
|
||||
- name: CHATWOOT_INBOX_TOKEN
|
||||
value:
|
||||
|
||||
@@ -34,9 +34,9 @@ module Enterprise::SuperAdmin::AppConfigsController
|
||||
end
|
||||
|
||||
def internal_config_options
|
||||
%w[CHATWOOT_INBOX_TOKEN CHATWOOT_INBOX_HMAC_KEY CLOUD_ANALYTICS_TOKEN CLEARBIT_API_KEY DASHBOARD_SCRIPTS INACTIVE_WHATSAPP_NUMBERS
|
||||
SKIP_INCOMING_BCC_PROCESSING CAPTAIN_CLOUD_PLAN_LIMITS ACCOUNT_SECURITY_NOTIFICATION_WEBHOOK_URL CHATWOOT_INSTANCE_ADMIN_EMAIL
|
||||
OG_IMAGE_CDN_URL OG_IMAGE_CLIENT_REF CLOUDFLARE_API_KEY CLOUDFLARE_ZONE_ID BLOCKED_EMAIL_DOMAINS
|
||||
%w[CHATWOOT_INBOX_TOKEN CHATWOOT_INBOX_HMAC_KEY CLOUD_ANALYTICS_TOKEN CLEARBIT_API_KEY CONTEXT_DEV_API_KEY DASHBOARD_SCRIPTS
|
||||
INACTIVE_WHATSAPP_NUMBERS SKIP_INCOMING_BCC_PROCESSING CAPTAIN_CLOUD_PLAN_LIMITS ACCOUNT_SECURITY_NOTIFICATION_WEBHOOK_URL
|
||||
CHATWOOT_INSTANCE_ADMIN_EMAIL OG_IMAGE_CDN_URL OG_IMAGE_CLIENT_REF CLOUDFLARE_API_KEY CLOUDFLARE_ZONE_ID BLOCKED_EMAIL_DOMAINS
|
||||
OTEL_PROVIDER LANGFUSE_PUBLIC_KEY LANGFUSE_SECRET_KEY LANGFUSE_BASE_URL]
|
||||
end
|
||||
|
||||
|
||||
@@ -1,112 +1,63 @@
|
||||
module Enterprise::WebsiteBrandingService
|
||||
FIRECRAWL_SCRAPE_ENDPOINT = 'https://api.firecrawl.dev/v2/scrape'.freeze
|
||||
|
||||
INDUSTRY_CATEGORIES = [
|
||||
'Technology',
|
||||
'E-commerce',
|
||||
'Healthcare',
|
||||
'Education',
|
||||
'Finance',
|
||||
'Real Estate',
|
||||
'Marketing',
|
||||
'Travel & Hospitality',
|
||||
'Food & Beverage',
|
||||
'Media & Entertainment',
|
||||
'Professional Services',
|
||||
'Non-profit',
|
||||
'Other'
|
||||
].freeze
|
||||
CONTEXT_DEV_ENDPOINT = 'https://api.context.dev/v1/brand/retrieve-by-email'.freeze
|
||||
|
||||
def perform
|
||||
return super unless firecrawl_enabled?
|
||||
return super unless context_dev_enabled?
|
||||
|
||||
response = perform_firecrawl_request
|
||||
process_firecrawl_response(response)
|
||||
response = fetch_brand
|
||||
process_response(response)
|
||||
rescue StandardError => e
|
||||
Rails.logger.error "[WebsiteBranding] Firecrawl failed: #{e.message}, falling back to basic scrape"
|
||||
super
|
||||
Rails.logger.error "[WebsiteBranding] Context.dev failed: #{e.message}"
|
||||
nil
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def firecrawl_enabled?
|
||||
firecrawl_api_key.present?
|
||||
# Whether a non-blank Context.dev API key is configured.
#
# @return [Boolean]
def context_dev_enabled?
  !context_dev_api_key.blank?
end
|
||||
|
||||
def firecrawl_api_key
|
||||
InstallationConfig.find_by(name: 'CAPTAIN_FIRECRAWL_API_KEY')&.value
|
||||
# Returns the value of the CONTEXT_DEV_API_KEY installation config, or nil
# when no such config exists.
#
# Memoized per service instance: the key is read once for the enabled check and
# again for the Authorization header, which otherwise costs two identical
# lookups. defined? is used so a nil result is cached as well.
#
# @return [String, nil]
def context_dev_api_key
  return @context_dev_api_key if defined?(@context_dev_api_key)

  @context_dev_api_key = InstallationConfig.find_by(name: 'CONTEXT_DEV_API_KEY')&.value
end
|
||||
|
||||
def perform_firecrawl_request
|
||||
HTTParty.post(
|
||||
FIRECRAWL_SCRAPE_ENDPOINT,
|
||||
body: scrape_payload.to_json,
|
||||
def fetch_brand
|
||||
HTTParty.get(
|
||||
CONTEXT_DEV_ENDPOINT,
|
||||
query: { email: @email },
|
||||
headers: {
|
||||
'Authorization' => "Bearer #{firecrawl_api_key}",
|
||||
'Authorization' => "Bearer #{context_dev_api_key}",
|
||||
'Content-Type' => 'application/json'
|
||||
}
|
||||
)
|
||||
end
|
||||
|
||||
def scrape_payload
|
||||
{
|
||||
url: @url,
|
||||
onlyMainContent: false,
|
||||
formats: [
|
||||
{
|
||||
type: 'json',
|
||||
schema: extract_schema,
|
||||
prompt: 'Extract the business name, primary language, and industry category from this website.'
|
||||
},
|
||||
'branding',
|
||||
'links'
|
||||
]
|
||||
}
|
||||
end
|
||||
|
||||
def extract_schema
|
||||
{
|
||||
type: 'object',
|
||||
properties: {
|
||||
business_name: { type: 'string', description: 'The name of the business or company' },
|
||||
language: { type: 'string', description: 'Primary language as ISO 639-1 code (e.g., en, es, fr)' },
|
||||
industry_category: { type: 'string', enum: INDUSTRY_CATEGORIES, description: 'Industry category for this business' }
|
||||
},
|
||||
required: %w[business_name]
|
||||
}
|
||||
end
|
||||
|
||||
def process_firecrawl_response(response)
|
||||
def process_response(response)
|
||||
@http_status = response.code
|
||||
raise "API Error: #{response.message} (Status: #{response.code})" unless response.success?
|
||||
|
||||
format_firecrawl_response(response)
|
||||
brand = response.parsed_response&.dig('brand')
|
||||
return nil if brand.blank?
|
||||
|
||||
format_brand(brand)
|
||||
end
|
||||
|
||||
def format_firecrawl_response(response)
|
||||
data = response.parsed_response
|
||||
extract = data.dig('data', 'json') || {}
|
||||
brand = data.dig('data', 'branding') || {}
|
||||
links = data.dig('data', 'links') || []
|
||||
|
||||
def format_brand(brand)
|
||||
{
|
||||
business_name: extract['business_name'],
|
||||
language: extract['language'],
|
||||
industry_category: extract['industry_category'],
|
||||
social_handles: extract_social_from_links(links),
|
||||
branding: extract_firecrawl_branding(brand)
|
||||
}
|
||||
end
|
||||
|
||||
def extract_firecrawl_branding(brand)
|
||||
{
|
||||
favicon: url_or_nil(brand.dig('images', 'favicon')),
|
||||
primary_color: brand.dig('colors', 'primary')
|
||||
}
|
||||
end
|
||||
|
||||
def url_or_nil(value)
|
||||
return nil if value.blank? || !value.start_with?('http')
|
||||
|
||||
value
|
||||
domain: brand['domain'],
|
||||
title: brand['title'],
|
||||
description: brand['description'],
|
||||
slogan: brand['slogan'],
|
||||
phone: brand['phone'],
|
||||
address: brand['address'],
|
||||
colors: brand['colors'] || [],
|
||||
logos: brand['logos'] || [],
|
||||
socials: brand['socials'] || [],
|
||||
links: brand['links'],
|
||||
email: @email,
|
||||
industries: brand.dig('industries', 'eic') || [],
|
||||
stock: brand['stock'],
|
||||
is_nsfw: brand['is_nsfw'] || false
|
||||
}.deep_symbolize_keys
|
||||
end
|
||||
end
|
||||
|
||||
@@ -50,6 +50,9 @@ module Redis::RedisKeys
|
||||
ASSIGNMENT_KEY = 'ASSIGNMENT::%<inbox_id>d::AGENT::%<agent_id>d::CONVERSATION::%<conversation_id>d'.freeze
|
||||
ASSIGNMENT_KEY_PATTERN = 'ASSIGNMENT::%<inbox_id>d::AGENT::%<agent_id>d::*'.freeze
|
||||
|
||||
## Account Onboarding
|
||||
ACCOUNT_ONBOARDING_ENRICHMENT = 'ONBOARDING_ENRICHMENT::%<account_id>d'.freeze
|
||||
|
||||
## Account Email Rate Limiting
|
||||
ACCOUNT_OUTBOUND_EMAIL_COUNT_KEY = 'OUTBOUND_EMAIL_COUNT::%<account_id>d::%<date>s'.freeze
|
||||
end
|
||||
|
||||
@@ -7,164 +7,111 @@ end
|
||||
|
||||
RSpec.describe Enterprise::WebsiteBrandingService do
|
||||
describe '#perform' do
|
||||
subject(:service) { test_klass.new(url) }
|
||||
subject(:service) { test_klass.new(email) }
|
||||
|
||||
let(:url) { 'https://example.com' }
|
||||
let(:api_key) { 'test-firecrawl-api-key' }
|
||||
let(:scrape_endpoint) { described_class::FIRECRAWL_SCRAPE_ENDPOINT }
|
||||
let(:fallback_html) { '<html lang="en"><head><title>Fallback</title></head><body></body></html>' }
|
||||
let(:email) { 'user@example.com' }
|
||||
let(:api_key) { 'test-context-dev-api-key' }
|
||||
let(:endpoint) { described_class::CONTEXT_DEV_ENDPOINT }
|
||||
let(:fallback_html) { '<html><head><title>Fallback</title></head><body></body></html>' }
|
||||
let(:success_response_body) do
|
||||
{
|
||||
success: true,
|
||||
data: {
|
||||
json: {
|
||||
business_name: 'Acme Corp',
|
||||
language: 'en',
|
||||
industry_category: 'Technology'
|
||||
},
|
||||
branding: {
|
||||
images: { logo: 'https://example.com/logo.png', favicon: 'https://example.com/favicon.png' },
|
||||
colors: { primary: '#FF5733' }
|
||||
},
|
||||
links: [
|
||||
'https://example.com/about',
|
||||
'https://facebook.com/acmecorp',
|
||||
'https://instagram.com/acme_corp',
|
||||
'https://wa.me/1234567890',
|
||||
'https://t.me/acmecorp',
|
||||
'https://tiktok.com/@acmetok'
|
||||
]
|
||||
status: 'ok',
|
||||
code: 200,
|
||||
brand: {
|
||||
domain: 'example.com',
|
||||
title: 'Acme Corp',
|
||||
description: 'Leading tech company',
|
||||
slogan: 'We build things',
|
||||
is_nsfw: false,
|
||||
colors: [{ hex: '#FF5733', name: 'Orange Red' }],
|
||||
logos: [{ url: 'https://media.brand.dev/logo.png', type: 'icon', mode: 'light',
|
||||
colors: [{ hex: '#FF5733', name: 'Orange Red' }],
|
||||
resolution: { width: 256, height: 256, aspect_ratio: 1 } }],
|
||||
socials: [
|
||||
{ type: 'facebook', url: 'https://facebook.com/acmecorp' },
|
||||
{ type: 'instagram', url: 'https://instagram.com/acme_corp' }
|
||||
],
|
||||
industries: {
|
||||
eic: [{ industry: 'Technology', subindustry: 'Software' }]
|
||||
}
|
||||
}
|
||||
}.to_json
|
||||
end
|
||||
|
||||
before do
|
||||
stub_request(:get, url).to_return(status: 200, body: fallback_html, headers: { 'content-type' => 'text/html' })
|
||||
stub_request(:get, 'https://example.com').to_return(status: 200, body: fallback_html,
|
||||
headers: { 'content-type' => 'text/html' })
|
||||
end
|
||||
|
||||
context 'when firecrawl is configured and API returns success' do
|
||||
context 'when context.dev is configured and API returns success' do
|
||||
before do
|
||||
create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: api_key)
|
||||
stub_request(:post, scrape_endpoint)
|
||||
.with(headers: { 'Authorization' => "Bearer #{api_key}", 'Content-Type' => 'application/json' })
|
||||
create(:installation_config, name: 'CONTEXT_DEV_API_KEY', value: api_key)
|
||||
stub_request(:get, endpoint)
|
||||
.with(query: { email: email }, headers: { 'Authorization' => "Bearer #{api_key}" })
|
||||
.to_return(status: 200, body: success_response_body, headers: { 'content-type' => 'application/json' })
|
||||
end
|
||||
|
||||
it 'returns business info and branding from firecrawl' do
|
||||
it 'returns basic brand info' do
|
||||
result = service.perform
|
||||
|
||||
expect(result).to eq({
|
||||
business_name: 'Acme Corp',
|
||||
language: 'en',
|
||||
industry_category: 'Technology',
|
||||
social_handles: {
|
||||
whatsapp: '1234567890',
|
||||
line: nil,
|
||||
facebook: 'acmecorp',
|
||||
instagram: 'acme_corp',
|
||||
telegram: 'acmecorp',
|
||||
tiktok: '@acmetok'
|
||||
},
|
||||
branding: {
|
||||
favicon: 'https://example.com/favicon.png',
|
||||
primary_color: '#FF5733'
|
||||
}
|
||||
})
|
||||
expect(result).to include(domain: 'example.com', title: 'Acme Corp', description: 'Leading tech company',
|
||||
slogan: 'We build things', is_nsfw: false, email: email)
|
||||
end
|
||||
|
||||
it 'returns colors, logos, socials, and industries' do
|
||||
result = service.perform
|
||||
|
||||
expect(result[:colors]).to eq([{ hex: '#FF5733', name: 'Orange Red' }])
|
||||
expect(result[:logos].first[:url]).to eq('https://media.brand.dev/logo.png')
|
||||
expect(result[:socials]).to eq([{ type: 'facebook', url: 'https://facebook.com/acmecorp' },
|
||||
{ type: 'instagram', url: 'https://instagram.com/acme_corp' }])
|
||||
expect(result[:industries]).to eq([{ industry: 'Technology', subindustry: 'Software' }])
|
||||
end
|
||||
end
|
||||
|
||||
context 'when firecrawl API returns an error' do
|
||||
context 'when context.dev API returns an error' do
|
||||
before do
|
||||
create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: api_key)
|
||||
stub_request(:post, scrape_endpoint)
|
||||
.to_return(status: 422, body: '{"error": "Invalid URL"}', headers: {})
|
||||
create(:installation_config, name: 'CONTEXT_DEV_API_KEY', value: api_key)
|
||||
stub_request(:get, endpoint)
|
||||
.with(query: { email: email })
|
||||
.to_return(status: 422, body: '{"error": "FREE_EMAIL_DETECTED"}')
|
||||
end
|
||||
|
||||
it 'falls back to basic scrape' do
|
||||
result = service.perform
|
||||
expect(result[:business_name]).to eq('Fallback')
|
||||
expect(result[:industry_category]).to be_nil
|
||||
it 'returns nil' do
|
||||
expect(service.perform).to be_nil
|
||||
end
|
||||
end
|
||||
|
||||
context 'when firecrawl raises an exception' do
|
||||
context 'when context.dev raises an exception' do
|
||||
before do
|
||||
create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: api_key)
|
||||
stub_request(:post, scrape_endpoint).to_raise(StandardError.new('connection refused'))
|
||||
create(:installation_config, name: 'CONTEXT_DEV_API_KEY', value: api_key)
|
||||
stub_request(:get, endpoint).with(query: { email: email }).to_raise(StandardError.new('connection refused'))
|
||||
end
|
||||
|
||||
it 'falls back to basic scrape' do
|
||||
result = service.perform
|
||||
expect(result[:business_name]).to eq('Fallback')
|
||||
it 'returns nil' do
|
||||
expect(service.perform).to be_nil
|
||||
end
|
||||
end
|
||||
|
||||
context 'when firecrawl is not configured' do
|
||||
it 'uses basic scrape' do
|
||||
expect(HTTParty).not_to receive(:post)
|
||||
context 'when context.dev is not configured' do
|
||||
it 'falls back to base scraper' do
|
||||
result = service.perform
|
||||
expect(result[:business_name]).to eq('Fallback')
|
||||
expect(result[:title]).to eq('Fallback')
|
||||
expect(result[:industries]).to eq([])
|
||||
end
|
||||
end
|
||||
|
||||
context 'when WhatsApp link uses api.whatsapp.com format' do
|
||||
context 'when context.dev returns empty brand' do
|
||||
before do
|
||||
create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: api_key)
|
||||
response = {
|
||||
success: true,
|
||||
data: {
|
||||
json: { business_name: 'Acme Corp' },
|
||||
links: ['https://api.whatsapp.com/send?phone=5511999999999&text=Hello']
|
||||
}
|
||||
}.to_json
|
||||
stub_request(:post, scrape_endpoint)
|
||||
.to_return(status: 200, body: response, headers: { 'content-type' => 'application/json' })
|
||||
create(:installation_config, name: 'CONTEXT_DEV_API_KEY', value: api_key)
|
||||
stub_request(:get, endpoint)
|
||||
.with(query: { email: email })
|
||||
.to_return(status: 200, body: { status: 'ok', code: 200, brand: nil }.to_json,
|
||||
headers: { 'content-type' => 'application/json' })
|
||||
end
|
||||
|
||||
it 'extracts phone number from query param' do
|
||||
result = service.perform
|
||||
expect(result[:social_handles][:whatsapp]).to eq('5511999999999')
|
||||
end
|
||||
end
|
||||
|
||||
context 'when WhatsApp link uses wa.me format' do
|
||||
before do
|
||||
create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: api_key)
|
||||
response = {
|
||||
success: true,
|
||||
data: {
|
||||
json: { business_name: 'Acme Corp' },
|
||||
links: ['https://wa.me/+5511999999999']
|
||||
}
|
||||
}.to_json
|
||||
stub_request(:post, scrape_endpoint)
|
||||
.to_return(status: 200, body: response, headers: { 'content-type' => 'application/json' })
|
||||
end
|
||||
|
||||
it 'extracts phone number from path' do
|
||||
result = service.perform
|
||||
expect(result[:social_handles][:whatsapp]).to eq('5511999999999')
|
||||
end
|
||||
end
|
||||
|
||||
context 'when links contain lookalike domains' do
|
||||
before do
|
||||
create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: api_key)
|
||||
response = {
|
||||
success: true,
|
||||
data: {
|
||||
json: { business_name: 'Acme Corp' },
|
||||
links: ['https://notfacebook.com/page', 'https://fakeinstagram.com/user']
|
||||
}
|
||||
}.to_json
|
||||
stub_request(:post, scrape_endpoint)
|
||||
.to_return(status: 200, body: response, headers: { 'content-type' => 'application/json' })
|
||||
end
|
||||
|
||||
it 'does not match lookalike domains' do
|
||||
result = service.perform
|
||||
expect(result[:social_handles][:facebook]).to be_nil
|
||||
expect(result[:social_handles][:instagram]).to be_nil
|
||||
it 'returns nil' do
|
||||
expect(service.perform).to be_nil
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -2,6 +2,7 @@ require 'rails_helper'
|
||||
|
||||
RSpec.describe WebsiteBrandingService do
|
||||
describe '#perform' do
|
||||
let(:email) { 'user@example.com' }
|
||||
let(:url) { 'https://example.com' }
|
||||
let(:html_body) do
|
||||
<<~HTML
|
||||
@@ -9,12 +10,21 @@ RSpec.describe WebsiteBrandingService do
|
||||
<head>
|
||||
<title>Acme Corp | Home</title>
|
||||
<meta property="og:site_name" content="Acme Corp" />
|
||||
<meta property="og:image" content="https://example.com/og-image.png" />
|
||||
<meta name="theme-color" content="#FF5733" />
|
||||
<link rel="icon" href="/favicon.ico" />
|
||||
<link rel="shortcut icon" href="/favicon-32.png" />
|
||||
<link rel="apple-touch-icon" href="/apple-touch-icon.png" />
|
||||
<link rel="mask-icon" href="/safari-pinned-tab.svg" />
|
||||
</head>
|
||||
<body>
|
||||
<header><a href="/">Home</a></header>
|
||||
<header>
|
||||
<a href="https://facebook.com/acmecorp">Facebook</a>
|
||||
<a href="https://instagram.com/acme_corp">Instagram</a>
|
||||
</header>
|
||||
<nav>
|
||||
<a href="https://facebook.com/acmecorp">FB</a>
|
||||
<a href="https://t.me/acmecorp">TG</a>
|
||||
</nav>
|
||||
<footer>
|
||||
<a href="https://facebook.com/acmecorp">Facebook</a>
|
||||
<a href="https://instagram.com/acme_corp">Instagram</a>
|
||||
@@ -31,26 +41,19 @@ RSpec.describe WebsiteBrandingService do
|
||||
stub_request(:get, url).to_return(status: 200, body: html_body, headers: { 'content-type' => 'text/html' })
|
||||
end
|
||||
|
||||
it 'extracts business info, branding, and social handles' do
|
||||
result = described_class.new(url).perform
|
||||
it 'extracts basic brand info' do
|
||||
result = described_class.new(email).perform
|
||||
|
||||
expect(result).to eq({
|
||||
business_name: 'Acme Corp',
|
||||
language: 'en',
|
||||
industry_category: nil,
|
||||
social_handles: {
|
||||
whatsapp: '1234567890',
|
||||
line: nil,
|
||||
facebook: 'acmecorp',
|
||||
instagram: 'acme_corp',
|
||||
telegram: 'acmecorp',
|
||||
tiktok: '@acmetok'
|
||||
},
|
||||
branding: {
|
||||
favicon: 'https://example.com/favicon.ico',
|
||||
primary_color: '#FF5733'
|
||||
}
|
||||
})
|
||||
expect(result).to include(domain: 'example.com', title: 'Acme Corp', email: email,
|
||||
description: nil, slogan: nil, is_nsfw: false, industries: [])
|
||||
end
|
||||
|
||||
it 'extracts colors, logos, and socials' do
|
||||
result = described_class.new(email).perform
|
||||
|
||||
expect(result[:colors]).to eq([{ hex: '#FF5733', name: nil }])
|
||||
expect(result[:logos].first[:url]).to eq('https://example.com/favicon.ico')
|
||||
expect(result[:socials].map { |s| s[:type] }).to contain_exactly('facebook', 'instagram', 'whatsapp', 'telegram', 'tiktok')
|
||||
end
|
||||
|
||||
context 'when og:site_name is missing' do
|
||||
@@ -64,17 +67,18 @@ RSpec.describe WebsiteBrandingService do
|
||||
end
|
||||
|
||||
it 'falls back to the first segment of the title' do
|
||||
result = described_class.new(url).perform
|
||||
expect(result[:business_name]).to eq('Mon Entreprise')
|
||||
expect(result[:language]).to eq('fr')
|
||||
result = described_class.new(email).perform
|
||||
expect(result[:title]).to eq('Mon Entreprise')
|
||||
end
|
||||
end
|
||||
|
||||
context 'when the page fails to load' do
|
||||
before { stub_request(:get, url).to_return(status: 500, body: '') }
|
||||
|
||||
it 'returns nil' do
|
||||
expect(described_class.new(url).perform).to be_nil
|
||||
it 'returns nil and sets http_status' do
|
||||
service = described_class.new(email)
|
||||
expect(service.perform).to be_nil
|
||||
expect(service.http_status).to eq(500)
|
||||
end
|
||||
end
|
||||
|
||||
@@ -83,18 +87,7 @@ RSpec.describe WebsiteBrandingService do
|
||||
|
||||
it 'logs the error and returns nil' do
|
||||
expect(Rails.logger).to receive(:error).with(/connection refused/)
|
||||
expect(described_class.new(url).perform).to be_nil
|
||||
end
|
||||
end
|
||||
|
||||
context 'when URL has no scheme' do
|
||||
before do
|
||||
stub_request(:get, 'https://example.com').to_return(status: 200, body: html_body, headers: { 'content-type' => 'text/html' })
|
||||
end
|
||||
|
||||
it 'prepends https://' do
|
||||
result = described_class.new('example.com').perform
|
||||
expect(result[:business_name]).to eq('Acme Corp')
|
||||
expect(described_class.new(email).perform).to be_nil
|
||||
end
|
||||
end
|
||||
|
||||
@@ -109,8 +102,9 @@ RSpec.describe WebsiteBrandingService do
|
||||
end
|
||||
|
||||
it 'extracts phone from query param' do
|
||||
result = described_class.new(url).perform
|
||||
expect(result[:social_handles][:whatsapp]).to eq('5511999999999')
|
||||
result = described_class.new(email).perform
|
||||
whatsapp = result[:socials].find { |s| s[:type] == 'whatsapp' }
|
||||
expect(whatsapp[:url]).to eq('https://wa.me/5511999999999')
|
||||
end
|
||||
end
|
||||
|
||||
@@ -128,9 +122,10 @@ RSpec.describe WebsiteBrandingService do
|
||||
end
|
||||
|
||||
it 'does not match lookalike domains' do
|
||||
result = described_class.new(url).perform
|
||||
expect(result[:social_handles][:facebook]).to be_nil
|
||||
expect(result[:social_handles][:instagram]).to be_nil
|
||||
result = described_class.new(email).perform
|
||||
types = result[:socials].map { |s| s[:type] }
|
||||
expect(types).not_to include('facebook')
|
||||
expect(types).not_to include('instagram')
|
||||
end
|
||||
end
|
||||
|
||||
@@ -148,8 +143,8 @@ RSpec.describe WebsiteBrandingService do
|
||||
end
|
||||
|
||||
it 'resolves the relative favicon URL' do
|
||||
result = described_class.new(url).perform
|
||||
expect(result[:branding][:favicon]).to eq('https://example.com/favicon.ico')
|
||||
result = described_class.new(email).perform
|
||||
expect(result[:logos].first[:url]).to eq('https://example.com/favicon.ico')
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user