feat: account enrichment using context.dev [UPM-27] (#13978)

## Account branding enrichment during signup

This PR does the following:

### Replace Firecrawl with Context.dev

Switches the enterprise brand lookup from Firecrawl to Context.dev for
better data quality, built-in caching, and automatic filtering of
free/disposable email providers. The service now takes an email address
instead of a URL, matching Context.dev's email endpoint. The OSS build
still uses basic HTML scraping, and both paths return the same
normalized output shape.

The enterprise path intentionally does not fall back to HTML scraping on
failure — speed matters more than completeness. We want the user on the
editable onboarding form fast, and a slow fallback scrape is worse than
letting them fill it in.

The enterprise lookup requires `CONTEXT_DEV_API_KEY` to be set in
Super Admin → App Config. Without it, the service falls back to the OSS
HTML scraper.

### Add job to enrich account details

After account creation, `Account::BrandingEnrichmentJob` looks up the
signup email and pre-fills the account name, colors, logos, social
links, and industry into `custom_attributes['brand_info']`.

The job signals completion via a short-lived Redis key (30s TTL) and an
ActionCable broadcast (`account.enrichment_completed`). The Redis key
lets the frontend distinguish "still running" from "finished with no
results."
This commit is contained in:
Shivam Mishra
2026-04-08 11:16:52 +05:30
committed by GitHub
parent 871f2f4d56
commit e5107604a0
10 changed files with 250 additions and 288 deletions

View File

@@ -7,164 +7,111 @@ end
RSpec.describe Enterprise::WebsiteBrandingService do
describe '#perform' do
subject(:service) { test_klass.new(url) }
subject(:service) { test_klass.new(email) }
let(:url) { 'https://example.com' }
let(:api_key) { 'test-firecrawl-api-key' }
let(:scrape_endpoint) { described_class::FIRECRAWL_SCRAPE_ENDPOINT }
let(:fallback_html) { '<html lang="en"><head><title>Fallback</title></head><body></body></html>' }
let(:email) { 'user@example.com' }
let(:api_key) { 'test-context-dev-api-key' }
let(:endpoint) { described_class::CONTEXT_DEV_ENDPOINT }
let(:fallback_html) { '<html><head><title>Fallback</title></head><body></body></html>' }
let(:success_response_body) do
{
success: true,
data: {
json: {
business_name: 'Acme Corp',
language: 'en',
industry_category: 'Technology'
},
branding: {
images: { logo: 'https://example.com/logo.png', favicon: 'https://example.com/favicon.png' },
colors: { primary: '#FF5733' }
},
links: [
'https://example.com/about',
'https://facebook.com/acmecorp',
'https://instagram.com/acme_corp',
'https://wa.me/1234567890',
'https://t.me/acmecorp',
'https://tiktok.com/@acmetok'
]
status: 'ok',
code: 200,
brand: {
domain: 'example.com',
title: 'Acme Corp',
description: 'Leading tech company',
slogan: 'We build things',
is_nsfw: false,
colors: [{ hex: '#FF5733', name: 'Orange Red' }],
logos: [{ url: 'https://media.brand.dev/logo.png', type: 'icon', mode: 'light',
colors: [{ hex: '#FF5733', name: 'Orange Red' }],
resolution: { width: 256, height: 256, aspect_ratio: 1 } }],
socials: [
{ type: 'facebook', url: 'https://facebook.com/acmecorp' },
{ type: 'instagram', url: 'https://instagram.com/acme_corp' }
],
industries: {
eic: [{ industry: 'Technology', subindustry: 'Software' }]
}
}
}.to_json
end
before do
stub_request(:get, url).to_return(status: 200, body: fallback_html, headers: { 'content-type' => 'text/html' })
stub_request(:get, 'https://example.com').to_return(status: 200, body: fallback_html,
headers: { 'content-type' => 'text/html' })
end
context 'when firecrawl is configured and API returns success' do
context 'when context.dev is configured and API returns success' do
before do
create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: api_key)
stub_request(:post, scrape_endpoint)
.with(headers: { 'Authorization' => "Bearer #{api_key}", 'Content-Type' => 'application/json' })
create(:installation_config, name: 'CONTEXT_DEV_API_KEY', value: api_key)
stub_request(:get, endpoint)
.with(query: { email: email }, headers: { 'Authorization' => "Bearer #{api_key}" })
.to_return(status: 200, body: success_response_body, headers: { 'content-type' => 'application/json' })
end
it 'returns business info and branding from firecrawl' do
it 'returns basic brand info' do
result = service.perform
expect(result).to eq({
business_name: 'Acme Corp',
language: 'en',
industry_category: 'Technology',
social_handles: {
whatsapp: '1234567890',
line: nil,
facebook: 'acmecorp',
instagram: 'acme_corp',
telegram: 'acmecorp',
tiktok: '@acmetok'
},
branding: {
favicon: 'https://example.com/favicon.png',
primary_color: '#FF5733'
}
})
expect(result).to include(domain: 'example.com', title: 'Acme Corp', description: 'Leading tech company',
slogan: 'We build things', is_nsfw: false, email: email)
end
# Checks the list-valued fields of the result against the stubbed Context.dev
# payload. Colors and socials are expected unchanged, while industries are
# expected as the flat array found under `industries.eic` in the response body.
it 'returns colors, logos, socials, and industries' do
result = service.perform
expect(result[:colors]).to eq([{ hex: '#FF5733', name: 'Orange Red' }])
# Only the URL of the first logo is asserted; its other attributes are not checked here.
expect(result[:logos].first[:url]).to eq('https://media.brand.dev/logo.png')
expect(result[:socials]).to eq([{ type: 'facebook', url: 'https://facebook.com/acmecorp' },
{ type: 'instagram', url: 'https://instagram.com/acme_corp' }])
expect(result[:industries]).to eq([{ industry: 'Technology', subindustry: 'Software' }])
end
end
context 'when firecrawl API returns an error' do
context 'when context.dev API returns an error' do
before do
create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: api_key)
stub_request(:post, scrape_endpoint)
.to_return(status: 422, body: '{"error": "Invalid URL"}', headers: {})
create(:installation_config, name: 'CONTEXT_DEV_API_KEY', value: api_key)
stub_request(:get, endpoint)
.with(query: { email: email })
.to_return(status: 422, body: '{"error": "FREE_EMAIL_DETECTED"}')
end
it 'falls back to basic scrape' do
result = service.perform
expect(result[:business_name]).to eq('Fallback')
expect(result[:industry_category]).to be_nil
it 'returns nil' do
expect(service.perform).to be_nil
end
end
context 'when firecrawl raises an exception' do
context 'when context.dev raises an exception' do
before do
create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: api_key)
stub_request(:post, scrape_endpoint).to_raise(StandardError.new('connection refused'))
create(:installation_config, name: 'CONTEXT_DEV_API_KEY', value: api_key)
stub_request(:get, endpoint).with(query: { email: email }).to_raise(StandardError.new('connection refused'))
end
it 'falls back to basic scrape' do
result = service.perform
expect(result[:business_name]).to eq('Fallback')
it 'returns nil' do
expect(service.perform).to be_nil
end
end
context 'when firecrawl is not configured' do
it 'uses basic scrape' do
expect(HTTParty).not_to receive(:post)
context 'when context.dev is not configured' do
it 'falls back to base scraper' do
result = service.perform
expect(result[:business_name]).to eq('Fallback')
expect(result[:title]).to eq('Fallback')
expect(result[:industries]).to eq([])
end
end
context 'when WhatsApp link uses api.whatsapp.com format' do
context 'when context.dev returns empty brand' do
before do
create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: api_key)
response = {
success: true,
data: {
json: { business_name: 'Acme Corp' },
links: ['https://api.whatsapp.com/send?phone=5511999999999&text=Hello']
}
}.to_json
stub_request(:post, scrape_endpoint)
.to_return(status: 200, body: response, headers: { 'content-type' => 'application/json' })
create(:installation_config, name: 'CONTEXT_DEV_API_KEY', value: api_key)
stub_request(:get, endpoint)
.with(query: { email: email })
.to_return(status: 200, body: { status: 'ok', code: 200, brand: nil }.to_json,
headers: { 'content-type' => 'application/json' })
end
it 'extracts phone number from query param' do
result = service.perform
expect(result[:social_handles][:whatsapp]).to eq('5511999999999')
end
end
context 'when WhatsApp link uses wa.me format' do
before do
create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: api_key)
response = {
success: true,
data: {
json: { business_name: 'Acme Corp' },
links: ['https://wa.me/+5511999999999']
}
}.to_json
stub_request(:post, scrape_endpoint)
.to_return(status: 200, body: response, headers: { 'content-type' => 'application/json' })
end
it 'extracts phone number from path' do
result = service.perform
expect(result[:social_handles][:whatsapp]).to eq('5511999999999')
end
end
context 'when links contain lookalike domains' do
before do
create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: api_key)
response = {
success: true,
data: {
json: { business_name: 'Acme Corp' },
links: ['https://notfacebook.com/page', 'https://fakeinstagram.com/user']
}
}.to_json
stub_request(:post, scrape_endpoint)
.to_return(status: 200, body: response, headers: { 'content-type' => 'application/json' })
end
it 'does not match lookalike domains' do
result = service.perform
expect(result[:social_handles][:facebook]).to be_nil
expect(result[:social_handles][:instagram]).to be_nil
it 'returns nil' do
expect(service.perform).to be_nil
end
end
end

View File

@@ -2,6 +2,7 @@ require 'rails_helper'
RSpec.describe WebsiteBrandingService do
describe '#perform' do
let(:email) { 'user@example.com' }
let(:url) { 'https://example.com' }
let(:html_body) do
<<~HTML
@@ -9,12 +10,21 @@ RSpec.describe WebsiteBrandingService do
<head>
<title>Acme Corp | Home</title>
<meta property="og:site_name" content="Acme Corp" />
<meta property="og:image" content="https://example.com/og-image.png" />
<meta name="theme-color" content="#FF5733" />
<link rel="icon" href="/favicon.ico" />
<link rel="shortcut icon" href="/favicon-32.png" />
<link rel="apple-touch-icon" href="/apple-touch-icon.png" />
<link rel="mask-icon" href="/safari-pinned-tab.svg" />
</head>
<body>
<header><a href="/">Home</a></header>
<header>
<a href="https://facebook.com/acmecorp">Facebook</a>
<a href="https://instagram.com/acme_corp">Instagram</a>
</header>
<nav>
<a href="https://facebook.com/acmecorp">FB</a>
<a href="https://t.me/acmecorp">TG</a>
</nav>
<footer>
<a href="https://facebook.com/acmecorp">Facebook</a>
<a href="https://instagram.com/acme_corp">Instagram</a>
@@ -31,26 +41,19 @@ RSpec.describe WebsiteBrandingService do
stub_request(:get, url).to_return(status: 200, body: html_body, headers: { 'content-type' => 'text/html' })
end
it 'extracts business info, branding, and social handles' do
result = described_class.new(url).perform
it 'extracts basic brand info' do
result = described_class.new(email).perform
expect(result).to eq({
business_name: 'Acme Corp',
language: 'en',
industry_category: nil,
social_handles: {
whatsapp: '1234567890',
line: nil,
facebook: 'acmecorp',
instagram: 'acme_corp',
telegram: 'acmecorp',
tiktok: '@acmetok'
},
branding: {
favicon: 'https://example.com/favicon.ico',
primary_color: '#FF5733'
}
})
expect(result).to include(domain: 'example.com', title: 'Acme Corp', email: email,
description: nil, slogan: nil, is_nsfw: false, industries: [])
end
# Checks the list-valued fields the HTML scraper produces for the fixture page.
# The fixture links to facebook.com from several elements, yet `contain_exactly`
# permits only one 'facebook' entry, so socials are expected to be de-duplicated.
# NOTE(review): the whatsapp/tiktok links are not visible in this hunk — presumably
# they come from the rest of `html_body`; the color presumably comes from the
# theme-color meta tag. Confirm against the full spec file.
it 'extracts colors, logos, and socials' do
result = described_class.new(email).perform
# The scraper has no color name to report, hence `name: nil`.
expect(result[:colors]).to eq([{ hex: '#FF5733', name: nil }])
# The fixture's relative `/favicon.ico` href is expected as an absolute URL.
expect(result[:logos].first[:url]).to eq('https://example.com/favicon.ico')
expect(result[:socials].map { |s| s[:type] }).to contain_exactly('facebook', 'instagram', 'whatsapp', 'telegram', 'tiktok')
end
context 'when og:site_name is missing' do
@@ -64,17 +67,18 @@ RSpec.describe WebsiteBrandingService do
end
it 'falls back to the first segment of the title' do
result = described_class.new(url).perform
expect(result[:business_name]).to eq('Mon Entreprise')
expect(result[:language]).to eq('fr')
result = described_class.new(email).perform
expect(result[:title]).to eq('Mon Entreprise')
end
end
context 'when the page fails to load' do
before { stub_request(:get, url).to_return(status: 500, body: '') }
it 'returns nil' do
expect(described_class.new(url).perform).to be_nil
# The service instance is kept in a local so that `http_status` can be read
# after `perform`. With the page stubbed to respond 500, `perform` is expected
# to return nil and the status code to be exposed on the service.
it 'returns nil and sets http_status' do
service = described_class.new(email)
expect(service.perform).to be_nil
expect(service.http_status).to eq(500)
end
end
@@ -83,18 +87,7 @@ RSpec.describe WebsiteBrandingService do
it 'logs the error and returns nil' do
expect(Rails.logger).to receive(:error).with(/connection refused/)
expect(described_class.new(url).perform).to be_nil
end
end
context 'when URL has no scheme' do
before do
stub_request(:get, 'https://example.com').to_return(status: 200, body: html_body, headers: { 'content-type' => 'text/html' })
end
it 'prepends https://' do
result = described_class.new('example.com').perform
expect(result[:business_name]).to eq('Acme Corp')
expect(described_class.new(email).perform).to be_nil
end
end
@@ -109,8 +102,9 @@ RSpec.describe WebsiteBrandingService do
end
it 'extracts phone from query param' do
result = described_class.new(url).perform
expect(result[:social_handles][:whatsapp]).to eq('5511999999999')
result = described_class.new(email).perform
whatsapp = result[:socials].find { |s| s[:type] == 'whatsapp' }
expect(whatsapp[:url]).to eq('https://wa.me/5511999999999')
end
end
@@ -128,9 +122,10 @@ RSpec.describe WebsiteBrandingService do
end
it 'does not match lookalike domains' do
result = described_class.new(url).perform
expect(result[:social_handles][:facebook]).to be_nil
expect(result[:social_handles][:instagram]).to be_nil
result = described_class.new(email).perform
types = result[:socials].map { |s| s[:type] }
expect(types).not_to include('facebook')
expect(types).not_to include('instagram')
end
end
@@ -148,8 +143,8 @@ RSpec.describe WebsiteBrandingService do
end
it 'resolves the relative favicon URL' do
result = described_class.new(url).perform
expect(result[:branding][:favicon]).to eq('https://example.com/favicon.ico')
result = described_class.new(email).perform
expect(result[:logos].first[:url]).to eq('https://example.com/favicon.ico')
end
end
end