feat: account enrichment using context.dev [UPM-27] (#13978)
## Account branding enrichment during signup

This PR does the following:

### Replace Firecrawl with Context.dev

Switches the enterprise brand lookup from Firecrawl to Context.dev for better data quality, built-in caching, and automatic filtering of free/disposable email providers. The service interface changes from URL input to email input to match Context.dev's email endpoint. OSS still falls back to basic HTML scraping, with a normalized output shape across both paths.

The enterprise path intentionally does not fall back to HTML scraping on failure — speed matters more than completeness. We want the user on the editable onboarding form fast, and a slow fallback scrape is worse than letting them fill it in.

Requires `CONTEXT_DEV_API_KEY` in Super Admin → App Config. Without it, the service falls back to OSS HTML scraping.

### Add job to enrich account details

After account creation, `Account::BrandingEnrichmentJob` looks up the signup email and pre-fills the account name, colors, logos, social links, and industry into `custom_attributes['brand_info']`.

The job signals completion via a short-lived Redis key (30s TTL) + an ActionCable broadcast (`account.enrichment_completed`). The Redis key lets the frontend distinguish "still running" from "finished with no results."
This commit is contained in:
@@ -34,9 +34,9 @@ module Enterprise::SuperAdmin::AppConfigsController
|
||||
end
|
||||
|
||||
def internal_config_options
|
||||
%w[CHATWOOT_INBOX_TOKEN CHATWOOT_INBOX_HMAC_KEY CLOUD_ANALYTICS_TOKEN CLEARBIT_API_KEY DASHBOARD_SCRIPTS INACTIVE_WHATSAPP_NUMBERS
|
||||
SKIP_INCOMING_BCC_PROCESSING CAPTAIN_CLOUD_PLAN_LIMITS ACCOUNT_SECURITY_NOTIFICATION_WEBHOOK_URL CHATWOOT_INSTANCE_ADMIN_EMAIL
|
||||
OG_IMAGE_CDN_URL OG_IMAGE_CLIENT_REF CLOUDFLARE_API_KEY CLOUDFLARE_ZONE_ID BLOCKED_EMAIL_DOMAINS
|
||||
%w[CHATWOOT_INBOX_TOKEN CHATWOOT_INBOX_HMAC_KEY CLOUD_ANALYTICS_TOKEN CLEARBIT_API_KEY CONTEXT_DEV_API_KEY DASHBOARD_SCRIPTS
|
||||
INACTIVE_WHATSAPP_NUMBERS SKIP_INCOMING_BCC_PROCESSING CAPTAIN_CLOUD_PLAN_LIMITS ACCOUNT_SECURITY_NOTIFICATION_WEBHOOK_URL
|
||||
CHATWOOT_INSTANCE_ADMIN_EMAIL OG_IMAGE_CDN_URL OG_IMAGE_CLIENT_REF CLOUDFLARE_API_KEY CLOUDFLARE_ZONE_ID BLOCKED_EMAIL_DOMAINS
|
||||
OTEL_PROVIDER LANGFUSE_PUBLIC_KEY LANGFUSE_SECRET_KEY LANGFUSE_BASE_URL]
|
||||
end
|
||||
|
||||
|
||||
@@ -1,112 +1,63 @@
|
||||
module Enterprise::WebsiteBrandingService
|
||||
FIRECRAWL_SCRAPE_ENDPOINT = 'https://api.firecrawl.dev/v2/scrape'.freeze
|
||||
|
||||
INDUSTRY_CATEGORIES = [
|
||||
'Technology',
|
||||
'E-commerce',
|
||||
'Healthcare',
|
||||
'Education',
|
||||
'Finance',
|
||||
'Real Estate',
|
||||
'Marketing',
|
||||
'Travel & Hospitality',
|
||||
'Food & Beverage',
|
||||
'Media & Entertainment',
|
||||
'Professional Services',
|
||||
'Non-profit',
|
||||
'Other'
|
||||
].freeze
|
||||
CONTEXT_DEV_ENDPOINT = 'https://api.context.dev/v1/brand/retrieve-by-email'.freeze
|
||||
|
||||
def perform
|
||||
return super unless firecrawl_enabled?
|
||||
return super unless context_dev_enabled?
|
||||
|
||||
response = perform_firecrawl_request
|
||||
process_firecrawl_response(response)
|
||||
response = fetch_brand
|
||||
process_response(response)
|
||||
rescue StandardError => e
|
||||
Rails.logger.error "[WebsiteBranding] Firecrawl failed: #{e.message}, falling back to basic scrape"
|
||||
super
|
||||
Rails.logger.error "[WebsiteBranding] Context.dev failed: #{e.message}"
|
||||
nil
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def firecrawl_enabled?
|
||||
firecrawl_api_key.present?
|
||||
def context_dev_enabled?
|
||||
context_dev_api_key.present?
|
||||
end
|
||||
|
||||
def firecrawl_api_key
|
||||
InstallationConfig.find_by(name: 'CAPTAIN_FIRECRAWL_API_KEY')&.value
|
||||
def context_dev_api_key
|
||||
InstallationConfig.find_by(name: 'CONTEXT_DEV_API_KEY')&.value
|
||||
end
|
||||
|
||||
def perform_firecrawl_request
|
||||
HTTParty.post(
|
||||
FIRECRAWL_SCRAPE_ENDPOINT,
|
||||
body: scrape_payload.to_json,
|
||||
def fetch_brand
|
||||
HTTParty.get(
|
||||
CONTEXT_DEV_ENDPOINT,
|
||||
query: { email: @email },
|
||||
headers: {
|
||||
'Authorization' => "Bearer #{firecrawl_api_key}",
|
||||
'Authorization' => "Bearer #{context_dev_api_key}",
|
||||
'Content-Type' => 'application/json'
|
||||
}
|
||||
)
|
||||
end
|
||||
|
||||
def scrape_payload
|
||||
{
|
||||
url: @url,
|
||||
onlyMainContent: false,
|
||||
formats: [
|
||||
{
|
||||
type: 'json',
|
||||
schema: extract_schema,
|
||||
prompt: 'Extract the business name, primary language, and industry category from this website.'
|
||||
},
|
||||
'branding',
|
||||
'links'
|
||||
]
|
||||
}
|
||||
end
|
||||
|
||||
def extract_schema
|
||||
{
|
||||
type: 'object',
|
||||
properties: {
|
||||
business_name: { type: 'string', description: 'The name of the business or company' },
|
||||
language: { type: 'string', description: 'Primary language as ISO 639-1 code (e.g., en, es, fr)' },
|
||||
industry_category: { type: 'string', enum: INDUSTRY_CATEGORIES, description: 'Industry category for this business' }
|
||||
},
|
||||
required: %w[business_name]
|
||||
}
|
||||
end
|
||||
|
||||
def process_firecrawl_response(response)
|
||||
def process_response(response)
|
||||
@http_status = response.code
|
||||
raise "API Error: #{response.message} (Status: #{response.code})" unless response.success?
|
||||
|
||||
format_firecrawl_response(response)
|
||||
brand = response.parsed_response&.dig('brand')
|
||||
return nil if brand.blank?
|
||||
|
||||
format_brand(brand)
|
||||
end
|
||||
|
||||
def format_firecrawl_response(response)
|
||||
data = response.parsed_response
|
||||
extract = data.dig('data', 'json') || {}
|
||||
brand = data.dig('data', 'branding') || {}
|
||||
links = data.dig('data', 'links') || []
|
||||
|
||||
def format_brand(brand)
|
||||
{
|
||||
business_name: extract['business_name'],
|
||||
language: extract['language'],
|
||||
industry_category: extract['industry_category'],
|
||||
social_handles: extract_social_from_links(links),
|
||||
branding: extract_firecrawl_branding(brand)
|
||||
}
|
||||
end
|
||||
|
||||
def extract_firecrawl_branding(brand)
|
||||
{
|
||||
favicon: url_or_nil(brand.dig('images', 'favicon')),
|
||||
primary_color: brand.dig('colors', 'primary')
|
||||
}
|
||||
end
|
||||
|
||||
def url_or_nil(value)
|
||||
return nil if value.blank? || !value.start_with?('http')
|
||||
|
||||
value
|
||||
domain: brand['domain'],
|
||||
title: brand['title'],
|
||||
description: brand['description'],
|
||||
slogan: brand['slogan'],
|
||||
phone: brand['phone'],
|
||||
address: brand['address'],
|
||||
colors: brand['colors'] || [],
|
||||
logos: brand['logos'] || [],
|
||||
socials: brand['socials'] || [],
|
||||
links: brand['links'],
|
||||
email: @email,
|
||||
industries: brand.dig('industries', 'eic') || [],
|
||||
stock: brand['stock'],
|
||||
is_nsfw: brand['is_nsfw'] || false
|
||||
}.deep_symbolize_keys
|
||||
end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user