diff --git a/app/controllers/api/v1/accounts_controller.rb b/app/controllers/api/v1/accounts_controller.rb index 2d14fe7ca..7176d6e1b 100644 --- a/app/controllers/api/v1/accounts_controller.rb +++ b/app/controllers/api/v1/accounts_controller.rb @@ -30,6 +30,7 @@ class Api::V1::AccountsController < Api::BaseController locale: account_params[:locale], user: current_user ).perform + enqueue_branding_enrichment if @user # Authenticated users (dashboard "add account") and api_only signups # need the full response with account_id. API-only deployments have no @@ -69,6 +70,16 @@ class Api::V1::AccountsController < Api::BaseController private + def enqueue_branding_enrichment + return if account_params[:email].blank? + + Account::BrandingEnrichmentJob.perform_later(@account.id, account_params[:email]) + Redis::Alfred.set(format(Redis::Alfred::ACCOUNT_ONBOARDING_ENRICHMENT, account_id: @account.id), '1', ex: 30) + rescue StandardError => e + # Enrichment is optional — never let queue/Redis failures abort signup + ChatwootExceptionTracker.new(e).capture_exception + end + def ensure_account_name # ensure that account_name and user_full_name is present # this is becuase the account builder and the models validations are not triggered diff --git a/app/jobs/account/branding_enrichment_job.rb b/app/jobs/account/branding_enrichment_job.rb new file mode 100644 index 000000000..2898604ca --- /dev/null +++ b/app/jobs/account/branding_enrichment_job.rb @@ -0,0 +1,32 @@ +class Account::BrandingEnrichmentJob < ApplicationJob + queue_as :low + + def perform(account_id, email) + result = WebsiteBrandingService.new(email).perform + return if result.blank? + + account = Account.find(account_id) + account.name = result[:title] if result[:title].present? + account.custom_attributes['brand_info'] = result if account.custom_attributes['brand_info'].blank? + account.save! if account.changed? 
+ ensure + finish_enrichment(account_id) + end + + private + + def finish_enrichment(account_id) + Redis::Alfred.delete(format(Redis::Alfred::ACCOUNT_ONBOARDING_ENRICHMENT, account_id: account_id)) + + account = Account.find(account_id) + if account.custom_attributes['onboarding_step'] == 'enrichment' + account.custom_attributes['onboarding_step'] = 'account_details' + account.save! + end + + user = account.administrators.first + return unless user + + ActionCableBroadcastJob.perform_later([user.pubsub_token], 'account.enrichment_completed', { account_id: account_id }) + end +end diff --git a/app/services/concerns/social_link_parser.rb b/app/services/social_link_parser.rb similarity index 100% rename from app/services/concerns/social_link_parser.rb rename to app/services/social_link_parser.rb diff --git a/app/services/website_branding_service.rb b/app/services/website_branding_service.rb index 89326e4b6..3592267ff 100644 --- a/app/services/website_branding_service.rb +++ b/app/services/website_branding_service.rb @@ -1,8 +1,15 @@ class WebsiteBrandingService include SocialLinkParser - def initialize(url) - @url = normalize_url(url) + attr_reader :http_status + + DATA_DEFAULTS = { description: nil, slogan: nil, phone: nil, address: nil, links: nil, stock: nil, industries: [], is_nsfw: false }.freeze + + def initialize(email) + @email = email + @domain = email.split('@').last&.downcase&.strip + @url = "https://#{@domain}" + @http_status = nil end def perform @@ -11,13 +18,14 @@ class WebsiteBrandingService links = extract_links(doc) - { - business_name: extract_business_name(doc), - language: extract_language(doc), - industry_category: nil, - social_handles: extract_social_from_links(links), - branding: extract_branding(doc) - } + DATA_DEFAULTS.merge({ + domain: @domain, + title: extract_title(doc), + colors: extract_colors(doc), + logos: extract_logos(doc), + socials: build_socials(links), + email: @email + }) rescue StandardError => e Rails.logger.error 
"[WebsiteBranding] #{e.message}" nil @@ -25,12 +33,9 @@ class WebsiteBrandingService private - def normalize_url(url) - url.match?(%r{\Ahttps?://}) ? url : "https://#{url}" - end - def fetch_page response = HTTParty.get(@url, follow_redirects: true, timeout: 15) + @http_status = response.code return nil unless response.success? Nokogiri::HTML(response.body) @@ -39,7 +44,7 @@ class WebsiteBrandingService nil end - def extract_business_name(doc) + def extract_title(doc) og_site_name = doc.at_css('meta[property="og:site_name"]')&.[]('content') return og_site_name.strip if og_site_name.present? @@ -47,8 +52,37 @@ class WebsiteBrandingService title&.strip&.split(/\s*[|\-โ€“โ€”ยท:]+\s*/)&.first end - def extract_language(doc) - doc.at_css('html')&.[]('lang')&.split('-')&.first&.downcase + def extract_colors(doc) + color = doc.at_css('meta[name="theme-color"]')&.[]('content') + return [] if color.blank? + + [{ hex: color, name: nil }] + end + + def extract_logos(doc) + favicon = doc.at_css('link[rel*="icon"]')&.[]('href') + return [] if favicon.blank? + + url = resolve_url(favicon) + return [] if url.blank? + + [{ url: url, type: nil, mode: nil, colors: [], resolution: { aspect_ratio: 1 } }] + end + + def build_socials(links) + handles = extract_social_from_links(links) + handles.filter_map do |platform, handle| + next if handle.blank? 
+ + url = reconstruct_social_url(platform, handle) + { type: platform.to_s, url: url } + end + end + + def reconstruct_social_url(platform, handle) + base_urls = { whatsapp: 'https://wa.me/', line: 'https://line.me/', facebook: 'https://facebook.com/', + instagram: 'https://instagram.com/', telegram: 'https://t.me/', tiktok: 'https://tiktok.com/' } + "#{base_urls[platform]}#{handle}" end def extract_links(doc) @@ -62,24 +96,6 @@ class WebsiteBrandingService end.uniq end - def extract_branding(doc) - { - favicon: extract_favicon(doc), - primary_color: extract_theme_color(doc) - } - end - - def extract_favicon(doc) - favicon = doc.at_css('link[rel*="icon"]')&.[]('href') - return nil if favicon.blank? - - resolve_url(favicon) - end - - def extract_theme_color(doc) - doc.at_css('meta[name="theme-color"]')&.[]('content') - end - def resolve_url(url) return nil if url.blank? return url if url.start_with?('http') diff --git a/config/installation_config.yml b/config/installation_config.yml index 34cb736bf..884dd2e58 100644 --- a/config/installation_config.yml +++ b/config/installation_config.yml @@ -211,6 +211,13 @@ type: code # End of Captain Config +# ------- Context.dev Config ------- # +- name: CONTEXT_DEV_API_KEY + display_title: 'Context.dev API Key' + description: 'API key for Context.dev branding service used during account onboarding' + type: secret +# ------- End of Context.dev Config ------- # + # ------- Chatwoot Internal Config for Cloud ----# - name: CHATWOOT_INBOX_TOKEN value: diff --git a/enterprise/app/controllers/enterprise/super_admin/app_configs_controller.rb b/enterprise/app/controllers/enterprise/super_admin/app_configs_controller.rb index 87ac8f6d6..f91f12708 100644 --- a/enterprise/app/controllers/enterprise/super_admin/app_configs_controller.rb +++ b/enterprise/app/controllers/enterprise/super_admin/app_configs_controller.rb @@ -34,9 +34,9 @@ module Enterprise::SuperAdmin::AppConfigsController end def internal_config_options - 
%w[CHATWOOT_INBOX_TOKEN CHATWOOT_INBOX_HMAC_KEY CLOUD_ANALYTICS_TOKEN CLEARBIT_API_KEY DASHBOARD_SCRIPTS INACTIVE_WHATSAPP_NUMBERS - SKIP_INCOMING_BCC_PROCESSING CAPTAIN_CLOUD_PLAN_LIMITS ACCOUNT_SECURITY_NOTIFICATION_WEBHOOK_URL CHATWOOT_INSTANCE_ADMIN_EMAIL - OG_IMAGE_CDN_URL OG_IMAGE_CLIENT_REF CLOUDFLARE_API_KEY CLOUDFLARE_ZONE_ID BLOCKED_EMAIL_DOMAINS + %w[CHATWOOT_INBOX_TOKEN CHATWOOT_INBOX_HMAC_KEY CLOUD_ANALYTICS_TOKEN CLEARBIT_API_KEY CONTEXT_DEV_API_KEY DASHBOARD_SCRIPTS + INACTIVE_WHATSAPP_NUMBERS SKIP_INCOMING_BCC_PROCESSING CAPTAIN_CLOUD_PLAN_LIMITS ACCOUNT_SECURITY_NOTIFICATION_WEBHOOK_URL + CHATWOOT_INSTANCE_ADMIN_EMAIL OG_IMAGE_CDN_URL OG_IMAGE_CLIENT_REF CLOUDFLARE_API_KEY CLOUDFLARE_ZONE_ID BLOCKED_EMAIL_DOMAINS OTEL_PROVIDER LANGFUSE_PUBLIC_KEY LANGFUSE_SECRET_KEY LANGFUSE_BASE_URL] end diff --git a/enterprise/app/services/enterprise/website_branding_service.rb b/enterprise/app/services/enterprise/website_branding_service.rb index 6efdd5051..a1925e80d 100644 --- a/enterprise/app/services/enterprise/website_branding_service.rb +++ b/enterprise/app/services/enterprise/website_branding_service.rb @@ -1,112 +1,63 @@ module Enterprise::WebsiteBrandingService - FIRECRAWL_SCRAPE_ENDPOINT = 'https://api.firecrawl.dev/v2/scrape'.freeze - - INDUSTRY_CATEGORIES = [ - 'Technology', - 'E-commerce', - 'Healthcare', - 'Education', - 'Finance', - 'Real Estate', - 'Marketing', - 'Travel & Hospitality', - 'Food & Beverage', - 'Media & Entertainment', - 'Professional Services', - 'Non-profit', - 'Other' - ].freeze + CONTEXT_DEV_ENDPOINT = 'https://api.context.dev/v1/brand/retrieve-by-email'.freeze def perform - return super unless firecrawl_enabled? + return super unless context_dev_enabled? 
- response = perform_firecrawl_request - process_firecrawl_response(response) + response = fetch_brand + process_response(response) rescue StandardError => e - Rails.logger.error "[WebsiteBranding] Firecrawl failed: #{e.message}, falling back to basic scrape" - super + Rails.logger.error "[WebsiteBranding] Context.dev failed: #{e.message}" + nil end private - def firecrawl_enabled? - firecrawl_api_key.present? + def context_dev_enabled? + context_dev_api_key.present? end - def firecrawl_api_key - InstallationConfig.find_by(name: 'CAPTAIN_FIRECRAWL_API_KEY')&.value + def context_dev_api_key + InstallationConfig.find_by(name: 'CONTEXT_DEV_API_KEY')&.value end - def perform_firecrawl_request - HTTParty.post( - FIRECRAWL_SCRAPE_ENDPOINT, - body: scrape_payload.to_json, + def fetch_brand + HTTParty.get( + CONTEXT_DEV_ENDPOINT, + query: { email: @email }, headers: { - 'Authorization' => "Bearer #{firecrawl_api_key}", + 'Authorization' => "Bearer #{context_dev_api_key}", 'Content-Type' => 'application/json' } ) end - def scrape_payload - { - url: @url, - onlyMainContent: false, - formats: [ - { - type: 'json', - schema: extract_schema, - prompt: 'Extract the business name, primary language, and industry category from this website.' - }, - 'branding', - 'links' - ] - } - end - - def extract_schema - { - type: 'object', - properties: { - business_name: { type: 'string', description: 'The name of the business or company' }, - language: { type: 'string', description: 'Primary language as ISO 639-1 code (e.g., en, es, fr)' }, - industry_category: { type: 'string', enum: INDUSTRY_CATEGORIES, description: 'Industry category for this business' } - }, - required: %w[business_name] - } - end - - def process_firecrawl_response(response) + def process_response(response) + @http_status = response.code raise "API Error: #{response.message} (Status: #{response.code})" unless response.success? 
- format_firecrawl_response(response) + brand = response.parsed_response&.dig('brand') + return nil if brand.blank? + + format_brand(brand) end - def format_firecrawl_response(response) - data = response.parsed_response - extract = data.dig('data', 'json') || {} - brand = data.dig('data', 'branding') || {} - links = data.dig('data', 'links') || [] - + def format_brand(brand) { - business_name: extract['business_name'], - language: extract['language'], - industry_category: extract['industry_category'], - social_handles: extract_social_from_links(links), - branding: extract_firecrawl_branding(brand) - } - end - - def extract_firecrawl_branding(brand) - { - favicon: url_or_nil(brand.dig('images', 'favicon')), - primary_color: brand.dig('colors', 'primary') - } - end - - def url_or_nil(value) - return nil if value.blank? || !value.start_with?('http') - - value + domain: brand['domain'], + title: brand['title'], + description: brand['description'], + slogan: brand['slogan'], + phone: brand['phone'], + address: brand['address'], + colors: brand['colors'] || [], + logos: brand['logos'] || [], + socials: brand['socials'] || [], + links: brand['links'], + email: @email, + industries: brand.dig('industries', 'eic') || [], + stock: brand['stock'], + is_nsfw: brand['is_nsfw'] || false + }.deep_symbolize_keys end end diff --git a/lib/redis/redis_keys.rb b/lib/redis/redis_keys.rb index 8c9361ab5..59e33036d 100644 --- a/lib/redis/redis_keys.rb +++ b/lib/redis/redis_keys.rb @@ -50,6 +50,9 @@ module Redis::RedisKeys ASSIGNMENT_KEY = 'ASSIGNMENT::%d::AGENT::%d::CONVERSATION::%d'.freeze ASSIGNMENT_KEY_PATTERN = 'ASSIGNMENT::%d::AGENT::%d::*'.freeze + ## Account Onboarding + ACCOUNT_ONBOARDING_ENRICHMENT = 'ONBOARDING_ENRICHMENT::%d'.freeze + ## Account Email Rate Limiting ACCOUNT_OUTBOUND_EMAIL_COUNT_KEY = 'OUTBOUND_EMAIL_COUNT::%d::%s'.freeze end diff --git a/spec/enterprise/services/enterprise/website_branding_service_spec.rb 
b/spec/enterprise/services/enterprise/website_branding_service_spec.rb index 0907db518..64b6dfb53 100644 --- a/spec/enterprise/services/enterprise/website_branding_service_spec.rb +++ b/spec/enterprise/services/enterprise/website_branding_service_spec.rb @@ -7,164 +7,111 @@ end RSpec.describe Enterprise::WebsiteBrandingService do describe '#perform' do - subject(:service) { test_klass.new(url) } + subject(:service) { test_klass.new(email) } - let(:url) { 'https://example.com' } - let(:api_key) { 'test-firecrawl-api-key' } - let(:scrape_endpoint) { described_class::FIRECRAWL_SCRAPE_ENDPOINT } - let(:fallback_html) { 'Fallback' } + let(:email) { 'user@example.com' } + let(:api_key) { 'test-context-dev-api-key' } + let(:endpoint) { described_class::CONTEXT_DEV_ENDPOINT } + let(:fallback_html) { 'Fallback' } let(:success_response_body) do { - success: true, - data: { - json: { - business_name: 'Acme Corp', - language: 'en', - industry_category: 'Technology' - }, - branding: { - images: { logo: 'https://example.com/logo.png', favicon: 'https://example.com/favicon.png' }, - colors: { primary: '#FF5733' } - }, - links: [ - 'https://example.com/about', - 'https://facebook.com/acmecorp', - 'https://instagram.com/acme_corp', - 'https://wa.me/1234567890', - 'https://t.me/acmecorp', - 'https://tiktok.com/@acmetok' - ] + status: 'ok', + code: 200, + brand: { + domain: 'example.com', + title: 'Acme Corp', + description: 'Leading tech company', + slogan: 'We build things', + is_nsfw: false, + colors: [{ hex: '#FF5733', name: 'Orange Red' }], + logos: [{ url: 'https://media.brand.dev/logo.png', type: 'icon', mode: 'light', + colors: [{ hex: '#FF5733', name: 'Orange Red' }], + resolution: { width: 256, height: 256, aspect_ratio: 1 } }], + socials: [ + { type: 'facebook', url: 'https://facebook.com/acmecorp' }, + { type: 'instagram', url: 'https://instagram.com/acme_corp' } + ], + industries: { + eic: [{ industry: 'Technology', subindustry: 'Software' }] + } } }.to_json end before 
do - stub_request(:get, url).to_return(status: 200, body: fallback_html, headers: { 'content-type' => 'text/html' }) + stub_request(:get, 'https://example.com').to_return(status: 200, body: fallback_html, + headers: { 'content-type' => 'text/html' }) end - context 'when firecrawl is configured and API returns success' do + context 'when context.dev is configured and API returns success' do before do - create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: api_key) - stub_request(:post, scrape_endpoint) - .with(headers: { 'Authorization' => "Bearer #{api_key}", 'Content-Type' => 'application/json' }) + create(:installation_config, name: 'CONTEXT_DEV_API_KEY', value: api_key) + stub_request(:get, endpoint) + .with(query: { email: email }, headers: { 'Authorization' => "Bearer #{api_key}" }) .to_return(status: 200, body: success_response_body, headers: { 'content-type' => 'application/json' }) end - it 'returns business info and branding from firecrawl' do + it 'returns basic brand info' do result = service.perform - expect(result).to eq({ - business_name: 'Acme Corp', - language: 'en', - industry_category: 'Technology', - social_handles: { - whatsapp: '1234567890', - line: nil, - facebook: 'acmecorp', - instagram: 'acme_corp', - telegram: 'acmecorp', - tiktok: '@acmetok' - }, - branding: { - favicon: 'https://example.com/favicon.png', - primary_color: '#FF5733' - } - }) + expect(result).to include(domain: 'example.com', title: 'Acme Corp', description: 'Leading tech company', + slogan: 'We build things', is_nsfw: false, email: email) + end + + it 'returns colors, logos, socials, and industries' do + result = service.perform + + expect(result[:colors]).to eq([{ hex: '#FF5733', name: 'Orange Red' }]) + expect(result[:logos].first[:url]).to eq('https://media.brand.dev/logo.png') + expect(result[:socials]).to eq([{ type: 'facebook', url: 'https://facebook.com/acmecorp' }, + { type: 'instagram', url: 'https://instagram.com/acme_corp' }]) + 
expect(result[:industries]).to eq([{ industry: 'Technology', subindustry: 'Software' }]) end end - context 'when firecrawl API returns an error' do + context 'when context.dev API returns an error' do before do - create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: api_key) - stub_request(:post, scrape_endpoint) - .to_return(status: 422, body: '{"error": "Invalid URL"}', headers: {}) + create(:installation_config, name: 'CONTEXT_DEV_API_KEY', value: api_key) + stub_request(:get, endpoint) + .with(query: { email: email }) + .to_return(status: 422, body: '{"error": "FREE_EMAIL_DETECTED"}') end - it 'falls back to basic scrape' do - result = service.perform - expect(result[:business_name]).to eq('Fallback') - expect(result[:industry_category]).to be_nil + it 'returns nil' do + expect(service.perform).to be_nil end end - context 'when firecrawl raises an exception' do + context 'when context.dev raises an exception' do before do - create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: api_key) - stub_request(:post, scrape_endpoint).to_raise(StandardError.new('connection refused')) + create(:installation_config, name: 'CONTEXT_DEV_API_KEY', value: api_key) + stub_request(:get, endpoint).with(query: { email: email }).to_raise(StandardError.new('connection refused')) end - it 'falls back to basic scrape' do - result = service.perform - expect(result[:business_name]).to eq('Fallback') + it 'returns nil' do + expect(service.perform).to be_nil end end - context 'when firecrawl is not configured' do - it 'uses basic scrape' do - expect(HTTParty).not_to receive(:post) + context 'when context.dev is not configured' do + it 'falls back to base scraper' do result = service.perform - expect(result[:business_name]).to eq('Fallback') + expect(result[:title]).to eq('Fallback') + expect(result[:industries]).to eq([]) end end - context 'when WhatsApp link uses api.whatsapp.com format' do + context 'when context.dev returns empty brand' do before do - 
create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: api_key) - response = { - success: true, - data: { - json: { business_name: 'Acme Corp' }, - links: ['https://api.whatsapp.com/send?phone=5511999999999&text=Hello'] - } - }.to_json - stub_request(:post, scrape_endpoint) - .to_return(status: 200, body: response, headers: { 'content-type' => 'application/json' }) + create(:installation_config, name: 'CONTEXT_DEV_API_KEY', value: api_key) + stub_request(:get, endpoint) + .with(query: { email: email }) + .to_return(status: 200, body: { status: 'ok', code: 200, brand: nil }.to_json, + headers: { 'content-type' => 'application/json' }) end - it 'extracts phone number from query param' do - result = service.perform - expect(result[:social_handles][:whatsapp]).to eq('5511999999999') - end - end - - context 'when WhatsApp link uses wa.me format' do - before do - create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: api_key) - response = { - success: true, - data: { - json: { business_name: 'Acme Corp' }, - links: ['https://wa.me/+5511999999999'] - } - }.to_json - stub_request(:post, scrape_endpoint) - .to_return(status: 200, body: response, headers: { 'content-type' => 'application/json' }) - end - - it 'extracts phone number from path' do - result = service.perform - expect(result[:social_handles][:whatsapp]).to eq('5511999999999') - end - end - - context 'when links contain lookalike domains' do - before do - create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: api_key) - response = { - success: true, - data: { - json: { business_name: 'Acme Corp' }, - links: ['https://notfacebook.com/page', 'https://fakeinstagram.com/user'] - } - }.to_json - stub_request(:post, scrape_endpoint) - .to_return(status: 200, body: response, headers: { 'content-type' => 'application/json' }) - end - - it 'does not match lookalike domains' do - result = service.perform - expect(result[:social_handles][:facebook]).to be_nil - 
expect(result[:social_handles][:instagram]).to be_nil + it 'returns nil' do + expect(service.perform).to be_nil end end end diff --git a/spec/services/website_branding_service_spec.rb b/spec/services/website_branding_service_spec.rb index 19598fb59..e90da4c64 100644 --- a/spec/services/website_branding_service_spec.rb +++ b/spec/services/website_branding_service_spec.rb @@ -2,6 +2,7 @@ require 'rails_helper' RSpec.describe WebsiteBrandingService do describe '#perform' do + let(:email) { 'user@example.com' } let(:url) { 'https://example.com' } let(:html_body) do <<~HTML @@ -9,12 +10,21 @@ RSpec.describe WebsiteBrandingService do Acme Corp | Home - + + + -
Home
+
+ Facebook + Instagram +
+