fix: Add authentication to Firecrawl API, remove unused Robin AI references (#10737)
- Fixed Firecrawl webhook payloads to ensure proper data handling and delivery.
- Removed unused Robin AI code to improve codebase cleanliness and maintainability.
- Implemented authentication for the Firecrawl webhook endpoint to improve security. A token is generated and appended to the webhook URLs handed to Firecrawl, so that incoming webhook calls can be verified.
--------- Co-authored-by: Pranav <pranavrajs@gmail.com>
This commit is contained in:
@@ -1,57 +1,120 @@
|
||||
require 'rails_helper'
|
||||
|
||||
RSpec.describe 'Firecrawl Webhooks', type: :request do
|
||||
describe 'POST /enterprise/webhooks/firecrawl?assistant_id=:assistant_id' do
|
||||
let(:assistant_id) { 'asst_123' }
|
||||
describe 'POST /enterprise/webhooks/firecrawl?assistant_id=:assistant_id&token=:token' do
|
||||
let!(:api_key) { create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: 'test_api_key_123') }
|
||||
let!(:account) { create(:account) }
|
||||
let!(:assistant) { create(:captain_assistant, account: account) }
|
||||
|
||||
let(:payload_data) do
|
||||
{
|
||||
'markdown' => 'hello world',
|
||||
'metadata' => {
|
||||
'ogUrl' => 'https://example.com'
|
||||
}
|
||||
markdown: 'hello world',
|
||||
metadata: { ogUrl: 'https://example.com' }
|
||||
}
|
||||
end
|
||||
|
||||
context 'with crawl.page event type' do
|
||||
let(:valid_params) do
|
||||
{
|
||||
data: payload_data,
|
||||
type: 'crawl.page'
|
||||
}
|
||||
# Generate actual token using the helper
|
||||
let(:valid_token) do
|
||||
token_base = "#{api_key.value[-4..]}#{assistant.id}#{assistant.account_id}"
|
||||
Digest::SHA256.hexdigest(token_base)
|
||||
end
|
||||
|
||||
context 'with valid token' do
|
||||
context 'with crawl.page event type' do
|
||||
let(:valid_params) do
|
||||
{
|
||||
type: 'crawl.page',
|
||||
data: [payload_data]
|
||||
}
|
||||
end
|
||||
|
||||
it 'processes the webhook and returns success' do
|
||||
expect(Captain::Tools::FirecrawlParserJob).to receive(:perform_later)
|
||||
.with(
|
||||
assistant_id: assistant.id,
|
||||
payload: payload_data
|
||||
)
|
||||
|
||||
post(
|
||||
"/enterprise/webhooks/firecrawl?assistant_id=#{assistant.id}&token=#{valid_token}",
|
||||
params: valid_params,
|
||||
as: :json
|
||||
)
|
||||
expect(response).to have_http_status(:ok)
|
||||
expect(response.body).to be_empty
|
||||
end
|
||||
end
|
||||
|
||||
it 'processes the webhook and returns success' do
|
||||
expect(Captain::Tools::FirecrawlParserJob).to(
|
||||
receive(:perform_later)
|
||||
.with(
|
||||
assistant_id: assistant_id,
|
||||
payload: payload_data
|
||||
)
|
||||
)
|
||||
context 'with crawl.completed event type' do
|
||||
let(:valid_params) do
|
||||
{
|
||||
type: 'crawl.completed'
|
||||
}
|
||||
end
|
||||
|
||||
post("/enterprise/webhooks/firecrawl?assistant_id=#{assistant_id}",
|
||||
params: valid_params,
|
||||
as: :json)
|
||||
it 'returns success without enqueuing job' do
|
||||
expect(Captain::Tools::FirecrawlParserJob).not_to receive(:perform_later)
|
||||
|
||||
expect(response).to have_http_status(:ok)
|
||||
expect(response.body).to be_empty
|
||||
post("/enterprise/webhooks/firecrawl?assistant_id=#{assistant.id}&token=#{valid_token}",
|
||||
params: valid_params,
|
||||
as: :json)
|
||||
|
||||
expect(response).to have_http_status(:ok)
|
||||
expect(response.body).to be_empty
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context 'with crawl.completed event type' do
|
||||
let(:valid_params) do
|
||||
{ type: 'crawl.completed' }
|
||||
context 'with invalid token' do
|
||||
let(:invalid_params) do
|
||||
{
|
||||
type: 'crawl.page',
|
||||
data: [payload_data]
|
||||
}
|
||||
end
|
||||
|
||||
it 'returns success without enqueuing job' do
|
||||
expect(Captain::Tools::FirecrawlParserJob).not_to receive(:perform_later)
|
||||
|
||||
post("/enterprise/webhooks/firecrawl?assistant_id=#{assistant_id}",
|
||||
params: valid_params,
|
||||
it 'returns unauthorized status' do
|
||||
post("/enterprise/webhooks/firecrawl?assistant_id=#{assistant.id}&token=invalid_token",
|
||||
params: invalid_params,
|
||||
as: :json)
|
||||
|
||||
expect(response).to have_http_status(:ok)
|
||||
expect(response.body).to be_empty
|
||||
expect(response).to have_http_status(:unauthorized)
|
||||
end
|
||||
end
|
||||
|
||||
context 'with invalid assistant_id' do
|
||||
context 'with non-existent assistant_id' do
|
||||
it 'returns not found status' do
|
||||
post("/enterprise/webhooks/firecrawl?assistant_id=invalid_id&token=#{valid_token}",
|
||||
params: { type: 'crawl.page', data: [payload_data] },
|
||||
as: :json)
|
||||
|
||||
expect(response).to have_http_status(:not_found)
|
||||
end
|
||||
end
|
||||
|
||||
context 'with nil assistant_id' do
|
||||
it 'returns not found status' do
|
||||
post("/enterprise/webhooks/firecrawl?token=#{valid_token}",
|
||||
params: { type: 'crawl.page', data: [payload_data] },
|
||||
as: :json)
|
||||
|
||||
expect(response).to have_http_status(:not_found)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
context 'when CAPTAIN_FIRECRAWL_API_KEY is not configured' do
|
||||
before do
|
||||
api_key.destroy
|
||||
end
|
||||
|
||||
it 'returns unauthorized status' do
|
||||
post("/enterprise/webhooks/firecrawl?assistant_id=#{assistant.id}&token=#{valid_token}",
|
||||
params: { type: 'crawl.page', data: [payload_data] },
|
||||
as: :json)
|
||||
|
||||
expect(response).to have_http_status(:unauthorized)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -8,21 +8,61 @@ RSpec.describe Captain::Documents::CrawlJob, type: :job do
|
||||
describe '#perform' do
|
||||
context 'when CAPTAIN_FIRECRAWL_API_KEY is configured' do
|
||||
let(:firecrawl_service) { instance_double(Captain::Tools::FirecrawlService) }
|
||||
let(:account) { document.account }
|
||||
let(:token) { Digest::SHA256.hexdigest("-key#{document.assistant_id}#{document.account_id}") }
|
||||
|
||||
before do
|
||||
allow(Captain::Tools::FirecrawlService).to receive(:new).and_return(firecrawl_service)
|
||||
allow(firecrawl_service).to receive(:perform)
|
||||
create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: 'test-key')
|
||||
end
|
||||
|
||||
it 'uses FirecrawlService to crawl the page' do
|
||||
create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: 'test-key')
|
||||
context 'with account usage limits' do
|
||||
before do
|
||||
allow(account).to receive(:usage_limits).and_return({ captain: { documents: { available: 20 } } })
|
||||
end
|
||||
|
||||
expect(firecrawl_service).to receive(:perform).with(
|
||||
document.external_link,
|
||||
"#{webhook_url}?assistant_id=#{assistant_id}"
|
||||
)
|
||||
it 'uses FirecrawlService with the correct crawl limit' do
|
||||
expect(firecrawl_service).to receive(:perform).with(
|
||||
document.external_link,
|
||||
"#{webhook_url}?assistant_id=#{assistant_id}&token=#{token}",
|
||||
20
|
||||
)
|
||||
|
||||
described_class.perform_now(document)
|
||||
described_class.perform_now(document)
|
||||
end
|
||||
end
|
||||
|
||||
context 'when crawl limit exceeds maximum' do
|
||||
before do
|
||||
allow(account).to receive(:usage_limits).and_return({ captain: { documents: { available: 1000 } } })
|
||||
end
|
||||
|
||||
it 'caps the crawl limit at 500' do
|
||||
expect(firecrawl_service).to receive(:perform).with(
|
||||
document.external_link,
|
||||
"#{webhook_url}?assistant_id=#{assistant_id}&token=#{token}",
|
||||
500
|
||||
)
|
||||
|
||||
described_class.perform_now(document)
|
||||
end
|
||||
end
|
||||
|
||||
context 'with no usage limits configured' do
|
||||
before do
|
||||
allow(account).to receive(:usage_limits).and_return({})
|
||||
end
|
||||
|
||||
it 'uses default crawl limit of 10' do
|
||||
expect(firecrawl_service).to receive(:perform).with(
|
||||
document.external_link,
|
||||
"#{webhook_url}?assistant_id=#{assistant_id}&token=#{token}",
|
||||
10
|
||||
)
|
||||
|
||||
described_class.perform_now(document)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -7,9 +7,9 @@ RSpec.describe Captain::Tools::FirecrawlParserJob, type: :job do
|
||||
{
|
||||
markdown: 'Launch Week I is here! 🚀',
|
||||
metadata: {
|
||||
title: 'Home - Firecrawl',
|
||||
ogTitle: 'Firecrawl',
|
||||
ogUrl: 'https://www.firecrawl.dev/'
|
||||
'title' => 'Home - Firecrawl',
|
||||
'ogTitle' => 'Firecrawl',
|
||||
'url' => 'https://www.firecrawl.dev/'
|
||||
}
|
||||
}
|
||||
end
|
||||
@@ -22,8 +22,8 @@ RSpec.describe Captain::Tools::FirecrawlParserJob, type: :job do
|
||||
document = assistant.documents.last
|
||||
expect(document).to have_attributes(
|
||||
content: payload[:markdown],
|
||||
name: payload[:metadata][:ogTitle],
|
||||
external_link: payload[:metadata][:ogUrl],
|
||||
name: payload[:metadata]['title'],
|
||||
external_link: payload[:metadata]['url'],
|
||||
status: 'available'
|
||||
)
|
||||
end
|
||||
@@ -32,7 +32,7 @@ RSpec.describe Captain::Tools::FirecrawlParserJob, type: :job do
|
||||
existing_document = create(:captain_document,
|
||||
assistant: assistant,
|
||||
account: assistant.account,
|
||||
external_link: payload[:metadata][:ogUrl],
|
||||
external_link: payload[:metadata]['url'],
|
||||
content: 'old content',
|
||||
name: 'old title',
|
||||
status: :in_progress)
|
||||
@@ -44,7 +44,7 @@ RSpec.describe Captain::Tools::FirecrawlParserJob, type: :job do
|
||||
existing_document.reload
|
||||
expect(existing_document).to have_attributes(
|
||||
content: payload[:markdown],
|
||||
name: payload[:metadata][:ogTitle],
|
||||
name: payload[:metadata]['title'],
|
||||
status: 'available'
|
||||
)
|
||||
end
|
||||
|
||||
134
spec/enterprise/services/captain/tools/firecrawl_service_spec.rb
Normal file
134
spec/enterprise/services/captain/tools/firecrawl_service_spec.rb
Normal file
@@ -0,0 +1,134 @@
|
||||
require 'rails_helper'
|
||||
|
||||
# Specs for Captain::Tools::FirecrawlService: construction (API key lookup)
# and #perform (the outgoing crawl request sent to the Firecrawl API).
RSpec.describe Captain::Tools::FirecrawlService do
  let(:api_key) { 'test-api-key' }
  let(:url) { 'https://example.com' }
  let(:webhook_url) { 'https://webhook.example.com/callback' }
  let(:crawl_limit) { 15 }
  let(:crawl_endpoint) { 'https://api.firecrawl.dev/v1/crawl' }

  before do
    create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: api_key)
  end

  describe '#initialize' do
    context 'when API key is configured' do
      it 'initializes successfully' do
        expect { described_class.new }.not_to raise_error
      end
    end

    context 'when API key is missing' do
      before do
        InstallationConfig.find_by(name: 'CAPTAIN_FIRECRAWL_API_KEY').destroy
      end

      it 'raises an error' do
        expect { described_class.new }.to raise_error(ActiveRecord::RecordNotFound)
      end
    end

    context 'when API key is nil' do
      before do
        InstallationConfig.find_by(name: 'CAPTAIN_FIRECRAWL_API_KEY').update(value: nil)
      end

      it 'raises an error' do
        expect { described_class.new }.to raise_error('Missing API key')
      end
    end
  end

  describe '#perform' do
    let(:service) { described_class.new }
    let(:expected_payload) { crawl_payload(crawl_limit) }

    let(:expected_headers) do
      {
        'Authorization' => "Bearer #{api_key}",
        'Content-Type' => 'application/json'
      }
    end

    # Serialized request body expected for a crawl capped at `limit` pages.
    # Building it from the hash (rather than string-substituting the limit in
    # an already serialized payload) keeps the expectation exact even when the
    # limit's digits also appear elsewhere in the body.
    def crawl_payload(limit)
      {
        url: url,
        maxDepth: 50,
        ignoreSitemap: false,
        limit: limit,
        webhook: webhook_url,
        scrapeOptions: {
          onlyMainContent: false,
          formats: ['markdown'],
          excludeTags: ['iframe']
        }
      }.to_json
    end

    context 'when the API call is successful' do
      before do
        stub_request(:post, crawl_endpoint)
          .with(body: expected_payload, headers: expected_headers)
          .to_return(status: 200, body: '{"status": "success"}')
      end

      it 'makes a POST request with correct parameters' do
        service.perform(url, webhook_url, crawl_limit)

        expect(WebMock).to have_requested(:post, crawl_endpoint)
          .with(body: expected_payload, headers: expected_headers)
      end

      it 'uses default crawl limit when not specified' do
        # The request is expected to carry a limit of 10 when the caller
        # omits the third argument.
        default_payload = crawl_payload(10)

        stub_request(:post, crawl_endpoint)
          .with(body: default_payload, headers: expected_headers)
          .to_return(status: 200, body: '{"status": "success"}')

        service.perform(url, webhook_url)

        expect(WebMock).to have_requested(:post, crawl_endpoint)
          .with(body: default_payload, headers: expected_headers)
      end
    end

    context 'when the API call fails' do
      before do
        stub_request(:post, crawl_endpoint)
          .to_raise(StandardError.new('Connection failed'))
      end

      it 'raises an error with the failure message' do
        expect { service.perform(url, webhook_url, crawl_limit) }
          .to raise_error('Failed to crawl URL: Connection failed')
      end
    end

    context 'when the API returns an error response' do
      before do
        stub_request(:post, crawl_endpoint)
          .to_return(status: 422, body: '{"error": "Invalid URL"}')
      end

      it 'makes the request but does not raise an error' do
        expect { service.perform(url, webhook_url, crawl_limit) }.not_to raise_error

        expect(WebMock).to have_requested(:post, crawl_endpoint)
          .with(body: expected_payload, headers: expected_headers)
      end
    end
  end
end
|
||||
Reference in New Issue
Block a user