fix: Add authentication to Firecrawl API, remove unused Robin AI references (#10737)

- Fixed Firecrawl webhook payloads to ensure proper data handling and
delivery.
- Removed unused Robin AI code to improve codebase cleanliness and
maintainability.
- Implemented authentication for the Firecrawl endpoint to improve
security. A key is generated to secure the webhook URLs from Firecrawl.

---------

Co-authored-by: Pranav <pranavrajs@gmail.com>
This commit is contained in:
Sojan Jose
2025-01-23 07:44:25 +05:30
committed by GitHub
parent 3b366f43e6
commit be8205657e
28 changed files with 345 additions and 470 deletions

View File

@@ -1,57 +1,120 @@
require 'rails_helper'
RSpec.describe 'Firecrawl Webhooks', type: :request do
describe 'POST /enterprise/webhooks/firecrawl?assistant_id=:assistant_id' do
let(:assistant_id) { 'asst_123' }
describe 'POST /enterprise/webhooks/firecrawl?assistant_id=:assistant_id&token=:token' do
let!(:api_key) { create(:installation_config, name: 'CAPTAIN_FIRECRAWL_API_KEY', value: 'test_api_key_123') }
let!(:account) { create(:account) }
let!(:assistant) { create(:captain_assistant, account: account) }
let(:payload_data) do
{
'markdown' => 'hello world',
'metadata' => {
'ogUrl' => 'https://example.com'
}
markdown: 'hello world',
metadata: { ogUrl: 'https://example.com' }
}
end
context 'with crawl.page event type' do
let(:valid_params) do
{
data: payload_data,
type: 'crawl.page'
}
# Compute the expected token inline: SHA256 of the API key's last four characters + assistant id + account id
let(:valid_token) do
token_base = "#{api_key.value[-4..]}#{assistant.id}#{assistant.account_id}"
Digest::SHA256.hexdigest(token_base)
end
context 'with valid token' do
context 'with crawl.page event type' do
let(:valid_params) do
{
type: 'crawl.page',
data: [payload_data]
}
end
it 'processes the webhook and returns success' do
expect(Captain::Tools::FirecrawlParserJob).to receive(:perform_later)
.with(
assistant_id: assistant.id,
payload: payload_data
)
post(
"/enterprise/webhooks/firecrawl?assistant_id=#{assistant.id}&token=#{valid_token}",
params: valid_params,
as: :json
)
expect(response).to have_http_status(:ok)
expect(response.body).to be_empty
end
end
it 'processes the webhook and returns success' do
expect(Captain::Tools::FirecrawlParserJob).to(
receive(:perform_later)
.with(
assistant_id: assistant_id,
payload: payload_data
)
)
context 'with crawl.completed event type' do
let(:valid_params) do
{
type: 'crawl.completed'
}
end
post("/enterprise/webhooks/firecrawl?assistant_id=#{assistant_id}",
params: valid_params,
as: :json)
it 'returns success without enqueuing job' do
expect(Captain::Tools::FirecrawlParserJob).not_to receive(:perform_later)
expect(response).to have_http_status(:ok)
expect(response.body).to be_empty
post("/enterprise/webhooks/firecrawl?assistant_id=#{assistant.id}&token=#{valid_token}",
params: valid_params,
as: :json)
expect(response).to have_http_status(:ok)
expect(response.body).to be_empty
end
end
end
context 'with crawl.completed event type' do
let(:valid_params) do
{ type: 'crawl.completed' }
context 'with invalid token' do
let(:invalid_params) do
{
type: 'crawl.page',
data: [payload_data]
}
end
it 'returns success without enqueuing job' do
expect(Captain::Tools::FirecrawlParserJob).not_to receive(:perform_later)
post("/enterprise/webhooks/firecrawl?assistant_id=#{assistant_id}",
params: valid_params,
it 'returns unauthorized status' do
post("/enterprise/webhooks/firecrawl?assistant_id=#{assistant.id}&token=invalid_token",
params: invalid_params,
as: :json)
expect(response).to have_http_status(:ok)
expect(response.body).to be_empty
expect(response).to have_http_status(:unauthorized)
end
end
# Requests whose assistant_id cannot be matched are expected to get 404,
# even though a token is supplied in the query string.
context 'with invalid assistant_id' do
# assistant_id is present but is the literal string "invalid_id".
context 'with non-existent assistant_id' do
it 'returns not found status' do
post("/enterprise/webhooks/firecrawl?assistant_id=invalid_id&token=#{valid_token}",
params: { type: 'crawl.page', data: [payload_data] },
as: :json)
expect(response).to have_http_status(:not_found)
end
end
# assistant_id is omitted from the query string entirely; only the token is sent.
context 'with nil assistant_id' do
it 'returns not found status' do
post("/enterprise/webhooks/firecrawl?token=#{valid_token}",
params: { type: 'crawl.page', data: [payload_data] },
as: :json)
expect(response).to have_http_status(:not_found)
end
end
end
# With the CAPTAIN_FIRECRAWL_API_KEY installation config removed, the endpoint
# is expected to answer 401 for an otherwise well-formed crawl.page request.
context 'when CAPTAIN_FIRECRAWL_API_KEY is not configured' do
before do
# Delete the config record created for the enclosing describe block.
api_key.destroy
end
it 'returns unauthorized status' do
# NOTE(review): valid_token is presumably still computable here because it
# reads the value from the in-memory api_key object — confirm.
post("/enterprise/webhooks/firecrawl?assistant_id=#{assistant.id}&token=#{valid_token}",
params: { type: 'crawl.page', data: [payload_data] },
as: :json)
expect(response).to have_http_status(:unauthorized)
end
end
end