# Pull Request Template ## Description Add migrations for document auto-sync Fixes # (issue) ## Type of change - [x] New feature (non-breaking change which adds functionality) ## How Has This Been Tested? locally ## Checklist: - [x] My code follows the style guidelines of this project - [x] I have performed a self-review of my code - [x] I have commented on my code, particularly in hard-to-understand areas - [ ] I have made corresponding changes to the documentation - [x] My changes generate no new warnings - [x] I have added tests that prove my fix is effective or that my feature works - [x] New and existing unit tests pass locally with my changes - [x] Any dependent changes have been merged and published in downstream modules
177 lines
5.3 KiB
Ruby
177 lines
5.3 KiB
Ruby
# == Schema Information
#
# Table name: captain_documents
#
#  id                     :bigint           not null, primary key
#  content                :text
#  external_link          :string           not null
#  last_sync_attempted_at :datetime
#  last_synced_at         :datetime
#  metadata               :jsonb
#  name                   :string
#  status                 :integer          default("in_progress"), not null
#  sync_status            :integer
#  created_at             :datetime         not null
#  updated_at             :datetime         not null
#  account_id             :bigint           not null
#  assistant_id           :bigint           not null
#
# Indexes
#
#  index_captain_documents_on_account_id                      (account_id)
#  index_captain_documents_on_account_id_and_sync_status      (account_id,sync_status)
#  index_captain_documents_on_assistant_id                    (assistant_id)
#  index_captain_documents_on_assistant_id_and_external_link  (assistant_id,external_link) UNIQUE
#  index_captain_documents_on_status                          (status)
#
# Document record stored in `captain_documents`, owned by a
# Captain::Assistant and an account. A document is described either by an
# external link or by an attached PDF file (`pdf_file`).
#
# Extra per-document values (content fingerprint, last sync error code,
# OpenAI file id) are kept inside the `metadata` JSONB column and exposed
# through the accessor methods below.
class Captain::Document < ApplicationRecord
  # Raised by the before_create callback when the account has no document
  # quota left (see #ensure_within_plan_limit).
  class LimitExceededError < StandardError; end

  # Blob content type that marks an attachment as a PDF.
  PDF_CONTENT_TYPE = 'application/pdf'.freeze
  # Upper bound enforced on the length of `content`.
  MAX_CONTENT_LENGTH = 200_000
  # Upper bound enforced on the byte size of the attached file.
  MAX_PDF_SIZE = 10.megabytes

  self.table_name = 'captain_documents'

  belongs_to :assistant, class_name: 'Captain::Assistant'
  has_many :responses, class_name: 'Captain::AssistantResponse', dependent: :destroy, as: :documentable
  belongs_to :account
  has_one_attached :pdf_file

  # external_link is only mandatory when no file is attached; for uploads it
  # is generated by #set_external_link_for_pdf before validation.
  validates :external_link, presence: true, unless: -> { pdf_file.attached? }
  validates :external_link, uniqueness: { scope: :assistant_id }, allow_blank: true
  validates :content, length: { maximum: MAX_CONTENT_LENGTH }
  validates :pdf_file, presence: true, if: :pdf_document?
  validate :validate_pdf_format, if: :pdf_document?
  validate :validate_file_attachment, if: -> { pdf_file.attached? }
  before_validation :ensure_account_id
  before_validation :set_external_link_for_pdf
  before_validation :normalize_external_link

  # Positional enum syntax, consistent with sync_status below.
  enum :status, {
    in_progress: 0,
    available: 1
  }

  # `prefix: :sync` namespaces the generated helpers (e.g. `sync_failed?`).
  enum :sync_status, { syncing: 0, synced: 1, failed: 2 }, prefix: :sync

  before_create :ensure_within_plan_limit
  after_create_commit :enqueue_crawl_job
  after_create_commit :update_document_usage
  after_destroy :update_document_usage
  after_commit :enqueue_response_builder_job
  scope :ordered, -> { order(created_at: :desc) }

  scope :for_account, ->(account_id) { where(account_id: account_id) }
  scope :for_assistant, ->(assistant_id) { where(assistant_id: assistant_id) }

  # Whether this document is treated as a PDF.
  #
  # @return [Boolean, nil] true when the attached blob is a PDF; otherwise
  #   whether external_link ends with ".pdf" (nil when external_link is nil)
  def pdf_document?
    return true if pdf_file.attached? && pdf_file.blob.content_type == PDF_CONTENT_TYPE

    external_link&.ends_with?('.pdf')
  end

  # @return [String, nil] content type of the attached blob, nil when nothing is attached
  def content_type
    pdf_file.blob.content_type if pdf_file.attached?
  end

  # @return [Integer, nil] byte size of the attached blob, nil when nothing is attached
  def file_size
    pdf_file.blob.byte_size if pdf_file.attached?
  end

  # @return [Object, nil] value stored under metadata['content_fingerprint']
  def content_fingerprint
    metadata&.dig('content_fingerprint')
  end

  # Assigns metadata['content_fingerprint'] in memory; does not save the record.
  def content_fingerprint=(value)
    self.metadata = metadata_with('content_fingerprint', value)
  end

  # @return [Object, nil] value stored under metadata['last_sync_error_code']
  def last_sync_error_code
    metadata&.dig('last_sync_error_code')
  end

  # Assigns metadata['last_sync_error_code'] in memory; does not save the record.
  def last_sync_error_code=(value)
    self.metadata = metadata_with('last_sync_error_code', value)
  end

  # @return [Object, nil] value stored under metadata['openai_file_id']
  def openai_file_id
    metadata&.dig('openai_file_id')
  end

  # Stores the given id under metadata['openai_file_id'] and persists the
  # record immediately with update! (which raises if the save fails).
  def store_openai_file_id(file_id)
    update!(metadata: metadata_with('openai_file_id', file_id))
  end

  # URL to present for this document.
  #
  # Returns external_link unless it is blank or carries the generated "PDF:"
  # prefix; in that case the blob URL of the attachment is returned when a
  # file is attached, falling back to external_link otherwise.
  def display_url
    return external_link if external_link.present? && !external_link.start_with?('PDF:')
    return external_link unless pdf_file.attached?

    Rails.application.routes.url_helpers.rails_blob_url(pdf_file, only_path: false)
  end

  private

  # Copy of the metadata hash (an empty hash when metadata is nil) with
  # `key` set to `value`. The stored metadata is not mutated.
  def metadata_with(key, value)
    (metadata || {}).merge(key => value)
  end

  # Enqueues the crawl job after creation, only while the document is still in_progress.
  def enqueue_crawl_job
    return unless in_progress?

    Captain::Documents::CrawlJob.perform_later(self)
  end

  # Enqueues the response builder job after commit when
  # #should_enqueue_response_builder? allows it.
  def enqueue_response_builder_job
    return unless should_enqueue_response_builder?

    Captain::Documents::ResponseBuilderJob.perform_later(self)
  end

  # Only available, non-destroyed documents qualify. PDF documents qualify
  # when the status was changed by the last save; other documents qualify
  # when status or content was changed and content is present.
  def should_enqueue_response_builder?
    return false if destroyed?
    return false unless available?

    return saved_change_to_status? if pdf_document?

    (saved_change_to_status? || saved_change_to_content?) && content.present?
  end

  # Delegates to the account to refresh its document usage.
  def update_document_usage
    account.update_document_usage
  end

  # Sets account_id from the assistant on every validation, overwriting any
  # previously assigned value (nil when no assistant is set).
  def ensure_account_id
    self.account_id = assistant&.account_id
  end

  # Aborts creation when the account has no document quota available.
  #
  # @raise [LimitExceededError] when limits[:current_available] is not positive
  def ensure_within_plan_limit
    limits = account.usage_limits[:captain][:documents]
    raise LimitExceededError, I18n.t('captain.documents.limit_exceeded') unless limits[:current_available].positive?
  end

  # Adds an error when a file is attached but is not a PDF. The attached?
  # guard is needed because pdf_document? can also be true from a ".pdf"
  # external_link without any attachment.
  def validate_pdf_format
    return unless pdf_file.attached?

    errors.add(:pdf_file, I18n.t('captain.documents.pdf_format_error')) unless pdf_file.blob.content_type == PDF_CONTENT_TYPE
  end

  # Adds an error when the attached file is larger than MAX_PDF_SIZE.
  def validate_file_attachment
    return unless pdf_file.attached? && pdf_file.blob.byte_size > MAX_PDF_SIZE

    errors.add(:pdf_file, I18n.t('captain.documents.pdf_size_error'))
  end

  # Generates an external_link for uploaded files that do not have one.
  def set_external_link_for_pdf
    return unless pdf_file.attached? && external_link.blank?

    # Set a unique external_link for PDF files
    # Format: PDF: filename_timestamp (without extension)
    timestamp = Time.current.strftime('%Y%m%d%H%M%S')
    self.external_link = "PDF: #{pdf_file.filename.base}_#{timestamp}"
  end

  # Strips a single trailing "/" from non-PDF links.
  def normalize_external_link
    return if external_link.blank?
    return if pdf_document?

    self.external_link = external_link.delete_suffix('/')
  end
end
|