feat: improve article search ranking (#11640)

This commit is contained in:
Shivam Mishra
2025-06-03 12:01:17 +05:30
committed by GitHub
parent bae958334d
commit 02c4863d95
3 changed files with 15 additions and 25 deletions

View File

@@ -76,18 +76,24 @@ class Article < ApplicationRecord
scope :order_by_views, -> { reorder(views: :desc) } scope :order_by_views, -> { reorder(views: :desc) }
# TODO: if text search slows down https://www.postgresql.org/docs/current/textsearch-features.html#TEXTSEARCH-UPDATE-TRIGGERS # TODO: if text search slows down https://www.postgresql.org/docs/current/textsearch-features.html#TEXTSEARCH-UPDATE-TRIGGERS
# - the A, B and C are for weightage. See: https://github.com/Casecommons/pg_search#weighting
# - the normalization ensures that long articles are not ranked higher merely because they mention the search term many times.
# normalization: 2 divides the rank by the document length (flag 1 would divide by 1 + log(document_length) instead). See: https://github.com/Casecommons/pg_search#normalization
# - ranked_by ':tsearch' orders results by the tsearch rank, so matches in higher-weighted columns are ranked higher
pg_search_scope( pg_search_scope(
:text_search, :text_search,
against: %i[ against: {
title title: 'A',
description description: 'B',
content content: 'C'
], },
using: { using: {
tsearch: { tsearch: {
prefix: true prefix: true,
normalization: 2
} }
} },
ranked_by: ':tsearch'
) )
def self.search(params) def self.search(params)

View File

@@ -96,7 +96,6 @@ class SearchService
def filter_articles def filter_articles
@articles = current_account.articles @articles = current_account.articles
.text_search(search_query) .text_search(search_query)
.reorder('updated_at DESC')
.page(params[:page]) .page(params[:page])
.per(15) .per(15)
end end

View File

@@ -156,33 +156,18 @@ describe SearchService do
end end
context 'when article search' do context 'when article search' do
it 'orders results by updated_at desc' do it 'returns matching articles' do
# Create articles with explicit timestamps
older_time = 2.days.ago
newer_time = 1.hour.ago
article2 = create(:article, title: 'Spellcasting Guide', article2 = create(:article, title: 'Spellcasting Guide',
account: account, portal: portal, author: user, status: 'published') account: account, portal: portal, author: user, status: 'published')
# rubocop:disable Rails/SkipsModelValidations
article2.update_column(:updated_at, older_time)
# rubocop:enable Rails/SkipsModelValidations
article3 = create(:article, title: 'Spellcasting Manual', article3 = create(:article, title: 'Spellcasting Manual',
account: account, portal: portal, author: user, status: 'published') account: account, portal: portal, author: user, status: 'published')
# rubocop:disable Rails/SkipsModelValidations
article3.update_column(:updated_at, newer_time)
# rubocop:enable Rails/SkipsModelValidations
params = { q: 'Spellcasting' } params = { q: 'Spellcasting' }
search = described_class.new(current_user: user, current_account: account, params: params, search_type: 'Article') search = described_class.new(current_user: user, current_account: account, params: params, search_type: 'Article')
results = search.perform[:articles] results = search.perform[:articles]
# Check the timestamps to understand ordering
results.map { |a| [a.id, a.updated_at] }
# Should be ordered by updated_at desc (newer first)
expect(results.length).to eq(2) expect(results.length).to eq(2)
expect(results.first.updated_at).to be > results.second.updated_at expect(results.map(&:id)).to contain_exactly(article2.id, article3.id)
end end
it 'returns paginated results' do it 'returns paginated results' do