Change account search tokenizer and queries (#26378)
This commit is contained in:
		@@ -33,7 +33,7 @@ class AccountsIndex < Chewy::Index
 | 
			
		||||
      },
 | 
			
		||||
 | 
			
		||||
      verbatim: {
 | 
			
		||||
        tokenizer: 'whitespace',
 | 
			
		||||
        tokenizer: 'standard',
 | 
			
		||||
        filter: %w(lowercase asciifolding cjk_width),
 | 
			
		||||
      },
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -8,6 +8,143 @@ class AccountSearchService < BaseService
 | 
			
		||||
  # Min. number of characters to look for non-exact matches
 | 
			
		||||
  MIN_QUERY_LENGTH = 5
 | 
			
		||||
 | 
			
		||||
  # Assembles the Elasticsearch query used to search AccountsIndex.
  #
  # The subclass-provided +core_query+ is wrapped in a +function_score+
  # clause (reputation, follower count and recency functions). When an
  # account is given, results are either restricted to the accounts it
  # follows (+options[:following]+ truthy) or those accounts are boosted.
  #
  # Subclasses must implement a private +core_query+ method returning the
  # clause that matches the search terms; this base class does not define it.
  class QueryBuilder
    # @param query [String] search terms, made available to +core_query+ as @query
    # @param account [Object, nil] the searching account, if any; must respond
    #   to +active_relationships+ and +id+ when present
    # @param options [Hash] only the +:following+ key is read by this class
    def initialize(query, account, options = {})
      @query = query
      @account = account
      @options = options
    end

    # Builds the full query and passes it to +AccountsIndex.query+.
    #
    # @return whatever +AccountsIndex.query+ returns
    def build
      AccountsIndex.query(
        bool: {
          must: {
            function_score: {
              query: {
                bool: {
                  must: must_clauses,
                },
              },

              functions: [
                reputation_score_function,
                followers_score_function,
                time_distance_function,
              ],
            },
          },

          should: should_clauses,
        }
      )
    end

    private

    # Clauses every result has to satisfy: the core query, plus the
    # "only following" filter when an account is present and
    # +:following+ is set.
    def must_clauses
      if @account && @options[:following]
        [core_query, only_following_query]
      else
        [core_query]
      end
    end

    # Optional clauses: the following boost when an account is present and
    # +:following+ is not set, otherwise none.
    def should_clauses
      if @account && !@options[:following]
        [boost_following_query]
      else
        []
      end
    end

    # This function limits results to only the accounts the user is following
    def only_following_query
      {
        terms: {
          id: following_ids,
        },
      }
    end

    # This function promotes accounts the user is following
    def boost_following_query
      {
        terms: {
          id: following_ids,
          boost: 100,
        },
      }
    end

    # This function deranks accounts that follow more people than follow them
    def reputation_score_function
      {
        script_score: {
          script: {
            source: "(Math.max(doc['followers_count'].value, 0) + 0.0) / (Math.max(doc['followers_count'].value, 0) + Math.max(doc['following_count'].value, 0) + 1)",
          },
        },
      }
    end

    # This function promotes accounts that have more followers
    def followers_score_function
      {
        script_score: {
          script: {
            # `+ 0.0` forces floating-point division, mirroring
            # reputation_score_function. Assuming followers_count is an
            # integer-typed field (TODO confirm against the index mapping),
            # n / (n + 1) would otherwise truncate to 0 for every account.
            source: "(Math.max(doc['followers_count'].value, 0) + 0.0) / (Math.max(doc['followers_count'].value, 0) + 1)",
          },
        },
      }
    end

    # This function deranks accounts that haven't posted in a long time
    def time_distance_function
      {
        gauss: {
          last_status_at: {
            scale: '30d',
            offset: '30d',
            decay: 0.3,
          },
        },
      }
    end

    # IDs of the accounts @account follows, followed by its own ID (memoized).
    # Only called when @account is present.
    def following_ids
      @following_ids ||= @account.active_relationships.pluck(:target_account_id) + [@account.id]
    end
  end
 | 
			
		||||
 | 
			
		||||
  # Query builder whose core clause is a `bool_prefix` multi_match over the
  # username and display name fields (including their sub-fields).
  class AutocompleteQueryBuilder < QueryBuilder
    private

    # Clause matching @query against the name fields.
    def core_query
      searched_fields = %w(username username.* display_name display_name.*)

      { multi_match: { query: @query, type: 'bool_prefix', fields: searched_fields } }
    end
  end
 | 
			
		||||
 | 
			
		||||
  # Query builder whose core clause is a `most_fields` multi_match over
  # username and display name (both weighted ^2) plus the text fields,
  # using the `and` operator.
  class FullQueryBuilder < QueryBuilder
    private

    # Clause matching @query against the name and text fields.
    def core_query
      searched_fields = %w(username^2 display_name^2 text text.*)

      { multi_match: { query: @query, type: 'most_fields', fields: searched_fields, operator: 'and' } }
    end
  end
 | 
			
		||||
 | 
			
		||||
  def call(query, account = nil, options = {})
 | 
			
		||||
    @query   = query&.strip&.gsub(/\A@/, '')
 | 
			
		||||
    @limit   = options[:limit].to_i
 | 
			
		||||
@@ -71,27 +208,15 @@ class AccountSearchService < BaseService
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def from_elasticsearch
 | 
			
		||||
    must_clauses   = must_clause
 | 
			
		||||
    should_clauses = should_clause
 | 
			
		||||
 | 
			
		||||
    if account
 | 
			
		||||
      return [] if options[:following] && following_ids.empty?
 | 
			
		||||
 | 
			
		||||
      if options[:following]
 | 
			
		||||
        must_clauses << { terms: { id: following_ids } }
 | 
			
		||||
      elsif following_ids.any?
 | 
			
		||||
        should_clauses << { terms: { id: following_ids, boost: 100 } }
 | 
			
		||||
    query_builder = begin
 | 
			
		||||
      if options[:use_searchable_text]
 | 
			
		||||
        FullQueryBuilder.new(terms_for_query, account, options.slice(:following))
 | 
			
		||||
      else
 | 
			
		||||
        AutocompleteQueryBuilder.new(terms_for_query, account, options.slice(:following))
 | 
			
		||||
      end
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    query     = { bool: { must: must_clauses, should: should_clauses } }
 | 
			
		||||
    functions = [reputation_score_function, followers_score_function, time_distance_function]
 | 
			
		||||
 | 
			
		||||
    records = AccountsIndex.query(function_score: { query: query, functions: functions })
 | 
			
		||||
                           .limit(limit_for_non_exact_results)
 | 
			
		||||
                           .offset(offset)
 | 
			
		||||
                           .objects
 | 
			
		||||
                           .compact
 | 
			
		||||
    records = query_builder.build.limit(limit_for_non_exact_results).offset(offset).objects.compact
 | 
			
		||||
 | 
			
		||||
    ActiveRecord::Associations::Preloader.new(records: records, associations: :account_stat)
 | 
			
		||||
 | 
			
		||||
@@ -100,76 +225,6 @@ class AccountSearchService < BaseService
 | 
			
		||||
    nil
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  # Scoring function: followers / (followers + following + 1), computed in
  # floating point with negative counts clamped to zero.
  def reputation_score_function
    painless = "(Math.max(doc['followers_count'].value, 0) + 0.0) / (Math.max(doc['followers_count'].value, 0) + Math.max(doc['following_count'].value, 0) + 1)"

    { script_score: { script: { source: painless } } }
  end
 | 
			
		||||
 | 
			
		||||
  # Scoring function: log10 of the follower count (clamped at zero) plus 2.
  def followers_score_function
    painless = "Math.log10(Math.max(doc['followers_count'].value, 0) + 2)"

    { script_score: { script: { source: painless } } }
  end
 | 
			
		||||
 | 
			
		||||
  # Scoring function: gauss decay on `last_status_at` with a 30d scale,
  # a 30d offset and a decay of 0.3.
  def time_distance_function
    decay_settings = { scale: '30d', offset: '30d', decay: 0.3 }

    { gauss: { last_status_at: decay_settings } }
  end
 | 
			
		||||
 | 
			
		||||
  # Mandatory match clause. Searches only the text fields when
  # `options[:start_with_hashtag]` is set; otherwise the name fields,
  # extended with the text fields when `options[:use_searchable_text]` is set.
  #
  # Returns a fresh one-element array holding a `best_fields` multi_match
  # with the `or` operator.
  def must_clause
    fields =
      if options[:start_with_hashtag]
        %w(text text.*)
      else
        name_fields = %w(username username.* display_name display_name.*)
        options[:use_searchable_text] ? name_fields + %w(text text.*) : name_fields
      end

    match = { query: terms_for_query, fields: fields, type: 'best_fields', operator: 'or' }

    [{ multi_match: match }]
  end
 | 
			
		||||
 | 
			
		||||
  # Optional clause: a `best_fields` multi_match over the name fields with
  # the `and` operator and a boost of 10. Returns a fresh one-element array.
  def should_clause
    name_match = {
      query: terms_for_query,
      fields: %w(username username.* display_name display_name.*),
      type: 'best_fields',
      operator: 'and',
      boost: 10,
    }

    [{ multi_match: name_match }]
  end
 | 
			
		||||
 | 
			
		||||
  # Memoized list of the target account IDs from the account's active
  # relationships, followed by the account's own ID.
  def following_ids
    @following_ids ||= begin
      followed = account.active_relationships.pluck(:target_account_id)
      followed + [account.id]
    end
  end
 | 
			
		||||
 | 
			
		||||
  def limit_for_non_exact_results
 | 
			
		||||
    return 0 if @account.nil? && query.size < MIN_QUERY_LENGTH
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user