Improve performance of follow recommendation scheduler (#16159)
Express follow_recommendations in terms of account_summaries rather than accounts, integrate filters that are unconditionally used, and materialize the resulting view. This should result in the bulk of the computation being performed only once instead of **once per recommendation language**.
This commit is contained in:
		@@ -14,9 +14,11 @@ class FollowRecommendation < ApplicationRecord
 | 
			
		||||
  belongs_to :account_summary, foreign_key: :account_id
 | 
			
		||||
  belongs_to :account, foreign_key: :account_id
 | 
			
		||||
 | 
			
		||||
  scope :safe, -> { joins(:account_summary).merge(AccountSummary.safe) }
 | 
			
		||||
  scope :localized, ->(locale) { joins(:account_summary).merge(AccountSummary.localized(locale)) }
 | 
			
		||||
  scope :filtered, -> { joins(:account_summary).merge(AccountSummary.filtered) }
 | 
			
		||||
 | 
			
		||||
  def self.refresh
 | 
			
		||||
    Scenic.database.refresh_materialized_view(table_name, concurrently: true, cascade: false)
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def readonly?
 | 
			
		||||
    true
 | 
			
		||||
 
 | 
			
		||||
@@ -14,13 +14,14 @@ class Scheduler::FollowRecommendationsScheduler
 | 
			
		||||
  def perform
 | 
			
		||||
    # Maintaining a materialized view speeds-up subsequent queries significantly
 | 
			
		||||
    AccountSummary.refresh
 | 
			
		||||
    FollowRecommendation.refresh
 | 
			
		||||
 | 
			
		||||
    fallback_recommendations = FollowRecommendation.safe.filtered.limit(SET_SIZE).index_by(&:account_id)
 | 
			
		||||
    fallback_recommendations = FollowRecommendation.limit(SET_SIZE).index_by(&:account_id)
 | 
			
		||||
 | 
			
		||||
    I18n.available_locales.each do |locale|
 | 
			
		||||
      recommendations = begin
 | 
			
		||||
        if AccountSummary.safe.filtered.localized(locale).exists? # We can skip the work if no accounts with that language exist
 | 
			
		||||
          FollowRecommendation.safe.filtered.localized(locale).limit(SET_SIZE).index_by(&:account_id)
 | 
			
		||||
          FollowRecommendation.localized(locale).limit(SET_SIZE).index_by(&:account_id)
 | 
			
		||||
        else
 | 
			
		||||
          {}
 | 
			
		||||
        end
 | 
			
		||||
 
 | 
			
		||||
@@ -0,0 +1,18 @@
 | 
			
		||||
class UpdateFollowRecommendationsToVersion2 < ActiveRecord::Migration[6.1]
 | 
			
		||||
  # We're switching from a normal to a materialized view so we need
 | 
			
		||||
  # custom `up` and `down` paths.
 | 
			
		||||
 | 
			
		||||
  def up
 | 
			
		||||
    drop_view :follow_recommendations
 | 
			
		||||
    create_view :follow_recommendations, version: 2, materialized: true
 | 
			
		||||
 | 
			
		||||
    # To be able to refresh the view concurrently,
 | 
			
		||||
    # at least one unique index is required
 | 
			
		||||
    safety_assured { add_index :follow_recommendations, :account_id, unique: true }
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def down
 | 
			
		||||
    drop_view :follow_recommendations, materialized: true
 | 
			
		||||
    create_view :follow_recommendations, version: 1
 | 
			
		||||
  end
 | 
			
		||||
end
 | 
			
		||||
							
								
								
									
										28
									
								
								db/schema.rb
									
									
									
									
									
								
							
							
						
						
									
										28
									
								
								db/schema.rb
									
									
									
									
									
								
							@@ -10,7 +10,7 @@
 | 
			
		||||
#
 | 
			
		||||
# It's strongly recommended that you check this file into your version control system.
 | 
			
		||||
 | 
			
		||||
ActiveRecord::Schema.define(version: 2021_04_25_135952) do
 | 
			
		||||
ActiveRecord::Schema.define(version: 2021_05_05_174616) do
 | 
			
		||||
 | 
			
		||||
  # These are extensions that must be enabled in order to support this database
 | 
			
		||||
  enable_extension "plpgsql"
 | 
			
		||||
@@ -1114,30 +1114,34 @@ ActiveRecord::Schema.define(version: 2021_04_25_135952) do
 | 
			
		||||
  SQL
 | 
			
		||||
  add_index "account_summaries", ["account_id"], name: "index_account_summaries_on_account_id", unique: true
 | 
			
		||||
 | 
			
		||||
  create_view "follow_recommendations", sql_definition: <<-SQL
 | 
			
		||||
  create_view "follow_recommendations", materialized: true, sql_definition: <<-SQL
 | 
			
		||||
      SELECT t0.account_id,
 | 
			
		||||
      sum(t0.rank) AS rank,
 | 
			
		||||
      array_agg(t0.reason) AS reason
 | 
			
		||||
     FROM ( SELECT accounts.id AS account_id,
 | 
			
		||||
     FROM ( SELECT account_summaries.account_id,
 | 
			
		||||
              ((count(follows.id))::numeric / (1.0 + (count(follows.id))::numeric)) AS rank,
 | 
			
		||||
              'most_followed'::text AS reason
 | 
			
		||||
             FROM ((follows
 | 
			
		||||
               JOIN accounts ON ((accounts.id = follows.target_account_id)))
 | 
			
		||||
             FROM (((follows
 | 
			
		||||
               JOIN account_summaries ON ((account_summaries.account_id = follows.target_account_id)))
 | 
			
		||||
               JOIN users ON ((users.account_id = follows.account_id)))
 | 
			
		||||
            WHERE ((users.current_sign_in_at >= (now() - 'P30D'::interval)) AND (accounts.suspended_at IS NULL) AND (accounts.moved_to_account_id IS NULL) AND (accounts.silenced_at IS NULL) AND (accounts.locked = false) AND (accounts.discoverable = true))
 | 
			
		||||
            GROUP BY accounts.id
 | 
			
		||||
               LEFT JOIN follow_recommendation_suppressions ON ((follow_recommendation_suppressions.account_id = follows.target_account_id)))
 | 
			
		||||
            WHERE ((users.current_sign_in_at >= (now() - 'P30D'::interval)) AND (account_summaries.sensitive = false) AND (follow_recommendation_suppressions.id IS NULL))
 | 
			
		||||
            GROUP BY account_summaries.account_id
 | 
			
		||||
           HAVING (count(follows.id) >= 5)
 | 
			
		||||
          UNION ALL
 | 
			
		||||
           SELECT accounts.id AS account_id,
 | 
			
		||||
           SELECT account_summaries.account_id,
 | 
			
		||||
              (sum((status_stats.reblogs_count + status_stats.favourites_count)) / (1.0 + sum((status_stats.reblogs_count + status_stats.favourites_count)))) AS rank,
 | 
			
		||||
              'most_interactions'::text AS reason
 | 
			
		||||
             FROM ((status_stats
 | 
			
		||||
             FROM (((status_stats
 | 
			
		||||
               JOIN statuses ON ((statuses.id = status_stats.status_id)))
 | 
			
		||||
               JOIN accounts ON ((accounts.id = statuses.account_id)))
 | 
			
		||||
            WHERE ((statuses.id >= (((date_part('epoch'::text, (now() - 'P30D'::interval)) * (1000)::double precision))::bigint << 16)) AND (accounts.suspended_at IS NULL) AND (accounts.moved_to_account_id IS NULL) AND (accounts.silenced_at IS NULL) AND (accounts.locked = false) AND (accounts.discoverable = true))
 | 
			
		||||
            GROUP BY accounts.id
 | 
			
		||||
               JOIN account_summaries ON ((account_summaries.account_id = statuses.account_id)))
 | 
			
		||||
               LEFT JOIN follow_recommendation_suppressions ON ((follow_recommendation_suppressions.account_id = statuses.account_id)))
 | 
			
		||||
            WHERE ((statuses.id >= (((date_part('epoch'::text, (now() - 'P30D'::interval)) * (1000)::double precision))::bigint << 16)) AND (account_summaries.sensitive = false) AND (follow_recommendation_suppressions.id IS NULL))
 | 
			
		||||
            GROUP BY account_summaries.account_id
 | 
			
		||||
           HAVING (sum((status_stats.reblogs_count + status_stats.favourites_count)) >= (5)::numeric)) t0
 | 
			
		||||
    GROUP BY t0.account_id
 | 
			
		||||
    ORDER BY (sum(t0.rank)) DESC;
 | 
			
		||||
  SQL
 | 
			
		||||
  add_index "follow_recommendations", ["account_id"], name: "index_follow_recommendations_on_account_id", unique: true
 | 
			
		||||
 | 
			
		||||
end
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										34
									
								
								db/views/follow_recommendations_v02.sql
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										34
									
								
								db/views/follow_recommendations_v02.sql
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,34 @@
 | 
			
		||||
SELECT
 | 
			
		||||
  account_id,
 | 
			
		||||
  sum(rank) AS rank,
 | 
			
		||||
  array_agg(reason) AS reason
 | 
			
		||||
FROM (
 | 
			
		||||
  SELECT
 | 
			
		||||
    account_summaries.account_id AS account_id,
 | 
			
		||||
    count(follows.id) / (1.0 + count(follows.id)) AS rank,
 | 
			
		||||
    'most_followed' AS reason
 | 
			
		||||
  FROM follows
 | 
			
		||||
  INNER JOIN account_summaries ON account_summaries.account_id = follows.target_account_id
 | 
			
		||||
  INNER JOIN users ON users.account_id = follows.account_id
 | 
			
		||||
  LEFT OUTER JOIN follow_recommendation_suppressions ON follow_recommendation_suppressions.account_id = follows.target_account_id
 | 
			
		||||
  WHERE users.current_sign_in_at >= (now() - interval '30 days')
 | 
			
		||||
    AND account_summaries.sensitive = 'f'
 | 
			
		||||
    AND follow_recommendation_suppressions.id IS NULL
 | 
			
		||||
  GROUP BY account_summaries.account_id
 | 
			
		||||
  HAVING count(follows.id) >= 5
 | 
			
		||||
  UNION ALL
 | 
			
		||||
  SELECT account_summaries.account_id AS account_id,
 | 
			
		||||
         sum(reblogs_count + favourites_count) / (1.0 + sum(reblogs_count + favourites_count)) AS rank,
 | 
			
		||||
         'most_interactions' AS reason
 | 
			
		||||
  FROM status_stats
 | 
			
		||||
  INNER JOIN statuses ON statuses.id = status_stats.status_id
 | 
			
		||||
  INNER JOIN account_summaries ON account_summaries.account_id = statuses.account_id
 | 
			
		||||
  LEFT OUTER JOIN follow_recommendation_suppressions ON follow_recommendation_suppressions.account_id = statuses.account_id
 | 
			
		||||
  WHERE statuses.id >= ((date_part('epoch', now() - interval '30 days') * 1000)::bigint << 16)
 | 
			
		||||
    AND account_summaries.sensitive = 'f'
 | 
			
		||||
    AND follow_recommendation_suppressions.id IS NULL
 | 
			
		||||
  GROUP BY account_summaries.account_id
 | 
			
		||||
  HAVING sum(reblogs_count + favourites_count) >= 5
 | 
			
		||||
) t0
 | 
			
		||||
GROUP BY account_id
 | 
			
		||||
ORDER BY rank DESC
 | 
			
		||||
		Reference in New Issue
	
	Block a user