Fix performance of percentile calculation for annual reports (#32765)
This commit is contained in:
		@@ -17,11 +17,21 @@ class AnnualReport
 | 
			
		||||
 | 
			
		||||
  SCHEMA = 1
 | 
			
		||||
 | 
			
		||||
  def self.table_name_prefix
 | 
			
		||||
    'annual_report_'
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def initialize(account, year)
 | 
			
		||||
    @account = account
 | 
			
		||||
    @year = year
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def self.prepare(year)
 | 
			
		||||
    SOURCES.each do |klass|
 | 
			
		||||
      klass.prepare(year)
 | 
			
		||||
    end
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def generate
 | 
			
		||||
    return if GeneratedAnnualReport.exists?(account: @account, year: @year)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,62 +1,37 @@
 | 
			
		||||
# frozen_string_literal: true
 | 
			
		||||
 | 
			
		||||
class AnnualReport::Percentiles < AnnualReport::Source
 | 
			
		||||
  def self.prepare(year)
 | 
			
		||||
    AnnualReport::StatusesPerAccountCount.connection.exec_query(<<~SQL.squish, nil, [year, Mastodon::Snowflake.id_at(DateTime.new(year).beginning_of_year), Mastodon::Snowflake.id_at(DateTime.new(year).end_of_year)])
 | 
			
		||||
      INSERT INTO annual_report_statuses_per_account_counts (year, account_id, statuses_count)
 | 
			
		||||
      SELECT $1, account_id, count(*)
 | 
			
		||||
      FROM statuses
 | 
			
		||||
      WHERE id BETWEEN $2 AND $3
 | 
			
		||||
      AND (local OR uri IS NULL)
 | 
			
		||||
      GROUP BY account_id
 | 
			
		||||
      ON CONFLICT (year, account_id) DO NOTHING
 | 
			
		||||
    SQL
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def generate
 | 
			
		||||
    {
 | 
			
		||||
      percentiles: {
 | 
			
		||||
        followers: (total_with_fewer_followers / (total_with_any_followers + 1.0)) * 100,
 | 
			
		||||
        statuses: (total_with_fewer_statuses / (total_with_any_statuses + 1.0)) * 100,
 | 
			
		||||
        statuses: 100.0 - ((total_with_fewer_statuses / (total_with_any_statuses + 1.0)) * 100),
 | 
			
		||||
      },
 | 
			
		||||
    }
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  private
 | 
			
		||||
 | 
			
		||||
  def followers_gained
 | 
			
		||||
    @followers_gained ||= @account.passive_relationships.where("date_part('year', follows.created_at) = ?", @year).count
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def statuses_created
 | 
			
		||||
    @statuses_created ||= report_statuses.count
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def total_with_fewer_followers
 | 
			
		||||
    @total_with_fewer_followers ||= Follow.find_by_sql([<<~SQL.squish, { year: @year, comparison: followers_gained }]).first.total
 | 
			
		||||
      WITH tmp0 AS (
 | 
			
		||||
        SELECT follows.target_account_id
 | 
			
		||||
        FROM follows
 | 
			
		||||
        INNER JOIN accounts ON accounts.id = follows.target_account_id
 | 
			
		||||
        WHERE date_part('year', follows.created_at) = :year
 | 
			
		||||
          AND accounts.domain IS NULL
 | 
			
		||||
        GROUP BY follows.target_account_id
 | 
			
		||||
        HAVING COUNT(*) < :comparison
 | 
			
		||||
      )
 | 
			
		||||
      SELECT count(*) AS total
 | 
			
		||||
      FROM tmp0
 | 
			
		||||
    SQL
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def total_with_fewer_statuses
 | 
			
		||||
    @total_with_fewer_statuses ||= Status.find_by_sql([<<~SQL.squish, { comparison: statuses_created, min_id: year_as_snowflake_range.first, max_id: year_as_snowflake_range.last }]).first.total
 | 
			
		||||
      WITH tmp0 AS (
 | 
			
		||||
        SELECT statuses.account_id
 | 
			
		||||
        FROM statuses
 | 
			
		||||
        INNER JOIN accounts ON accounts.id = statuses.account_id
 | 
			
		||||
        WHERE statuses.id BETWEEN :min_id AND :max_id
 | 
			
		||||
          AND accounts.domain IS NULL
 | 
			
		||||
        GROUP BY statuses.account_id
 | 
			
		||||
        HAVING count(*) < :comparison
 | 
			
		||||
      )
 | 
			
		||||
      SELECT count(*) AS total
 | 
			
		||||
      FROM tmp0
 | 
			
		||||
    SQL
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def total_with_any_followers
 | 
			
		||||
    @total_with_any_followers ||= Follow.where("date_part('year', follows.created_at) = ?", @year).joins(:target_account).merge(Account.local).count('distinct follows.target_account_id')
 | 
			
		||||
    @total_with_fewer_statuses ||= AnnualReport::StatusesPerAccountCount.where(year: year).where(statuses_count: ...statuses_created).count
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def total_with_any_statuses
 | 
			
		||||
    @total_with_any_statuses ||= Status.where(id: year_as_snowflake_range).joins(:account).merge(Account.local).count('distinct statuses.account_id')
 | 
			
		||||
    @total_with_any_statuses ||= AnnualReport::StatusesPerAccountCount.where(year: year).count
 | 
			
		||||
  end
 | 
			
		||||
end
 | 
			
		||||
 
 | 
			
		||||
@@ -8,6 +8,14 @@ class AnnualReport::Source
 | 
			
		||||
    @year = year
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def self.prepare(_year)
 | 
			
		||||
    # Use this method if any pre-calculations must be made before individual annual reports are generated
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def generate
 | 
			
		||||
    raise NotImplementedError
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  protected
 | 
			
		||||
 | 
			
		||||
  def report_statuses
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										15
									
								
								app/models/annual_report/statuses_per_account_count.rb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										15
									
								
								app/models/annual_report/statuses_per_account_count.rb
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,15 @@
 | 
			
		||||
# frozen_string_literal: true
 | 
			
		||||
 | 
			
		||||
# == Schema Information
 | 
			
		||||
#
 | 
			
		||||
# Table name: annual_report_statuses_per_account_counts
 | 
			
		||||
#
 | 
			
		||||
#  id             :bigint(8)        not null, primary key
 | 
			
		||||
#  year           :integer          not null
 | 
			
		||||
#  account_id     :bigint(8)        not null
 | 
			
		||||
#  statuses_count :bigint(8)        not null
 | 
			
		||||
#
 | 
			
		||||
 | 
			
		||||
class AnnualReport::StatusesPerAccountCount < ApplicationRecord
 | 
			
		||||
  # This table facilitates percentile calculations
 | 
			
		||||
end
 | 
			
		||||
@@ -0,0 +1,13 @@
 | 
			
		||||
# frozen_string_literal: true
 | 
			
		||||
 | 
			
		||||
class CreateAnnualReportStatusesPerAccountCounts < ActiveRecord::Migration[7.1]
 | 
			
		||||
  def change
 | 
			
		||||
    create_table :annual_report_statuses_per_account_counts do |t| # rubocop:disable Rails/CreateTableWithTimestamps
 | 
			
		||||
      t.integer :year, null: false
 | 
			
		||||
      t.bigint :account_id, null: false
 | 
			
		||||
      t.bigint :statuses_count, null: false
 | 
			
		||||
    end
 | 
			
		||||
 | 
			
		||||
    add_index :annual_report_statuses_per_account_counts, [:year, :account_id], unique: true
 | 
			
		||||
  end
 | 
			
		||||
end
 | 
			
		||||
@@ -10,7 +10,7 @@
 | 
			
		||||
#
 | 
			
		||||
# It's strongly recommended that you check this file into your version control system.
 | 
			
		||||
 | 
			
		||||
ActiveRecord::Schema[7.1].define(version: 2024_10_22_214312) do
 | 
			
		||||
ActiveRecord::Schema[7.1].define(version: 2024_11_04_082851) do
 | 
			
		||||
  # These are extensions that must be enabled in order to support this database
 | 
			
		||||
  enable_extension "plpgsql"
 | 
			
		||||
 | 
			
		||||
@@ -260,6 +260,13 @@ ActiveRecord::Schema[7.1].define(version: 2024_10_22_214312) do
 | 
			
		||||
    t.bigint "status_ids", array: true
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  create_table "annual_report_statuses_per_account_counts", force: :cascade do |t|
 | 
			
		||||
   t.integer "year", null: false
 | 
			
		||||
   t.bigint "account_id", null: false
 | 
			
		||||
   t.bigint "statuses_count", null: false
 | 
			
		||||
   t.index ["year", "account_id"], name: "idx_on_year_account_id_ff3e167cef", unique: true
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  create_table "appeals", force: :cascade do |t|
 | 
			
		||||
    t.bigint "account_id", null: false
 | 
			
		||||
    t.bigint "account_warning_id", null: false
 | 
			
		||||
 
 | 
			
		||||
@@ -4,17 +4,20 @@ require 'rails_helper'
 | 
			
		||||
 | 
			
		||||
RSpec.describe AnnualReport::Percentiles do
 | 
			
		||||
  describe '#generate' do
 | 
			
		||||
    subject { described_class.new(account, Time.zone.now.year) }
 | 
			
		||||
    subject { described_class.new(account, year) }
 | 
			
		||||
 | 
			
		||||
    let(:year) { Time.zone.now.year }
 | 
			
		||||
 | 
			
		||||
    context 'with an inactive account' do
 | 
			
		||||
      let(:account) { Fabricate :account }
 | 
			
		||||
 | 
			
		||||
      it 'builds a report for an account' do
 | 
			
		||||
        described_class.prepare(year)
 | 
			
		||||
 | 
			
		||||
        expect(subject.generate)
 | 
			
		||||
          .to include(
 | 
			
		||||
            percentiles: include(
 | 
			
		||||
              followers: 0,
 | 
			
		||||
              statuses: 0
 | 
			
		||||
              statuses: 100
 | 
			
		||||
            )
 | 
			
		||||
          )
 | 
			
		||||
      end
 | 
			
		||||
@@ -25,16 +28,15 @@ RSpec.describe AnnualReport::Percentiles do
 | 
			
		||||
 | 
			
		||||
      before do
 | 
			
		||||
        Fabricate.times 2, :status # Others as `account`
 | 
			
		||||
        Fabricate.times 2, :follow # Others as `target_account`
 | 
			
		||||
        Fabricate.times 2, :status, account: account
 | 
			
		||||
        Fabricate.times 2, :follow, target_account: account
 | 
			
		||||
      end
 | 
			
		||||
 | 
			
		||||
      it 'builds a report for an account' do
 | 
			
		||||
        described_class.prepare(year)
 | 
			
		||||
 | 
			
		||||
        expect(subject.generate)
 | 
			
		||||
          .to include(
 | 
			
		||||
            percentiles: include(
 | 
			
		||||
              followers: 50,
 | 
			
		||||
              statuses: 50
 | 
			
		||||
            )
 | 
			
		||||
          )
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user