Add Fetch All Replies Part 1: Backend (#32615)
Signed-off-by: sneakers-the-rat <sneakers-the-rat@protonmail.com> Co-authored-by: jonny <j@nny.fyi> Co-authored-by: Claire <claire.github-309c@sitedethib.com> Co-authored-by: Kouhai <66407198+kouhaidev@users.noreply.github.com>
This commit is contained in:
		
							
								
								
									
										68
									
								
								app/services/activitypub/fetch_all_replies_service.rb
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										68
									
								
								app/services/activitypub/fetch_all_replies_service.rb
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,68 @@
 | 
			
		||||
# frozen_string_literal: true
 | 
			
		||||
 | 
			
		||||
# Collects the reply URIs advertised for a status and enqueues a
# FetchReplyWorker job for each one that should be (re)fetched.
#
# Differences from the parent service visible here:
# - replies are not restricted to the host of the replied-to status
#   (see #filter_by_host?),
# - replies already stored locally are filtered through the
#   `should_not_fetch_replies` / `should_fetch_replies` Status scopes,
# - the per-status limit is configurable through FETCH_REPLIES_MAX_SINGLE.
class ActivityPub::FetchAllRepliesService < ActivityPub::FetchRepliesService
  include JsonLdHelper

  # Limit of replies to fetch per status
  MAX_REPLIES = ENV.fetch('FETCH_REPLIES_MAX_SINGLE', 500).to_i

  # @param collection_or_uri [Hash, String] the replies collection, or the URI to load it from
  # @param status_uri [String] URI of the status whose replies are being fetched
  # @param max_pages [Integer, nil] forwarded to #collection_items as the page limit
  # @param request_id [String, nil] forwarded to every enqueued FetchReplyWorker job
  # @return [Array, nil] `[uris, n_pages]` (the enqueued URIs and the page counter
  #   reported by #collection_items), or nil when no collection could be loaded
  def call(collection_or_uri, status_uri, max_pages = nil, request_id: nil)
    @allow_synchronous_requests = true
    @collection_or_uri = collection_or_uri
    @status_uri = status_uri

    # #collection_items returns nil when the collection cannot be loaded;
    # destructuring nil leaves both @items and n_pages nil.
    @items, n_pages = collection_items(collection_or_uri, max_pages)
    @items = filtered_replies
    return if @items.nil?

    FetchReplyWorker.push_bulk(@items) { |reply_uri| [reply_uri, { 'request_id' => request_id }] }

    [@items, n_pages]
  end

  private

  # Turns @items into the list of reply URIs that should actually be fetched.
  #
  # @return [Array<String>, nil] at most MAX_REPLIES URIs, or nil when @items is nil
  def filtered_replies
    return if @items.nil?

    # Find all statuses that we *shouldn't* update the replies for, and use that as a filter.
    # We don't assume that we have the statuses before they're created,
    # hence the negative filter -
    # "keep all these uris except the ones we already have"
    # instead of
    # "keep all these uris that match some conditions on existing Status objects"
    #
    # Typically we assume the number of replies we *shouldn't* fetch is smaller than the
    # replies we *should* fetch, so we also minimize the number of uris we should load here.
    #
    # A collection may list the same URI more than once; de-duplicate so it is
    # enqueued only once and does not eat into MAX_REPLIES. Items that resolve to
    # no URI are dropped as well (assumes value_or_id, defined outside this file,
    # returns nil for those - TODO confirm).
    uris = @items.filter_map { |item| value_or_id(item) }.uniq

    # Expand collection to get replies in the DB that were
    # - not included in the collection,
    # - that we have locally
    # - but we have no local followers and thus don't get updates/deletes for
    parent_id = Status.where(uri: @status_uri).pick(:id)
    if parent_id
      unsubscribed_replies = Status
                             .where.not(uri: uris)
                             .where(in_reply_to_id: parent_id)
                             .unsubscribed
                             .pluck(:uri)
      uris.concat(unsubscribed_replies)
    end

    known_statuses = Status.where(uri: uris)
    dont_update = known_statuses.should_not_fetch_replies.pluck(:uri)

    # touch all statuses that already exist and that we're about to update
    known_statuses.should_fetch_replies.touch_all(:fetched_replies_at)

    # Drop only the statuses flagged as not needing a refresh (URIs we do not
    # have yet are kept), then cap the batch at MAX_REPLIES.
    uris = (uris - dont_update).take(MAX_REPLIES)

    Rails.logger.debug { "FetchAllRepliesService - #{@collection_or_uri}: Fetching filtered statuses: #{uris}" }
    uris
  end

  # Replies hosted on a different domain than the replied-to status are kept.
  def filter_by_host?
    false
  end
end
 | 
			
		||||
@@ -33,7 +33,7 @@ class ActivityPub::FetchFeaturedCollectionService < BaseService
 | 
			
		||||
    return collection_or_uri if collection_or_uri.is_a?(Hash)
 | 
			
		||||
    return if non_matching_uri_hosts?(@account.uri, collection_or_uri)
 | 
			
		||||
 | 
			
		||||
    fetch_resource_without_id_validation(collection_or_uri, local_follower, true)
 | 
			
		||||
    fetch_resource_without_id_validation(collection_or_uri, local_follower, raise_on_error: :temporary)
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def process_items(items)
 | 
			
		||||
 
 | 
			
		||||
@@ -45,7 +45,7 @@ class ActivityPub::FetchFeaturedTagsCollectionService < BaseService
 | 
			
		||||
    return collection_or_uri if collection_or_uri.is_a?(Hash)
 | 
			
		||||
    return if non_matching_uri_hosts?(@account.uri, collection_or_uri)
 | 
			
		||||
 | 
			
		||||
    fetch_resource_without_id_validation(collection_or_uri, local_follower, true)
 | 
			
		||||
    fetch_resource_without_id_validation(collection_or_uri, local_follower, raise_on_error: :temporary)
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  def process_items(items)
 | 
			
		||||
 
 | 
			
		||||
@@ -13,7 +13,7 @@ class ActivityPub::FetchRemoteStatusService < BaseService
 | 
			
		||||
 | 
			
		||||
    @request_id = request_id || "#{Time.now.utc.to_i}-status-#{uri}"
 | 
			
		||||
    @json = if prefetched_body.nil?
 | 
			
		||||
              fetch_resource(uri, true, on_behalf_of)
 | 
			
		||||
              fetch_status(uri, true, on_behalf_of)
 | 
			
		||||
            else
 | 
			
		||||
              body_to_json(prefetched_body, compare_id: uri)
 | 
			
		||||
            end
 | 
			
		||||
@@ -80,4 +80,20 @@ class ActivityPub::FetchRemoteStatusService < BaseService
 | 
			
		||||
  # Whether the `type` of the fetched document (@json) is among the object
  # types listed by ActivityPub::Activity::Create, either as supported or as
  # converted types.
  def expected_object_type?
    accepted_types = ActivityPub::Activity::Create::SUPPORTED_TYPES + ActivityPub::Activity::Create::CONVERTED_TYPES

    equals_or_includes_any?(@json['type'], accepted_types)
  end
 | 
			
		||||
 | 
			
		||||
  # Fetches the document for a status and prunes our local copy when the
  # origin answers 404 for it.
  #
  # @param uri [String] URI of the status to fetch
  # @param id_is_known [Boolean] forwarded untouched to fetch_resource
  # @param on_behalf_of [Object, nil] forwarded untouched to fetch_resource
  # @return [Object, nil] whatever fetch_resource returns on success; always nil
  #   when the request raised Mastodon::UnexpectedResponseError
  def fetch_status(uri, id_is_known, on_behalf_of = nil)
    fetch_resource(uri, id_is_known, on_behalf_of, raise_on_error: true)
  rescue Mastodon::UnexpectedResponseError => e
    # The error may carry no response (FetchRepliesService guards for this too),
    # so use safe navigation instead of raising NoMethodError from the handler.
    return unless e.response&.code == 404

    # If this is a 404 from a status from an account that has no local followers, delete it
    existing_status = Status.find_by(uri: uri)
    if existing_status&.unsubscribed? && existing_status.distributable?
      Rails.logger.debug { "FetchRemoteStatusService - Got 404 for orphaned status with URI #{uri}, deleting" }
      Tombstone.find_or_create_by(uri: uri, account: existing_status.account)
      RemoveStatusService.new.call(existing_status, redraft: false)
    end

    # The caller stores this method's result as the fetched JSON, so the return
    # value of the clean-up branch above must never leak out of the error path.
    nil
  end
 | 
			
		||||
end
 | 
			
		||||
 
 | 
			
		||||
@@ -3,11 +3,14 @@
 | 
			
		||||
class ActivityPub::FetchRepliesService < BaseService
 | 
			
		||||
  include JsonLdHelper
 | 
			
		||||
 | 
			
		||||
  # Limit of fetched replies
 | 
			
		||||
  MAX_REPLIES = 5
 | 
			
		||||
 | 
			
		||||
  def call(parent_status, collection_or_uri, allow_synchronous_requests: true, request_id: nil)
 | 
			
		||||
    @account = parent_status.account
 | 
			
		||||
    @allow_synchronous_requests = allow_synchronous_requests
 | 
			
		||||
 | 
			
		||||
    @items = collection_items(collection_or_uri)
 | 
			
		||||
    @items, = collection_items(collection_or_uri)
 | 
			
		||||
    return if @items.nil?
 | 
			
		||||
 | 
			
		||||
    FetchReplyWorker.push_bulk(filtered_replies) { |reply_uri| [reply_uri, { 'request_id' => request_id }] }
 | 
			
		||||
@@ -17,25 +20,39 @@ class ActivityPub::FetchRepliesService < BaseService
 | 
			
		||||
 | 
			
		||||
  private
 | 
			
		||||
 | 
			
		||||
  # Walks a collection page by page (following `first`, then `next`) and
  # gathers the listed items.
  #
  # @param collection_or_uri [Hash, String] the collection, or the URI to load it from
  # @param max_pages [Integer, nil] stop once this many pages were read; nil means no page limit
  # @return [Array, nil] `[all_items, n_pages]`, or nil when the collection or
  #   its first page could not be obtained as a Hash
  def collection_items(collection_or_uri, max_pages = nil)
    collection = fetch_collection(collection_or_uri)
    return unless collection.is_a?(Hash)

    collection = fetch_collection(collection['first']) if collection['first'].present?
    return unless collection.is_a?(Hash)

    # A bare `MAX_REPLIES` is resolved lexically and would always be this
    # class's own constant, even when the receiver is a subclass that declares
    # its own limit. Look it up through the receiver's class instead.
    max_items = self.class::MAX_REPLIES

    all_items = []
    n_pages = 1
    while collection.is_a?(Hash)
      items = case collection['type']
              when 'Collection', 'CollectionPage'
                collection['items']
              when 'OrderedCollection', 'OrderedCollectionPage'
                collection['orderedItems']
              end

      all_items.concat(as_array(items))

      break if all_items.size >= max_items
      break if !max_pages.nil? && n_pages >= max_pages

      collection = collection['next'].present? ? fetch_collection(collection['next']) : nil
      # NOTE(review): the counter is also bumped when there is no next page, so
      # it ends one above the number of pages read when the collection runs out.
      # Kept as-is because callers outside this view may rely on it - confirm.
      n_pages += 1
    end

    [all_items, n_pages]
  end
 | 
			
		||||
 | 
			
		||||
  def fetch_collection(collection_or_uri)
 | 
			
		||||
    return collection_or_uri if collection_or_uri.is_a?(Hash)
 | 
			
		||||
    return unless @allow_synchronous_requests
 | 
			
		||||
    return if non_matching_uri_hosts?(@account.uri, collection_or_uri)
 | 
			
		||||
    return if filter_by_host? && non_matching_uri_hosts?(@account.uri, collection_or_uri)
 | 
			
		||||
 | 
			
		||||
    # NOTE: For backward compatibility reasons, Mastodon signs outgoing
 | 
			
		||||
    # queries incorrectly by default.
 | 
			
		||||
@@ -45,19 +62,28 @@ class ActivityPub::FetchRepliesService < BaseService
 | 
			
		||||
    #
 | 
			
		||||
    # Therefore, retry with correct signatures if this fails.
 | 
			
		||||
    begin
 | 
			
		||||
      fetch_resource_without_id_validation(collection_or_uri, nil, true)
 | 
			
		||||
      fetch_resource_without_id_validation(collection_or_uri, nil, raise_on_error: :temporary)
 | 
			
		||||
    rescue Mastodon::UnexpectedResponseError => e
 | 
			
		||||
      raise unless e.response && e.response.code == 401 && Addressable::URI.parse(collection_or_uri).query.present?
 | 
			
		||||
 | 
			
		||||
      fetch_resource_without_id_validation(collection_or_uri, nil, true, request_options: { omit_query_string: false })
 | 
			
		||||
      fetch_resource_without_id_validation(collection_or_uri, nil, raise_on_error: :temporary, request_options: { omit_query_string: false })
 | 
			
		||||
    end
 | 
			
		||||
  end
 | 
			
		||||
 | 
			
		||||
  # Reduces @items to the reply URIs that will be fetched.
  #
  # @return [Array] at most MAX_REPLIES URIs, in collection order
  def filtered_replies
    reply_uris = @items.map { |item| value_or_id(item) }

    # Only fetch replies to the same server as the original status to avoid
    # amplification attacks.
    reply_uris = reply_uris.reject { |uri| non_matching_uri_hosts?(@account.uri, uri) } if filter_by_host?

    # Also cap the number of fetched replies to limit potential for DoS.
    reply_uris.take(MAX_REPLIES)
  end
 | 
			
		||||
 | 
			
		||||
  # Switch consulted by #fetch_collection and #filtered_replies: when true,
  # collections and replies hosted on a different domain than the replied-to
  # post are rejected. Always on for this service.
  def filter_by_host?
    true
  end
 | 
			
		||||
end
 | 
			
		||||
 
 | 
			
		||||
@@ -69,6 +69,6 @@ class ActivityPub::SynchronizeFollowersService < BaseService
 | 
			
		||||
    return collection_or_uri if collection_or_uri.is_a?(Hash)
 | 
			
		||||
    return if non_matching_uri_hosts?(@account.uri, collection_or_uri)
 | 
			
		||||
 | 
			
		||||
    fetch_resource_without_id_validation(collection_or_uri, nil, true)
 | 
			
		||||
    fetch_resource_without_id_validation(collection_or_uri, nil, raise_on_error: :temporary)
 | 
			
		||||
  end
 | 
			
		||||
end
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user