From 501d6197c4a32172e2340c90379b9c3fdb925c08 Mon Sep 17 00:00:00 2001 From: Claire Date: Fri, 21 Apr 2023 18:14:19 +0200 Subject: [PATCH] Change automatic post deletion thresholds and load detection (#24614) --- .../accounts_statuses_cleanup_scheduler.rb | 45 ++++++++++--------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb b/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb index f237f1dc9..5203d4c25 100644 --- a/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb +++ b/app/workers/scheduler/accounts_statuses_cleanup_scheduler.rb @@ -7,28 +7,30 @@ class Scheduler::AccountsStatusesCleanupScheduler # This limit is mostly to be nice to the fediverse at large and not # generate too much traffic. # This also helps limiting the running time of the scheduler itself. - MAX_BUDGET = 150 + MAX_BUDGET = 300 - # This is an attempt to spread the load across instances, as various - # accounts are likely to have various followers. + # This is an attempt to spread the load across remote servers, as + # spreading deletions across diverse accounts is likely to spread + # the deletion across diverse followers. It also helps each individual + # user see some effect sooner. PER_ACCOUNT_BUDGET = 5 # This is an attempt to limit the workload generated by status removal - # jobs to something the particular instance can handle. - PER_THREAD_BUDGET = 6 + # jobs to something the particular server can handle. + PER_THREAD_BUDGET = 5 - # Those avoid loading an instance that is already under load - MAX_DEFAULT_SIZE = 200 - MAX_DEFAULT_LATENCY = 5 - MAX_PUSH_SIZE = 500 - MAX_PUSH_LATENCY = 10 - - # 'pull' queue has lower priority jobs, and it's unlikely that pushing - # deletes would cause much issues with this queue if it didn't cause issues - # with default and push. Yet, do not enqueue deletes if the instance is - # lagging behind too much. 
- MAX_PULL_SIZE = 10_000 - MAX_PULL_LATENCY = 5.minutes.to_i + # These are latency limits (in seconds) on various queues above which a server is + # considered to be under load, causing the auto-deletion to be entirely + # skipped for that run. + LOAD_LATENCY_THRESHOLDS = { + default: 5, + push: 10, + # The `pull` queue has lower-priority jobs, and it's unlikely that + # pushing deletes would cause many issues with this queue if it didn't + # cause issues with `default` and `push`. Yet, do not enqueue deletes + # if the server is lagging behind too much. + pull: 5.minutes.to_i, + }.freeze sidekiq_options retry: 0, lock: :until_executed, lock_ttl: 1.day.to_i @@ -62,19 +64,20 @@ class Scheduler::AccountsStatusesCleanupScheduler end def compute_budget + # Each post deletion is a `RemovalWorker` job (on the `default` queue), each + # potentially spawning many `ActivityPub::DeliveryWorker` jobs (on the `push` queue). threads = Sidekiq::ProcessSet.new.select { |x| x['queues'].include?('push') }.pluck('concurrency').sum [PER_THREAD_BUDGET * threads, MAX_BUDGET].min end def under_load? - queue_under_load?('default', MAX_DEFAULT_SIZE, MAX_DEFAULT_LATENCY) || queue_under_load?('push', MAX_PUSH_SIZE, MAX_PUSH_LATENCY) || queue_under_load?('pull', MAX_PULL_SIZE, MAX_PULL_LATENCY) + LOAD_LATENCY_THRESHOLDS.any? { |queue, max_latency| queue_under_load?(queue, max_latency) } end private - def queue_under_load?(name, max_size, max_latency) - queue = Sidekiq::Queue.new(name) - queue.size > max_size || queue.latency > max_latency + def queue_under_load?(name, max_latency) + Sidekiq::Queue.new(name).latency > max_latency end def last_processed_id