Change trending hashtags to not disappear instantly after midnight (#11712)
parent
ac6935b629
commit
70ddef2654
|
@ -57,7 +57,7 @@ module Admin
|
||||||
scope = scope.unreviewed if filter_params[:review] == 'unreviewed'
|
scope = scope.unreviewed if filter_params[:review] == 'unreviewed'
|
||||||
scope = scope.reviewed.order(reviewed_at: :desc) if filter_params[:review] == 'reviewed'
|
scope = scope.reviewed.order(reviewed_at: :desc) if filter_params[:review] == 'reviewed'
|
||||||
scope = scope.pending_review.order(requested_review_at: :desc) if filter_params[:review] == 'pending_review'
|
scope = scope.pending_review.order(requested_review_at: :desc) if filter_params[:review] == 'pending_review'
|
||||||
scope.order(score: :desc)
|
scope.order(max_score: :desc)
|
||||||
end
|
end
|
||||||
|
|
||||||
def filter_params
|
def filter_params
|
||||||
|
|
|
@ -63,7 +63,7 @@ class FeedManager
|
||||||
reblog_key = key(type, account_id, 'reblogs')
|
reblog_key = key(type, account_id, 'reblogs')
|
||||||
|
|
||||||
# Remove any items past the MAX_ITEMS'th entry in our feed
|
# Remove any items past the MAX_ITEMS'th entry in our feed
|
||||||
redis.zremrangebyrank(timeline_key, '0', (-(FeedManager::MAX_ITEMS + 1)).to_s)
|
redis.zremrangebyrank(timeline_key, 0, -(FeedManager::MAX_ITEMS + 1))
|
||||||
|
|
||||||
# Get the score of the REBLOG_FALLOFF'th item in our feed, and stop
|
# Get the score of the REBLOG_FALLOFF'th item in our feed, and stop
|
||||||
# tracking anything after it for deduplication purposes.
|
# tracking anything after it for deduplication purposes.
|
||||||
|
|
|
@ -7,14 +7,14 @@
|
||||||
# name :string default(""), not null
|
# name :string default(""), not null
|
||||||
# created_at :datetime not null
|
# created_at :datetime not null
|
||||||
# updated_at :datetime not null
|
# updated_at :datetime not null
|
||||||
# score :integer
|
|
||||||
# usable :boolean
|
# usable :boolean
|
||||||
# trendable :boolean
|
# trendable :boolean
|
||||||
# listable :boolean
|
# listable :boolean
|
||||||
# reviewed_at :datetime
|
# reviewed_at :datetime
|
||||||
# requested_review_at :datetime
|
# requested_review_at :datetime
|
||||||
# last_status_at :datetime
|
# last_status_at :datetime
|
||||||
# last_trend_at :datetime
|
# max_score :float
|
||||||
|
# max_score_at :datetime
|
||||||
#
|
#
|
||||||
|
|
||||||
class Tag < ApplicationRecord
|
class Tag < ApplicationRecord
|
||||||
|
|
|
@ -7,6 +7,8 @@ class TrendingTags
|
||||||
THRESHOLD = 5
|
THRESHOLD = 5
|
||||||
LIMIT = 10
|
LIMIT = 10
|
||||||
REVIEW_THRESHOLD = 3
|
REVIEW_THRESHOLD = 3
|
||||||
|
MAX_SCORE_COOLDOWN = 3.days.freeze
|
||||||
|
MAX_SCORE_HALFLIFE = 6.hours.freeze
|
||||||
|
|
||||||
class << self
|
class << self
|
||||||
include Redisable
|
include Redisable
|
||||||
|
@ -16,14 +18,75 @@ class TrendingTags
|
||||||
|
|
||||||
increment_historical_use!(tag.id, at_time)
|
increment_historical_use!(tag.id, at_time)
|
||||||
increment_unique_use!(tag.id, account.id, at_time)
|
increment_unique_use!(tag.id, account.id, at_time)
|
||||||
increment_vote!(tag, at_time)
|
increment_use!(tag.id, at_time)
|
||||||
|
|
||||||
tag.update(last_status_at: Time.now.utc) if tag.last_status_at.nil? || tag.last_status_at < 12.hours.ago
|
tag.update(last_status_at: Time.now.utc) if tag.last_status_at.nil? || tag.last_status_at < 12.hours.ago
|
||||||
tag.update(last_trend_at: Time.now.utc) if trending?(tag) && (tag.last_trend_at.nil? || tag.last_trend_at < 12.hours.ago)
|
end
|
||||||
|
|
||||||
|
def update!(at_time = Time.now.utc)
|
||||||
|
tag_ids = redis.smembers("#{KEY}:used:#{at_time.beginning_of_day.to_i}") + redis.zrange(KEY, 0, -1)
|
||||||
|
tags = Tag.where(id: tag_ids.uniq)
|
||||||
|
|
||||||
|
# First pass to calculate scores and update the set
|
||||||
|
|
||||||
|
tags.each do |tag|
|
||||||
|
expected = redis.pfcount("activity:tags:#{tag.id}:#{(at_time - 1.day).beginning_of_day.to_i}:accounts").to_f
|
||||||
|
expected = 1.0 if expected.zero?
|
||||||
|
observed = redis.pfcount("activity:tags:#{tag.id}:#{at_time.beginning_of_day.to_i}:accounts").to_f
|
||||||
|
max_time = tag.max_score_at
|
||||||
|
max_score = tag.max_score
|
||||||
|
max_score = 0 if max_time.nil? || max_time < (at_time - MAX_SCORE_COOLDOWN)
|
||||||
|
|
||||||
|
score = begin
|
||||||
|
if expected > observed || observed < THRESHOLD
|
||||||
|
0
|
||||||
|
else
|
||||||
|
((observed - expected)**2) / expected
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
if score > max_score
|
||||||
|
max_score = score
|
||||||
|
max_time = at_time
|
||||||
|
|
||||||
|
# Not interested in triggering any callbacks for this
|
||||||
|
tag.update_columns(max_score: max_score, max_score_at: max_time)
|
||||||
|
end
|
||||||
|
|
||||||
|
decaying_score = max_score * (0.5**((at_time.to_f - max_time.to_f) / MAX_SCORE_HALFLIFE.to_f))
|
||||||
|
|
||||||
|
if decaying_score.zero?
|
||||||
|
redis.zrem(KEY, tag.id)
|
||||||
|
else
|
||||||
|
redis.zadd(KEY, decaying_score, tag.id)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
users_for_review = User.staff.includes(:account).to_a.select(&:allows_trending_tag_emails?)
|
||||||
|
|
||||||
|
# Second pass to notify about previously unreviewed trends
|
||||||
|
|
||||||
|
tags.each do |tag|
|
||||||
|
current_rank = redis.zrevrank(KEY, tag.id)
|
||||||
|
needs_review_notification = tag.requires_review? && !tag.requested_review?
|
||||||
|
rank_passes_threshold = current_rank.present? && current_rank <= REVIEW_THRESHOLD
|
||||||
|
|
||||||
|
next unless !tag.trendable? && rank_passes_threshold && needs_review_notification
|
||||||
|
|
||||||
|
tag.touch(:requested_review_at)
|
||||||
|
|
||||||
|
users_for_review.each do |user|
|
||||||
|
AdminMailer.new_trending_tag(user.account, tag).deliver_later!
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
# Trim older items
|
||||||
|
|
||||||
|
redis.zremrangebyrank(KEY, 0, -(LIMIT + 1))
|
||||||
end
|
end
|
||||||
|
|
||||||
def get(limit, filtered: true)
|
def get(limit, filtered: true)
|
||||||
tag_ids = redis.zrevrange("#{KEY}:#{Time.now.utc.beginning_of_day.to_i}", 0, LIMIT - 1).map(&:to_i)
|
tag_ids = redis.zrevrange(KEY, 0, LIMIT - 1).map(&:to_i)
|
||||||
|
|
||||||
tags = Tag.where(id: tag_ids)
|
tags = Tag.where(id: tag_ids)
|
||||||
tags = tags.where(trendable: true) if filtered
|
tags = tags.where(trendable: true) if filtered
|
||||||
|
@ -33,8 +96,8 @@ class TrendingTags
|
||||||
end
|
end
|
||||||
|
|
||||||
def trending?(tag)
|
def trending?(tag)
|
||||||
rank = redis.zrevrank("#{KEY}:#{Time.now.utc.beginning_of_day.to_i}", tag.id)
|
rank = redis.zrevrank(KEY, tag.id)
|
||||||
rank.present? && rank <= LIMIT
|
rank.present? && rank < LIMIT
|
||||||
end
|
end
|
||||||
|
|
||||||
private
|
private
|
||||||
|
@ -51,31 +114,10 @@ class TrendingTags
|
||||||
redis.expire(key, EXPIRE_HISTORY_AFTER)
|
redis.expire(key, EXPIRE_HISTORY_AFTER)
|
||||||
end
|
end
|
||||||
|
|
||||||
def increment_vote!(tag, at_time)
|
def increment_use!(tag_id, at_time)
|
||||||
key = "#{KEY}:#{at_time.beginning_of_day.to_i}"
|
key = "#{KEY}:used:#{at_time.beginning_of_day.to_i}"
|
||||||
expected = redis.pfcount("activity:tags:#{tag.id}:#{(at_time - 1.day).beginning_of_day.to_i}:accounts").to_f
|
redis.sadd(key, tag_id)
|
||||||
expected = 1.0 if expected.zero?
|
redis.expire(key, EXPIRE_HISTORY_AFTER)
|
||||||
observed = redis.pfcount("activity:tags:#{tag.id}:#{at_time.beginning_of_day.to_i}:accounts").to_f
|
|
||||||
|
|
||||||
if expected > observed || observed < THRESHOLD
|
|
||||||
redis.zrem(key, tag.id)
|
|
||||||
else
|
|
||||||
score = ((observed - expected)**2) / expected
|
|
||||||
old_rank = redis.zrevrank(key, tag.id)
|
|
||||||
|
|
||||||
redis.zadd(key, score, tag.id)
|
|
||||||
request_review!(tag) if (old_rank.nil? || old_rank > REVIEW_THRESHOLD) && redis.zrevrank(key, tag.id) <= REVIEW_THRESHOLD && !tag.trendable? && tag.requires_review? && !tag.requested_review?
|
|
||||||
end
|
|
||||||
|
|
||||||
redis.expire(key, EXPIRE_TRENDS_AFTER)
|
|
||||||
end
|
|
||||||
|
|
||||||
def request_review!(tag)
|
|
||||||
return unless Setting.trends
|
|
||||||
|
|
||||||
tag.touch(:requested_review_at)
|
|
||||||
|
|
||||||
User.staff.includes(:account).find_each { |u| AdminMailer.new_trending_tag(u.account, tag).deliver_later! if u.allows_trending_tag_emails? }
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
|
@ -0,0 +1,11 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
class Scheduler::TrendingTagsScheduler
|
||||||
|
include Sidekiq::Worker
|
||||||
|
|
||||||
|
sidekiq_options unique: :until_executed, retry: 0
|
||||||
|
|
||||||
|
def perform
|
||||||
|
TrendingTags.update! if Setting.trends
|
||||||
|
end
|
||||||
|
end
|
|
@ -9,6 +9,9 @@
|
||||||
scheduled_statuses_scheduler:
|
scheduled_statuses_scheduler:
|
||||||
every: '5m'
|
every: '5m'
|
||||||
class: Scheduler::ScheduledStatusesScheduler
|
class: Scheduler::ScheduledStatusesScheduler
|
||||||
|
trending_tags_scheduler:
|
||||||
|
every: '5m'
|
||||||
|
class: Scheduler::TrendingTagsScheduler
|
||||||
media_cleanup_scheduler:
|
media_cleanup_scheduler:
|
||||||
cron: '<%= Random.rand(0..59) %> <%= Random.rand(3..5) %> * * *'
|
cron: '<%= Random.rand(0..59) %> <%= Random.rand(3..5) %> * * *'
|
||||||
class: Scheduler::MediaCleanupScheduler
|
class: Scheduler::MediaCleanupScheduler
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
class AddMaxScoreToTags < ActiveRecord::Migration[5.2]
|
||||||
|
def change
|
||||||
|
add_column :tags, :max_score, :float
|
||||||
|
add_column :tags, :max_score_at, :datetime
|
||||||
|
end
|
||||||
|
end
|
|
@ -0,0 +1,12 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
class RemoveScoreFromTags < ActiveRecord::Migration[5.2]
|
||||||
|
disable_ddl_transaction!
|
||||||
|
|
||||||
|
def change
|
||||||
|
safety_assured do
|
||||||
|
remove_column :tags, :score, :int
|
||||||
|
remove_column :tags, :last_trend_at, :datetime
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
|
@ -10,7 +10,7 @@
|
||||||
#
|
#
|
||||||
# It's strongly recommended that you check this file into your version control system.
|
# It's strongly recommended that you check this file into your version control system.
|
||||||
|
|
||||||
ActiveRecord::Schema.define(version: 2019_08_23_221802) do
|
ActiveRecord::Schema.define(version: 2019_09_01_040524) do
|
||||||
|
|
||||||
# These are extensions that must be enabled in order to support this database
|
# These are extensions that must be enabled in order to support this database
|
||||||
enable_extension "plpgsql"
|
enable_extension "plpgsql"
|
||||||
|
@ -664,14 +664,14 @@ ActiveRecord::Schema.define(version: 2019_08_23_221802) do
|
||||||
t.string "name", default: "", null: false
|
t.string "name", default: "", null: false
|
||||||
t.datetime "created_at", null: false
|
t.datetime "created_at", null: false
|
||||||
t.datetime "updated_at", null: false
|
t.datetime "updated_at", null: false
|
||||||
t.integer "score"
|
|
||||||
t.boolean "usable"
|
t.boolean "usable"
|
||||||
t.boolean "trendable"
|
t.boolean "trendable"
|
||||||
t.boolean "listable"
|
t.boolean "listable"
|
||||||
t.datetime "reviewed_at"
|
t.datetime "reviewed_at"
|
||||||
t.datetime "requested_review_at"
|
t.datetime "requested_review_at"
|
||||||
t.datetime "last_status_at"
|
t.datetime "last_status_at"
|
||||||
t.datetime "last_trend_at"
|
t.float "max_score"
|
||||||
|
t.datetime "max_score_at"
|
||||||
t.index "lower((name)::text)", name: "index_tags_on_name_lower", unique: true
|
t.index "lower((name)::text)", name: "index_tags_on_name_lower", unique: true
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,68 @@
|
||||||
|
require 'rails_helper'
|
||||||
|
|
||||||
|
RSpec.describe TrendingTags do
|
||||||
|
describe '.record_use!' do
|
||||||
|
pending
|
||||||
|
end
|
||||||
|
|
||||||
|
describe '.update!' do
|
||||||
|
let!(:at_time) { Time.now.utc }
|
||||||
|
let!(:tag1) { Fabricate(:tag, name: 'Catstodon') }
|
||||||
|
let!(:tag2) { Fabricate(:tag, name: 'DogsOfMastodon') }
|
||||||
|
let!(:tag3) { Fabricate(:tag, name: 'OCs') }
|
||||||
|
|
||||||
|
before do
|
||||||
|
allow(Redis.current).to receive(:pfcount) do |key|
|
||||||
|
case key
|
||||||
|
when "activity:tags:#{tag1.id}:#{(at_time - 1.day).beginning_of_day.to_i}:accounts"
|
||||||
|
2
|
||||||
|
when "activity:tags:#{tag1.id}:#{at_time.beginning_of_day.to_i}:accounts"
|
||||||
|
16
|
||||||
|
when "activity:tags:#{tag2.id}:#{(at_time - 1.day).beginning_of_day.to_i}:accounts"
|
||||||
|
0
|
||||||
|
when "activity:tags:#{tag2.id}:#{at_time.beginning_of_day.to_i}:accounts"
|
||||||
|
4
|
||||||
|
when "activity:tags:#{tag3.id}:#{(at_time - 1.day).beginning_of_day.to_i}:accounts"
|
||||||
|
13
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
Redis.current.zadd('trending_tags', 0.9, tag3.id)
|
||||||
|
Redis.current.sadd("trending_tags:used:#{at_time.beginning_of_day.to_i}", [tag1.id, tag2.id])
|
||||||
|
|
||||||
|
tag3.update(max_score: 0.9, max_score_at: (at_time - 1.day).beginning_of_day + 12.hours)
|
||||||
|
|
||||||
|
described_class.update!(at_time)
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'calculates and re-calculates scores' do
|
||||||
|
expect(described_class.get(10, filtered: false)).to eq [tag1, tag3]
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'omits hashtags below threshold' do
|
||||||
|
expect(described_class.get(10, filtered: false)).to_not include(tag2)
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'decays scores' do
|
||||||
|
expect(Redis.current.zscore('trending_tags', tag3.id)).to be < 0.9
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
describe '.trending?' do
|
||||||
|
let(:tag) { Fabricate(:tag) }
|
||||||
|
|
||||||
|
before do
|
||||||
|
10.times { |i| Redis.current.zadd('trending_tags', i + 1, Fabricate(:tag).id) }
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'returns true if the hashtag is within limit' do
|
||||||
|
Redis.current.zadd('trending_tags', 11, tag.id)
|
||||||
|
expect(described_class.trending?(tag)).to be true
|
||||||
|
end
|
||||||
|
|
||||||
|
it 'returns false if the hashtag is outside the limit' do
|
||||||
|
Redis.current.zadd('trending_tags', 0, tag.id)
|
||||||
|
expect(described_class.trending?(tag)).to be false
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
Reference in New Issue