2024-02-15 17:10:39 +01:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"fmt"
|
2024-02-22 13:05:44 +01:00
|
|
|
"slices"
|
2024-02-15 17:10:39 +01:00
|
|
|
"time"
|
|
|
|
|
|
|
|
"github.com/rs/zerolog"
|
|
|
|
"github.com/uabluerail/indexer/pds"
|
|
|
|
"github.com/uabluerail/indexer/repo"
|
|
|
|
"gorm.io/gorm"
|
|
|
|
)
|
|
|
|
|
|
|
|
type Scheduler struct {
|
|
|
|
db *gorm.DB
|
|
|
|
output chan<- WorkItem
|
|
|
|
|
|
|
|
queue map[string]*repo.Repo
|
|
|
|
inProgress map[string]*repo.Repo
|
|
|
|
}
|
|
|
|
|
|
|
|
func NewScheduler(output chan<- WorkItem, db *gorm.DB) *Scheduler {
|
|
|
|
return &Scheduler{
|
|
|
|
db: db,
|
|
|
|
output: output,
|
|
|
|
queue: map[string]*repo.Repo{},
|
|
|
|
inProgress: map[string]*repo.Repo{},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *Scheduler) Start(ctx context.Context) error {
|
|
|
|
go s.run(ctx)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *Scheduler) run(ctx context.Context) {
|
|
|
|
log := zerolog.Ctx(ctx)
|
|
|
|
t := time.NewTicker(time.Minute)
|
|
|
|
defer t.Stop()
|
|
|
|
|
|
|
|
if err := s.fillQueue(ctx); err != nil {
|
|
|
|
log.Error().Err(err).Msgf("Failed to get more tasks for the queue: %s", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
done := make(chan string)
|
|
|
|
for {
|
|
|
|
if len(s.queue) > 0 {
|
|
|
|
next := WorkItem{signal: make(chan struct{})}
|
|
|
|
for _, r := range s.queue {
|
|
|
|
next.Repo = r
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return
|
|
|
|
case <-t.C:
|
|
|
|
if err := s.fillQueue(ctx); err != nil {
|
|
|
|
log.Error().Err(err).Msgf("Failed to get more tasks for the queue: %s", err)
|
|
|
|
}
|
|
|
|
case s.output <- next:
|
|
|
|
delete(s.queue, next.Repo.DID)
|
|
|
|
s.inProgress[next.Repo.DID] = next.Repo
|
|
|
|
go func(did string, ch chan struct{}) {
|
|
|
|
select {
|
|
|
|
case <-ch:
|
|
|
|
case <-ctx.Done():
|
|
|
|
}
|
|
|
|
done <- did
|
|
|
|
}(next.Repo.DID, next.signal)
|
|
|
|
s.updateQueueLenMetrics()
|
|
|
|
case did := <-done:
|
|
|
|
delete(s.inProgress, did)
|
|
|
|
s.updateQueueLenMetrics()
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return
|
|
|
|
case <-t.C:
|
|
|
|
if err := s.fillQueue(ctx); err != nil {
|
|
|
|
log.Error().Err(err).Msgf("Failed to get more tasks for the queue: %s", err)
|
|
|
|
}
|
|
|
|
case did := <-done:
|
|
|
|
delete(s.inProgress, did)
|
|
|
|
s.updateQueueLenMetrics()
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *Scheduler) fillQueue(ctx context.Context) error {
|
|
|
|
const maxQueueLen = 10000
|
2024-02-16 16:23:34 +01:00
|
|
|
const maxAttempts = 3
|
2024-02-15 17:10:39 +01:00
|
|
|
|
|
|
|
if len(s.queue)+len(s.inProgress) >= maxQueueLen {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
remotes := []pds.PDS{}
|
|
|
|
if err := s.db.Find(&remotes).Error; err != nil {
|
|
|
|
return fmt.Errorf("failed to get the list of PDSs: %w", err)
|
|
|
|
}
|
2024-02-22 13:05:44 +01:00
|
|
|
|
|
|
|
remotes = slices.DeleteFunc(remotes, func(pds pds.PDS) bool {
|
|
|
|
return pds.Disabled
|
|
|
|
})
|
|
|
|
perPDSLimit := maxQueueLen
|
2024-02-19 19:34:26 +01:00
|
|
|
if len(remotes) > 0 {
|
|
|
|
perPDSLimit = maxQueueLen * 2 / len(remotes)
|
|
|
|
}
|
2024-02-15 17:10:39 +01:00
|
|
|
|
2024-02-21 10:35:25 +01:00
|
|
|
// Fake remote to account for repos we didn't have a PDS for yet.
|
|
|
|
remotes = append(remotes, pds.PDS{ID: pds.Unknown})
|
|
|
|
|
2024-02-15 17:10:39 +01:00
|
|
|
for _, remote := range remotes {
|
|
|
|
repos := []repo.Repo{}
|
|
|
|
|
2024-02-16 16:23:34 +01:00
|
|
|
err := s.db.Raw(`SELECT * FROM "repos" WHERE pds = ? AND (last_indexed_rev is null OR last_indexed_rev = '') AND failed_attempts < ?
|
2024-02-15 19:39:29 +01:00
|
|
|
UNION
|
2024-02-16 16:23:34 +01:00
|
|
|
SELECT * FROM "repos" WHERE pds = ? AND (first_rev_since_reset is not null AND first_rev_since_reset <> '' AND last_indexed_rev < first_rev_since_reset) AND failed_attempts < ? LIMIT ?`,
|
|
|
|
remote.ID, maxAttempts, remote.ID, maxAttempts, perPDSLimit).
|
2024-02-15 19:39:29 +01:00
|
|
|
Scan(&repos).Error
|
2024-02-15 17:10:39 +01:00
|
|
|
|
|
|
|
if err != nil {
|
|
|
|
return fmt.Errorf("querying DB: %w", err)
|
|
|
|
}
|
|
|
|
for _, r := range repos {
|
|
|
|
if s.queue[r.DID] != nil || s.inProgress[r.DID] != nil {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
copied := r
|
|
|
|
s.queue[r.DID] = &copied
|
|
|
|
reposQueued.Inc()
|
|
|
|
}
|
|
|
|
s.updateQueueLenMetrics()
|
|
|
|
}
|
|
|
|
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (s *Scheduler) updateQueueLenMetrics() {
|
|
|
|
queueLenght.WithLabelValues("queued").Set(float64(len(s.queue)))
|
|
|
|
queueLenght.WithLabelValues("inProgress").Set(float64(len(s.inProgress)))
|
|
|
|
}
|