2023-05-10 16:28:45 +02:00
|
|
|
package fetcher
|
|
|
|
|
|
|
|
import (
|
|
|
|
"ewintr.nl/yogai/model"
|
|
|
|
"ewintr.nl/yogai/storage"
|
|
|
|
"github.com/google/uuid"
|
2023-05-10 19:27:31 +02:00
|
|
|
"golang.org/x/exp/slog"
|
2023-05-10 16:28:45 +02:00
|
|
|
"time"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Fetcher runs the video processing pipeline: it discovers new videos via
// a feed reader and pushes them through metadata and summary enrichment
// until they are marked ready.
type Fetcher struct {
	// interval is the polling period used by ReadFeeds.
	interval        time.Duration
	videoRepo       storage.VideoRepository
	feedReader      FeedReader
	metadataFetcher MetadataFetcher
	summaryFetcher  SummaryFetcher

	// pipeline is the central routing channel; Run reads it and fans
	// videos out to the stage channels below based on their status.
	pipeline      chan *model.Video
	needsMetadata chan *model.Video
	needsSummary  chan *model.Video

	logger *slog.Logger
}
|
|
|
|
|
2023-05-13 12:53:37 +02:00
|
|
|
func NewFetch(videoRepo storage.VideoRepository, feedReader FeedReader, interval time.Duration, metadataFetcher MetadataFetcher, summaryFetcher SummaryFetcher, logger *slog.Logger) *Fetcher {
|
2023-05-10 16:28:45 +02:00
|
|
|
return &Fetcher{
|
2023-05-10 20:08:45 +02:00
|
|
|
interval: interval,
|
|
|
|
videoRepo: videoRepo,
|
|
|
|
feedReader: feedReader,
|
|
|
|
metadataFetcher: metadataFetcher,
|
2023-05-13 12:53:37 +02:00
|
|
|
summaryFetcher: summaryFetcher,
|
|
|
|
pipeline: make(chan *model.Video, 10),
|
|
|
|
needsMetadata: make(chan *model.Video, 10),
|
|
|
|
needsSummary: make(chan *model.Video, 10),
|
2023-05-10 20:08:45 +02:00
|
|
|
logger: logger,
|
2023-05-10 16:28:45 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (f *Fetcher) Run() {
|
|
|
|
go f.ReadFeeds()
|
|
|
|
go f.MetadataFetcher()
|
2023-05-13 12:53:37 +02:00
|
|
|
go f.SummaryFetcher()
|
|
|
|
go f.FindUnprocessed()
|
2023-05-10 16:28:45 +02:00
|
|
|
|
2023-05-10 19:27:31 +02:00
|
|
|
f.logger.Info("started pipeline")
|
2023-05-10 16:28:45 +02:00
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case video := <-f.pipeline:
|
|
|
|
switch video.Status {
|
2023-05-13 12:53:37 +02:00
|
|
|
case model.StatusNew:
|
2023-05-10 16:28:45 +02:00
|
|
|
f.needsMetadata <- video
|
2023-05-13 12:53:37 +02:00
|
|
|
case model.StatusHasMetadata:
|
|
|
|
f.needsSummary <- video
|
|
|
|
case model.StatusHasSummary:
|
|
|
|
video.Status = model.StatusReady
|
|
|
|
f.logger.Info("video is ready", slog.String("id", video.ID.String()))
|
|
|
|
|
|
|
|
}
|
|
|
|
if err := f.videoRepo.Save(video); err != nil {
|
|
|
|
f.logger.Error("failed to save video", err)
|
|
|
|
continue
|
2023-05-10 16:28:45 +02:00
|
|
|
}
|
|
|
|
}
|
2023-05-13 12:53:37 +02:00
|
|
|
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (f *Fetcher) FindUnprocessed() {
|
|
|
|
f.logger.Info("looking for unprocessed videos")
|
|
|
|
videos, err := f.videoRepo.FindByStatus(model.StatusNew, model.StatusHasMetadata)
|
|
|
|
if err != nil {
|
|
|
|
f.logger.Error("failed to fetch unprocessed videos", err)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
f.logger.Info("found unprocessed videos", slog.Int("count", len(videos)))
|
|
|
|
for _, video := range videos {
|
|
|
|
f.pipeline <- video
|
2023-05-10 16:28:45 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (f *Fetcher) ReadFeeds() {
|
2023-05-10 19:27:31 +02:00
|
|
|
f.logger.Info("started feed reader")
|
2023-05-10 16:28:45 +02:00
|
|
|
ticker := time.NewTicker(f.interval)
|
|
|
|
for range ticker.C {
|
|
|
|
entries, err := f.feedReader.Unread()
|
|
|
|
if err != nil {
|
2023-05-10 19:27:31 +02:00
|
|
|
f.logger.Error("failed to fetch unread entries", err)
|
|
|
|
continue
|
2023-05-10 16:28:45 +02:00
|
|
|
}
|
2023-05-10 19:27:31 +02:00
|
|
|
f.logger.Info("fetched unread entries", slog.Int("count", len(entries)))
|
|
|
|
if len(entries) == 0 {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2023-05-10 16:28:45 +02:00
|
|
|
for _, entry := range entries {
|
|
|
|
video := &model.Video{
|
|
|
|
ID: uuid.New(),
|
2023-05-13 12:53:37 +02:00
|
|
|
Status: model.StatusNew,
|
2023-05-10 16:28:45 +02:00
|
|
|
YoutubeID: entry.YouTubeID,
|
|
|
|
// feed id
|
|
|
|
}
|
|
|
|
if err := f.videoRepo.Save(video); err != nil {
|
2023-05-10 19:27:31 +02:00
|
|
|
f.logger.Error("failed to save video", err)
|
2023-05-10 16:28:45 +02:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
f.pipeline <- video
|
|
|
|
if err := f.feedReader.MarkRead(entry.EntryID); err != nil {
|
2023-05-10 19:27:31 +02:00
|
|
|
f.logger.Error("failed to mark entry as read", err)
|
|
|
|
continue
|
2023-05-10 16:28:45 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// MetadataFetcher collects videos that still need metadata and resolves
// them in batches: a batch is flushed when 10 videos have accumulated, or
// when no new video has arrived for 10 seconds, whichever comes first.
func (f *Fetcher) MetadataFetcher() {
	f.logger.Info("started metadata fetcher")

	buffer := []*model.Video{}
	timeout := time.NewTimer(10 * time.Second)
	fetch := make(chan []*model.Video)

	// Worker goroutine: resolves each batch against the metadata fetcher
	// and saves the enriched videos. It runs for the process lifetime
	// (fetch is never closed).
	go func() {
		for videos := range fetch {
			f.logger.Info("fetching metadata", slog.Int("count", len(videos)))
			ids := make([]string, 0, len(videos))
			for _, video := range videos {
				ids = append(ids, video.YoutubeID)
			}
			mds, err := f.metadataFetcher.FetchMetadata(ids)
			if err != nil {
				// Drop the whole batch; the videos stay StatusNew and can
				// be re-enqueued later by FindUnprocessed.
				f.logger.Error("failed to fetch metadata", err)
				continue
			}
			for _, video := range videos {
				// NOTE(review): a missing map key yields zero-value
				// metadata, silently leaving Title/Description empty —
				// confirm FetchMetadata returns an entry per requested ID.
				video.Title = mds[video.YoutubeID].Title
				video.Description = mds[video.YoutubeID].Description
				video.Status = model.StatusHasMetadata

				if err := f.videoRepo.Save(video); err != nil {
					f.logger.Error("failed to save video", err)
					continue
				}
			}
			f.logger.Info("fetched metadata", slog.Int("count", len(videos)))
		}
	}()

	for {
		select {
		case video := <-f.needsMetadata:
			// Each arrival restarts the inactivity window.
			// NOTE(review): Reset on a fired-but-undrained timer may let a
			// stale expiry through (see time.Timer docs) — here that only
			// causes an early flush, which is harmless.
			timeout.Reset(10 * time.Second)
			buffer = append(buffer, video)
			if len(buffer) >= 10 {
				// Full batch: hand the worker a copy so buffer can be
				// reused immediately without aliasing.
				batch := make([]*model.Video, len(buffer))
				copy(batch, buffer)
				fetch <- batch
				buffer = []*model.Video{}
			}
		case <-timeout.C:
			if len(buffer) == 0 {
				continue
			}
			// Inactivity flush: send whatever is buffered so far.
			batch := make([]*model.Video, len(buffer))
			copy(batch, buffer)
			fetch <- batch
			buffer = []*model.Video{}
		}
	}
}
|
2023-05-13 12:53:37 +02:00
|
|
|
|
|
|
|
func (f *Fetcher) SummaryFetcher() {
|
|
|
|
for {
|
|
|
|
select {
|
|
|
|
case video := <-f.needsSummary:
|
|
|
|
f.logger.Info("fetching summary", slog.String("id", video.ID.String()))
|
|
|
|
if err := f.summaryFetcher.FetchSummary(video); err != nil {
|
|
|
|
f.logger.Error("failed to fetch summary", err)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
video.Status = model.StatusHasSummary
|
|
|
|
f.logger.Info("fetched summary", slog.String("id", video.ID.String()))
|
|
|
|
f.pipeline <- video
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|