package fetch
import (
	"time"

	"ewintr.nl/yogai/model"
	"ewintr.nl/yogai/storage"
	"github.com/google/uuid"
	"golang.org/x/exp/slog"
)
type Fetcher struct {
2023-05-10 20:08:45 +02:00
interval time . Duration
2023-07-06 13:25:51 +02:00
feedRepo storage . FeedRelRepository
videoRepo storage . VideoRelRepository
2023-05-10 20:08:45 +02:00
feedReader FeedReader
2023-05-27 14:36:22 +02:00
channelReader ChannelReader
2023-05-10 20:08:45 +02:00
metadataFetcher MetadataFetcher
2023-05-27 14:36:22 +02:00
feedPipeline chan * model . Feed
videoPipeline chan * model . Video
2023-05-10 20:08:45 +02:00
needsMetadata chan * model . Video
2023-07-06 13:25:51 +02:00
out chan * model . Video
2023-05-10 20:08:45 +02:00
logger * slog . Logger
2023-05-10 16:28:45 +02:00
}
2023-07-06 13:25:51 +02:00
func NewFetch ( feedRepo storage . FeedRelRepository , videoRepo storage . VideoRelRepository , channelReader ChannelReader , feedReader FeedReader , interval time . Duration , metadataFetcher MetadataFetcher , logger * slog . Logger ) * Fetcher {
2023-05-10 16:28:45 +02:00
return & Fetcher {
2023-05-10 20:08:45 +02:00
interval : interval ,
2023-05-27 14:36:22 +02:00
feedRepo : feedRepo ,
2023-05-10 20:08:45 +02:00
videoRepo : videoRepo ,
2023-05-27 14:36:22 +02:00
channelReader : channelReader ,
2023-05-10 20:08:45 +02:00
feedReader : feedReader ,
metadataFetcher : metadataFetcher ,
2023-05-27 14:36:22 +02:00
feedPipeline : make ( chan * model . Feed , 10 ) ,
videoPipeline : make ( chan * model . Video , 10 ) ,
2023-05-13 12:53:37 +02:00
needsMetadata : make ( chan * model . Video , 10 ) ,
2023-07-06 13:25:51 +02:00
out : make ( chan * model . Video ) ,
2023-05-10 20:08:45 +02:00
logger : logger ,
2023-05-10 16:28:45 +02:00
}
}
func ( f * Fetcher ) Run ( ) {
2023-05-27 14:36:22 +02:00
go f . FetchHistoricalVideos ( )
go f . FindNewFeeds ( )
2023-05-10 16:28:45 +02:00
go f . ReadFeeds ( )
go f . MetadataFetcher ( )
2023-05-13 12:53:37 +02:00
go f . FindUnprocessed ( )
2023-05-10 16:28:45 +02:00
2023-05-27 14:36:22 +02:00
f . logger . Info ( "started videoPipeline" )
2023-05-10 16:28:45 +02:00
for {
select {
2023-05-27 14:36:22 +02:00
case video := <- f . videoPipeline :
2023-05-13 12:53:37 +02:00
if err := f . videoRepo . Save ( video ) ; err != nil {
2023-07-04 19:56:44 +02:00
f . logger . Error ( "failed to save video in normal db" , err )
2023-05-13 12:53:37 +02:00
continue
2023-05-10 16:28:45 +02:00
}
2023-07-06 13:25:51 +02:00
switch video . Status {
case model . StatusNew :
f . needsMetadata <- video
case model . StatusFetched :
f . out <- video
}
2023-05-10 16:28:45 +02:00
}
2023-05-13 12:53:37 +02:00
}
}
2023-07-06 13:25:51 +02:00
func ( f * Fetcher ) Out ( ) chan * model . Video {
return f . out
}
2023-05-27 14:36:22 +02:00
func ( f * Fetcher ) FindNewFeeds ( ) {
f . logger . Info ( "looking for new feeds" )
feeds , err := f . feedRepo . FindByStatus ( model . FeedStatusNew )
if err != nil {
f . logger . Error ( "failed to fetch feeds" , err )
return
}
for _ , feed := range feeds {
f . feedPipeline <- feed
}
}
func ( f * Fetcher ) FetchHistoricalVideos ( ) {
2023-07-06 13:25:51 +02:00
f . logger . Info ( "started historical video fetch" )
2023-05-27 14:36:22 +02:00
for feed := range f . feedPipeline {
f . logger . Info ( "fetching historical videos" , slog . String ( "channelid" , string ( feed . YoutubeChannelID ) ) )
token := ""
for {
token = f . FetchHistoricalVideoPage ( feed . YoutubeChannelID , token )
if token == "" {
break
}
}
feed . Status = model . FeedStatusReady
if err := f . feedRepo . Save ( feed ) ; err != nil {
f . logger . Error ( "failed to save feed" , err )
continue
}
}
}
// FetchHistoricalVideoPage fetches one search page for the channel,
// stores each discovered video as "new" and pushes it into the video
// pipeline. It returns the continuation token for the next page, or ""
// on the last page or on error.
func (f *Fetcher) FetchHistoricalVideoPage(channelID model.YoutubeChannelID, pageToken string) string {
	f.logger.Info("fetching historical video page", slog.String("channelid", string(channelID)), slog.String("pagetoken", pageToken))
	ytIDs, nextToken, err := f.channelReader.Search(channelID, pageToken)
	if err != nil {
		f.logger.Error("failed to fetch channel", err)
		return ""
	}
	for _, ytID := range ytIDs {
		video := &model.Video{
			ID:               uuid.New(),
			Status:           model.StatusNew,
			YoutubeID:        ytID,
			YoutubeChannelID: channelID,
		}
		if err := f.videoRepo.Save(video); err != nil {
			f.logger.Error("failed to save video", err)
			continue
		}
		f.videoPipeline <- video
	}
	// Logs the token of the NEXT page, matching the value returned.
	f.logger.Info("fetched historical video page", slog.String("channelid", string(channelID)), slog.String("pagetoken", nextToken), slog.Int("count", len(ytIDs)))
	return nextToken
}
2023-05-13 12:53:37 +02:00
func ( f * Fetcher ) FindUnprocessed ( ) {
f . logger . Info ( "looking for unprocessed videos" )
2023-07-06 13:25:51 +02:00
videos , err := f . videoRepo . FindByStatus ( model . StatusNew , model . StatusFetched )
2023-05-13 12:53:37 +02:00
if err != nil {
f . logger . Error ( "failed to fetch unprocessed videos" , err )
return
}
f . logger . Info ( "found unprocessed videos" , slog . Int ( "count" , len ( videos ) ) )
for _ , video := range videos {
2023-05-27 14:36:22 +02:00
f . videoPipeline <- video
2023-05-10 16:28:45 +02:00
}
}
func ( f * Fetcher ) ReadFeeds ( ) {
2023-05-10 19:27:31 +02:00
f . logger . Info ( "started feed reader" )
2023-05-10 16:28:45 +02:00
ticker := time . NewTicker ( f . interval )
for range ticker . C {
entries , err := f . feedReader . Unread ( )
if err != nil {
2023-05-10 19:27:31 +02:00
f . logger . Error ( "failed to fetch unread entries" , err )
continue
2023-05-10 16:28:45 +02:00
}
2023-05-10 19:27:31 +02:00
f . logger . Info ( "fetched unread entries" , slog . Int ( "count" , len ( entries ) ) )
if len ( entries ) == 0 {
continue
}
2023-05-10 16:28:45 +02:00
for _ , entry := range entries {
video := & model . Video {
2023-05-27 14:36:22 +02:00
ID : uuid . New ( ) ,
Status : model . StatusNew ,
YoutubeID : model . YoutubeVideoID ( entry . YoutubeID ) ,
YoutubeChannelID : model . YoutubeChannelID ( entry . YoutubeChannelID ) ,
2023-05-10 16:28:45 +02:00
}
if err := f . videoRepo . Save ( video ) ; err != nil {
2023-05-10 19:27:31 +02:00
f . logger . Error ( "failed to save video" , err )
2023-05-10 16:28:45 +02:00
continue
}
2023-05-27 14:36:22 +02:00
f . videoPipeline <- video
2023-05-10 16:28:45 +02:00
if err := f . feedReader . MarkRead ( entry . EntryID ) ; err != nil {
2023-05-10 19:27:31 +02:00
f . logger . Error ( "failed to mark entry as read" , err )
continue
2023-05-10 16:28:45 +02:00
}
}
}
}
func ( f * Fetcher ) MetadataFetcher ( ) {
2023-07-06 13:25:51 +02:00
f . logger . Info ( "started metadata fetch" )
2023-05-10 19:27:31 +02:00
2023-05-10 16:28:45 +02:00
buffer := [ ] * model . Video { }
timeout := time . NewTimer ( 10 * time . Second )
fetch := make ( chan [ ] * model . Video )
go func ( ) {
for videos := range fetch {
2023-05-10 19:27:31 +02:00
f . logger . Info ( "fetching metadata" , slog . Int ( "count" , len ( videos ) ) )
2023-05-27 14:36:22 +02:00
ids := make ( [ ] model . YoutubeVideoID , 0 , len ( videos ) )
2023-05-10 20:08:45 +02:00
for _ , video := range videos {
ids = append ( ids , video . YoutubeID )
}
mds , err := f . metadataFetcher . FetchMetadata ( ids )
if err != nil {
f . logger . Error ( "failed to fetch metadata" , err )
continue
}
for _ , video := range videos {
2023-05-31 16:27:35 +02:00
md := mds [ video . YoutubeID ]
video . YoutubeTitle = md . Title
video . YoutubeDescription = md . Description
video . YoutubeDuration = md . Duration
video . YoutubePublishedAt = md . PublishedAt
2023-07-06 13:25:51 +02:00
video . Status = model . StatusFetched
2023-05-10 19:27:31 +02:00
2023-05-10 20:08:45 +02:00
if err := f . videoRepo . Save ( video ) ; err != nil {
f . logger . Error ( "failed to save video" , err )
continue
}
}
2023-05-13 12:53:37 +02:00
f . logger . Info ( "fetched metadata" , slog . Int ( "count" , len ( videos ) ) )
2023-05-10 16:28:45 +02:00
}
} ( )
for {
select {
case video := <- f . needsMetadata :
timeout . Reset ( 10 * time . Second )
buffer = append ( buffer , video )
2023-05-27 14:36:22 +02:00
if len ( buffer ) >= 50 {
2023-05-10 16:28:45 +02:00
batch := make ( [ ] * model . Video , len ( buffer ) )
copy ( batch , buffer )
fetch <- batch
buffer = [ ] * model . Video { }
}
case <- timeout . C :
if len ( buffer ) == 0 {
continue
}
batch := make ( [ ] * model . Video , len ( buffer ) )
copy ( batch , buffer )
fetch <- batch
buffer = [ ] * model . Video { }
}
}
}