summary in pipeline
This commit is contained in:
parent
b063be21b4
commit
600cde5279
|
@ -13,19 +13,23 @@ type Fetcher struct {
|
||||||
videoRepo storage.VideoRepository
|
videoRepo storage.VideoRepository
|
||||||
feedReader FeedReader
|
feedReader FeedReader
|
||||||
metadataFetcher MetadataFetcher
|
metadataFetcher MetadataFetcher
|
||||||
|
summaryFetcher SummaryFetcher
|
||||||
pipeline chan *model.Video
|
pipeline chan *model.Video
|
||||||
needsMetadata chan *model.Video
|
needsMetadata chan *model.Video
|
||||||
|
needsSummary chan *model.Video
|
||||||
logger *slog.Logger
|
logger *slog.Logger
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewFetch(videoRepo storage.VideoRepository, feedReader FeedReader, interval time.Duration, metadataFetcher MetadataFetcher, logger *slog.Logger) *Fetcher {
|
func NewFetch(videoRepo storage.VideoRepository, feedReader FeedReader, interval time.Duration, metadataFetcher MetadataFetcher, summaryFetcher SummaryFetcher, logger *slog.Logger) *Fetcher {
|
||||||
return &Fetcher{
|
return &Fetcher{
|
||||||
interval: interval,
|
interval: interval,
|
||||||
videoRepo: videoRepo,
|
videoRepo: videoRepo,
|
||||||
feedReader: feedReader,
|
feedReader: feedReader,
|
||||||
metadataFetcher: metadataFetcher,
|
metadataFetcher: metadataFetcher,
|
||||||
pipeline: make(chan *model.Video),
|
summaryFetcher: summaryFetcher,
|
||||||
needsMetadata: make(chan *model.Video),
|
pipeline: make(chan *model.Video, 10),
|
||||||
|
needsMetadata: make(chan *model.Video, 10),
|
||||||
|
needsSummary: make(chan *model.Video, 10),
|
||||||
logger: logger,
|
logger: logger,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -33,16 +37,42 @@ func NewFetch(videoRepo storage.VideoRepository, feedReader FeedReader, interval
|
||||||
func (f *Fetcher) Run() {
|
func (f *Fetcher) Run() {
|
||||||
go f.ReadFeeds()
|
go f.ReadFeeds()
|
||||||
go f.MetadataFetcher()
|
go f.MetadataFetcher()
|
||||||
|
go f.SummaryFetcher()
|
||||||
|
go f.FindUnprocessed()
|
||||||
|
|
||||||
f.logger.Info("started pipeline")
|
f.logger.Info("started pipeline")
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case video := <-f.pipeline:
|
case video := <-f.pipeline:
|
||||||
switch video.Status {
|
switch video.Status {
|
||||||
case model.STATUS_NEW:
|
case model.StatusNew:
|
||||||
f.needsMetadata <- video
|
f.needsMetadata <- video
|
||||||
|
case model.StatusHasMetadata:
|
||||||
|
f.needsSummary <- video
|
||||||
|
case model.StatusHasSummary:
|
||||||
|
video.Status = model.StatusReady
|
||||||
|
f.logger.Info("video is ready", slog.String("id", video.ID.String()))
|
||||||
|
|
||||||
|
}
|
||||||
|
if err := f.videoRepo.Save(video); err != nil {
|
||||||
|
f.logger.Error("failed to save video", err)
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *Fetcher) FindUnprocessed() {
|
||||||
|
f.logger.Info("looking for unprocessed videos")
|
||||||
|
videos, err := f.videoRepo.FindByStatus(model.StatusNew, model.StatusHasMetadata)
|
||||||
|
if err != nil {
|
||||||
|
f.logger.Error("failed to fetch unprocessed videos", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
f.logger.Info("found unprocessed videos", slog.Int("count", len(videos)))
|
||||||
|
for _, video := range videos {
|
||||||
|
f.pipeline <- video
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -63,7 +93,7 @@ func (f *Fetcher) ReadFeeds() {
|
||||||
for _, entry := range entries {
|
for _, entry := range entries {
|
||||||
video := &model.Video{
|
video := &model.Video{
|
||||||
ID: uuid.New(),
|
ID: uuid.New(),
|
||||||
Status: model.STATUS_NEW,
|
Status: model.StatusNew,
|
||||||
YoutubeID: entry.YouTubeID,
|
YoutubeID: entry.YouTubeID,
|
||||||
// feed id
|
// feed id
|
||||||
}
|
}
|
||||||
|
@ -102,12 +132,14 @@ func (f *Fetcher) MetadataFetcher() {
|
||||||
for _, video := range videos {
|
for _, video := range videos {
|
||||||
video.Title = mds[video.YoutubeID].Title
|
video.Title = mds[video.YoutubeID].Title
|
||||||
video.Description = mds[video.YoutubeID].Description
|
video.Description = mds[video.YoutubeID].Description
|
||||||
|
video.Status = model.StatusHasMetadata
|
||||||
|
|
||||||
if err := f.videoRepo.Save(video); err != nil {
|
if err := f.videoRepo.Save(video); err != nil {
|
||||||
f.logger.Error("failed to save video", err)
|
f.logger.Error("failed to save video", err)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
f.logger.Info("fetched metadata", slog.Int("count", len(videos)))
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
@ -133,3 +165,19 @@ func (f *Fetcher) MetadataFetcher() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (f *Fetcher) SummaryFetcher() {
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case video := <-f.needsSummary:
|
||||||
|
f.logger.Info("fetching summary", slog.String("id", video.ID.String()))
|
||||||
|
if err := f.summaryFetcher.FetchSummary(video); err != nil {
|
||||||
|
f.logger.Error("failed to fetch summary", err)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
video.Status = model.StatusHasSummary
|
||||||
|
f.logger.Info("fetched summary", slog.String("id", video.ID.String()))
|
||||||
|
f.pipeline <- video
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,49 @@
|
||||||
|
package fetcher
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"ewintr.nl/yogai/model"
|
||||||
|
"fmt"
|
||||||
|
"github.com/sashabaranov/go-openai"
|
||||||
|
)
|
||||||
|
|
||||||
|
const summarizePrompt = `You are an helpful assistant. Your task is to extract all text that refers to the content of a yoga workout video from the description a user gives you.
|
||||||
|
You will not add introductory sentences like "This text is about", or "Summary of...". Just give the words verbatim. Trim any white space back to a simple space
|
||||||
|
`
|
||||||
|
|
||||||
|
type OpenAI struct {
|
||||||
|
client *openai.Client
|
||||||
|
}
|
||||||
|
|
||||||
|
func NewOpenAI(apiKey string) *OpenAI {
|
||||||
|
return &OpenAI{
|
||||||
|
client: openai.NewClient(apiKey),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (o *OpenAI) FetchSummary(video *model.Video) error {
|
||||||
|
resp, err := o.client.CreateChatCompletion(
|
||||||
|
context.Background(),
|
||||||
|
openai.ChatCompletionRequest{
|
||||||
|
Model: openai.GPT4,
|
||||||
|
Messages: []openai.ChatCompletionMessage{
|
||||||
|
{
|
||||||
|
Role: openai.ChatMessageRoleSystem,
|
||||||
|
Content: summarizePrompt,
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
Role: openai.ChatMessageRoleUser,
|
||||||
|
Content: fmt.Sprintf("%s\n\n%s", video.Title, video.Description),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
})
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to fetch summary: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
video.Summary = resp.Choices[len(resp.Choices)-1].Message.Content
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
|
@ -0,0 +1,7 @@
|
||||||
|
package fetcher
|
||||||
|
|
||||||
|
import "ewintr.nl/yogai/model"
|
||||||
|
|
||||||
|
type SummaryFetcher interface {
|
||||||
|
FetchSummary(video *model.Video) error
|
||||||
|
}
|
1
go.mod
1
go.mod
|
@ -5,6 +5,7 @@ go 1.20
|
||||||
require (
|
require (
|
||||||
github.com/google/uuid v1.3.0
|
github.com/google/uuid v1.3.0
|
||||||
github.com/lib/pq v1.10.9
|
github.com/lib/pq v1.10.9
|
||||||
|
github.com/sashabaranov/go-openai v1.9.4
|
||||||
golang.org/x/exp v0.0.0-20230425010034-47ecfdc1ba53
|
golang.org/x/exp v0.0.0-20230425010034-47ecfdc1ba53
|
||||||
google.golang.org/api v0.122.0
|
google.golang.org/api v0.122.0
|
||||||
miniflux.app v0.0.0-20230505000442-88062ab9f959
|
miniflux.app v0.0.0-20230505000442-88062ab9f959
|
||||||
|
|
2
go.sum
2
go.sum
|
@ -69,6 +69,8 @@ github.com/lib/pq v1.10.9/go.mod h1:AlVN5x4E4T544tWzH6hKfbfQvm3HdbOxrmggDNAPY9o=
|
||||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
||||||
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
|
github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6LYCDYWNEvQ=
|
||||||
|
github.com/sashabaranov/go-openai v1.9.4 h1:KanoCEoowAI45jVXlenMCckutSRr39qOmSi9MyPBfZM=
|
||||||
|
github.com/sashabaranov/go-openai v1.9.4/go.mod h1:lj5b/K+zjTSFxVLijLSTDZuP7adOgerWeFyZLUhAKRg=
|
||||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||||
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
|
||||||
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
package handler
|
|
@ -0,0 +1 @@
|
||||||
|
package handler
|
|
@ -5,8 +5,10 @@ import "github.com/google/uuid"
|
||||||
type Status string
|
type Status string
|
||||||
|
|
||||||
const (
|
const (
|
||||||
STATUS_NEW Status = "new"
|
StatusNew Status = "new"
|
||||||
STATUS_READY Status = "ready"
|
StatusHasMetadata Status = "has_metadata"
|
||||||
|
StatusHasSummary Status = "has_summary"
|
||||||
|
StatusReady Status = "ready"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Video struct {
|
type Video struct {
|
||||||
|
|
|
@ -46,7 +46,9 @@ func main() {
|
||||||
}
|
}
|
||||||
yt := fetcher.NewYoutube(ytClient)
|
yt := fetcher.NewYoutube(ytClient)
|
||||||
|
|
||||||
fetcher := fetcher.NewFetch(videoRepo, mflx, fetchInterval, yt, logger)
|
openAIClient := fetcher.NewOpenAI(getParam("OPENAI_API_KEY", ""))
|
||||||
|
|
||||||
|
fetcher := fetcher.NewFetch(videoRepo, mflx, fetchInterval, yt, openAIClient, logger)
|
||||||
go fetcher.Run()
|
go fetcher.Run()
|
||||||
logger.Info("service started")
|
logger.Info("service started")
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@ import (
|
||||||
"database/sql"
|
"database/sql"
|
||||||
"ewintr.nl/yogai/model"
|
"ewintr.nl/yogai/model"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"github.com/lib/pq"
|
||||||
_ "github.com/lib/pq"
|
_ "github.com/lib/pq"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -42,8 +43,8 @@ func NewPostgresVideoRepository(postgres *Postgres) *PostgresVideoRepository {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *PostgresVideoRepository) Save(v *model.Video) error {
|
func (p *PostgresVideoRepository) Save(v *model.Video) error {
|
||||||
query := `INSERT INTO video (id, status, youtube_id, feed_id, title, description)
|
query := `INSERT INTO video (id, status, youtube_id, feed_id, title, description, summary)
|
||||||
VALUES ($1, $2, $3, $4, $5, $6)
|
VALUES ($1, $2, $3, $4, $5, $6, $7)
|
||||||
ON CONFLICT (id)
|
ON CONFLICT (id)
|
||||||
DO UPDATE SET
|
DO UPDATE SET
|
||||||
id = EXCLUDED.id,
|
id = EXCLUDED.id,
|
||||||
|
@ -51,23 +52,59 @@ DO UPDATE SET
|
||||||
youtube_id = EXCLUDED.youtube_id,
|
youtube_id = EXCLUDED.youtube_id,
|
||||||
feed_id = EXCLUDED.feed_id,
|
feed_id = EXCLUDED.feed_id,
|
||||||
title = EXCLUDED.title,
|
title = EXCLUDED.title,
|
||||||
description = EXCLUDED.description;`
|
description = EXCLUDED.description,
|
||||||
_, err := p.db.Exec(query, v.ID, v.Status, v.YoutubeID, v.FeedID, v.Title, v.Description)
|
summary = EXCLUDED.summary;`
|
||||||
|
_, err := p.db.Exec(query, v.ID, v.Status, v.YoutubeID, v.FeedID, v.Title, v.Description, v.Summary)
|
||||||
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (p *PostgresVideoRepository) FindByStatus(statuses ...model.Status) ([]*model.Video, error) {
|
||||||
|
query := `SELECT id, status, youtube_id, feed_id, title, description, summary
|
||||||
|
FROM video
|
||||||
|
WHERE status = ANY($1)`
|
||||||
|
rows, err := p.db.Query(query, pq.Array(statuses))
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
videos := []*model.Video{}
|
||||||
|
for rows.Next() {
|
||||||
|
v := &model.Video{}
|
||||||
|
if err := rows.Scan(&v.ID, &v.Status, &v.YoutubeID, &v.FeedID, &v.Title, &v.Description, &v.Summary); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
videos = append(videos, v)
|
||||||
|
}
|
||||||
|
rows.Close()
|
||||||
|
|
||||||
|
return videos, nil
|
||||||
|
}
|
||||||
|
|
||||||
var pgMigration = []string{
|
var pgMigration = []string{
|
||||||
`CREATE TYPE video_status AS ENUM ('new', 'ready')`,
|
`CREATE TYPE video_status AS ENUM ('new', 'ready')`,
|
||||||
`CREATE TABLE video (
|
`CREATE TABLE video (
|
||||||
id uuid PRIMARY KEY,
|
id uuid PRIMARY KEY,
|
||||||
status video_status NOT NULL,
|
status video_status NOT NULL,
|
||||||
youtube_id VARCHAR(255) NOT NULL UNIQUE,
|
youtube_id VARCHAR(255) NOT NULL UNIQUE,
|
||||||
title VARCHAR(255) NOT NULL,
|
title VARCHAR(255) NOT NULL,
|
||||||
feed_id VARCHAR(255) NOT NULL,
|
feed_id VARCHAR(255) NOT NULL,
|
||||||
description TEXT,
|
description TEXT,
|
||||||
summary TEXT
|
summary TEXT
|
||||||
)`,
|
)`,
|
||||||
|
`CREATE TYPE video_status_new AS ENUM ('new', 'has_metadata', 'has_summary', 'ready')`,
|
||||||
|
`ALTER TABLE video
|
||||||
|
ALTER COLUMN status TYPE video_status_new
|
||||||
|
USING video::text::video_status_new`,
|
||||||
|
`DROP TYPE video_status`,
|
||||||
|
`ALTER TYPE video_status_new RENAME TO video_status`,
|
||||||
|
`UPDATE video SET summary = '' WHERE summary IS NULL `,
|
||||||
|
`UPDATE video SET description = '' WHERE description IS NULL `,
|
||||||
|
`ALTER TABLE video
|
||||||
|
ALTER COLUMN summary SET DEFAULT '',
|
||||||
|
ALTER COLUMN summary SET NOT NULL,
|
||||||
|
ALTER COLUMN description SET DEFAULT '',
|
||||||
|
ALTER COLUMN description SET NOT NULL`,
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *Postgres) migrate(wanted []string) error {
|
func (p *Postgres) migrate(wanted []string) error {
|
||||||
|
|
|
@ -6,4 +6,5 @@ import (
|
||||||
|
|
||||||
type VideoRepository interface {
|
type VideoRepository interface {
|
||||||
Save(video *model.Video) error
|
Save(video *model.Video) error
|
||||||
|
FindByStatus(statuses ...model.Status) ([]*model.Video, error)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue