scrub review, reset all reviews

This commit is contained in:
Erik Winter 2023-12-30 09:19:53 +01:00
parent 946b05a9a1
commit e2b4233be2
11 changed files with 249 additions and 125 deletions

2
.gitignore vendored
View File

@ -1,4 +1,6 @@
*.db *.db
*.db-shm
*.db-wal
emdb emdb
emdb-api emdb-api
public public

View File

@ -3,6 +3,8 @@ package client
import ( import (
"fmt" "fmt"
"net/http" "net/http"
"regexp"
"strings"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
) )
@ -57,8 +59,25 @@ func (i *IMDB) GetReviews(imdbID string) (map[string]string, error) {
return return
} }
reviews[permaLink] = reviewNode.Text() reviews[permaLink] = ScrubIMDBReview(reviewNode.Text())
}) })
return reviews, nil return reviews, nil
} }
var (
	// reviewFooterTexts are the footer fragments that are cut out of every
	// scraped review.
	reviewFooterTexts = []string{"Was this review helpful?", "Sign in to vote.", "Permalink"}

	// reviewIndentRE matches a newline followed by horizontal whitespace.
	// Newlines are deliberately excluded from the class so that blank lines
	// survive this step and can be capped by reviewBlankLinesRE.
	reviewIndentRE = regexp.MustCompile(`\n[ \t\f\r]+`)

	// reviewBlankLinesRE matches runs of three or more newlines.
	reviewBlankLinesRE = regexp.MustCompile(`\n{3,}`)
)

// ScrubIMDBReview cleans up the raw text of a scraped review.
//
// It removes the known footer fragments, strips whitespace that directly
// follows a newline (indentation and whitespace-only lines) and then reduces
// every run of three or more newlines to exactly two, so paragraphs stay
// separated by a single blank line.
func ScrubIMDBReview(review string) string {
	// remove footer
	for _, text := range reviewFooterTexts {
		review = strings.ReplaceAll(review, text, "")
	}

	// remove superfluous whitespace, but keep the newlines themselves
	review = reviewIndentRE.ReplaceAllString(review, "\n")

	// remove superfluous newlines
	review = reviewBlankLinesRE.ReplaceAllString(review, "\n\n")

	return review
}

View File

@ -0,0 +1,53 @@
package handler
import (
"encoding/json"
"log/slog"
"net/http"
"ewintr.nl/emdb/cmd/api-service/job"
)
// AdminAPI is the HTTP handler for administrative requests. It holds the job
// queue that incoming jobs are added to and a logger.
type AdminAPI struct {
// jq receives the jobs posted to this API.
jq *job.JobQueue
// logger is the logger used by all methods of this handler.
logger *slog.Logger
}
// NewAdminAPI builds an AdminAPI around the given job queue. The logger it
// keeps is derived from the passed one, with the attribute "api" set to
// "admin".
func NewAdminAPI(jq *job.JobQueue, logger *slog.Logger) *AdminAPI {
	adminLogger := logger.With("api", "admin")

	return &AdminAPI{jq: jq, logger: adminLogger}
}
// ServeHTTP routes a request within the admin API. A POST on the root of the
// remaining path is handed to Add; every other request is answered with a
// 404 "unregistered path" error.
func (adminAPI *AdminAPI) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	logger := adminAPI.logger.With("method", "serveHTTP")

	head, _ := ShiftPath(r.URL.Path)
	if r.Method == http.MethodPost && head == "" {
		adminAPI.Add(w, r)
		return
	}

	Error(w, http.StatusNotFound, "unregistered path", nil, logger)
}
// Add decodes a job from the JSON request body, adds its movie ID and action
// to the job queue and writes the decoded job back as JSON.
//
// A body that cannot be decoded results in a 400; a failure to queue the job
// or to encode the response results in a 500.
func (adminAPI *AdminAPI) Add(w http.ResponseWriter, r *http.Request) {
	logger := adminAPI.logger.With("method", "add")

	// The local is not called "job" so that it does not shadow the package.
	var requested job.Job
	err := json.NewDecoder(r.Body).Decode(&requested)
	if err != nil {
		Error(w, http.StatusBadRequest, "could not decode job", err, logger)
		return
	}

	err = adminAPI.jq.Add(requested.MovieID, requested.Action)
	if err != nil {
		Error(w, http.StatusInternalServerError, "could not add job", err, logger)
		return
	}

	err = json.NewEncoder(w).Encode(requested)
	if err != nil {
		Error(w, http.StatusInternalServerError, "could not encode job", err, logger)
	}
}

View File

@ -10,6 +10,7 @@ import (
"log/slog" "log/slog"
"net/http" "net/http"
"ewintr.nl/emdb/cmd/api-service/job"
"ewintr.nl/emdb/cmd/api-service/moviestore" "ewintr.nl/emdb/cmd/api-service/moviestore"
"github.com/google/uuid" "github.com/google/uuid"
) )
@ -17,11 +18,11 @@ import (
type MovieAPI struct { type MovieAPI struct {
apis APIIndex apis APIIndex
repo *moviestore.MovieRepository repo *moviestore.MovieRepository
jq *moviestore.JobQueue jq *job.JobQueue
logger *slog.Logger logger *slog.Logger
} }
func NewMovieAPI(apis APIIndex, repo *moviestore.MovieRepository, jq *moviestore.JobQueue, logger *slog.Logger) *MovieAPI { func NewMovieAPI(apis APIIndex, repo *moviestore.MovieRepository, jq *job.JobQueue, logger *slog.Logger) *MovieAPI {
return &MovieAPI{ return &MovieAPI{
apis: apis, apis: apis,
repo: repo, repo: repo,
@ -33,29 +34,30 @@ func NewMovieAPI(apis APIIndex, repo *moviestore.MovieRepository, jq *moviestore
func (movieAPI *MovieAPI) ServeHTTP(w http.ResponseWriter, r *http.Request) { func (movieAPI *MovieAPI) ServeHTTP(w http.ResponseWriter, r *http.Request) {
logger := movieAPI.logger.With("method", "serveHTTP") logger := movieAPI.logger.With("method", "serveHTTP")
subPath, subTail := ShiftPath(r.URL.Path) head, tail := ShiftPath(r.URL.Path)
subHead, subTail := ShiftPath(tail)
for aPath, api := range movieAPI.apis { for aPath, api := range movieAPI.apis {
if subPath == aPath { if head != "" && subHead == fmt.Sprintf("%s", aPath) {
r.URL.Path = subTail r.URL.Path = subTail
r = r.Clone(context.WithValue(r.Context(), MovieKey, subPath)) r = r.Clone(context.WithValue(r.Context(), MovieKey, head))
api.ServeHTTP(w, r) api.ServeHTTP(w, r)
return return
} }
} }
switch { switch {
case r.Method == http.MethodGet && subPath != "": case r.Method == http.MethodGet && head != "":
movieAPI.Read(w, r, subPath) movieAPI.Read(w, r, head)
case r.Method == http.MethodPut && subPath != "": case r.Method == http.MethodPut && head != "":
movieAPI.Store(w, r, subPath) movieAPI.Store(w, r, head)
case r.Method == http.MethodPost && subPath == "": case r.Method == http.MethodPost && head == "":
movieAPI.Store(w, r, "") movieAPI.Store(w, r, "")
case r.Method == http.MethodDelete && subPath != "": case r.Method == http.MethodDelete && head != "":
movieAPI.Delete(w, r, subPath) movieAPI.Delete(w, r, head)
case r.Method == http.MethodGet && subPath == "": case r.Method == http.MethodGet && head == "":
movieAPI.List(w, r) movieAPI.List(w, r)
default: default:
Error(w, http.StatusNotFound, "unregistered path", fmt.Errorf("method %q with subpath %q was not registered in /movie", r.Method, subPath), logger) Error(w, http.StatusNotFound, "unregistered path", fmt.Errorf("method %q with subpath %q was not registered in /movie", r.Method, head), logger)
} }
} }
@ -113,7 +115,7 @@ func (movieAPI *MovieAPI) Store(w http.ResponseWriter, r *http.Request, urlID st
return return
} }
if err := movieAPI.jq.Add(m.ID, moviestore.ActionFetchIMDBReviews); err != nil { if err := movieAPI.jq.Add(m.ID, job.ActionRefreshIMDBReviews); err != nil {
Error(w, http.StatusInternalServerError, "could not add job to queue", err, logger) Error(w, http.StatusInternalServerError, "could not add job to queue", err, logger)
return return
} }

View File

@ -1,6 +1,7 @@
package handler package handler
import ( import (
"encoding/json"
"fmt" "fmt"
"log/slog" "log/slog"
"net/http" "net/http"
@ -25,17 +26,25 @@ func (reviewAPI *ReviewAPI) ServeHTTP(w http.ResponseWriter, r *http.Request) {
subPath, _ := ShiftPath(r.URL.Path) subPath, _ := ShiftPath(r.URL.Path)
switch { switch {
//case r.Method == http.MethodGet && subPath != "": case r.Method == http.MethodGet && subPath == "":
// reviewAPI.Read(w, r, subPath) reviewAPI.List(w, r)
//case r.Method == http.MethodPut && subPath != "":
// reviewAPI.Store(w, r, subPath)
//case r.Method == http.MethodPost && subPath == "":
// reviewAPI.Store(w, r, "")
//case r.Method == http.MethodDelete && subPath != "":
// reviewAPI.Delete(w, r, subPath)
//case r.Method == http.MethodGet && subPath == "":
// reviewAPI.List(w, r)
default: default:
Error(w, http.StatusNotFound, "unregistered path", fmt.Errorf("method %q with subpath %q was not registered in /review", r.Method, subPath), logger) Error(w, http.StatusNotFound, "unregistered path", fmt.Errorf("method %q with subpath %q was not registered in /review", r.Method, subPath), logger)
} }
} }
// List writes all reviews of one movie to the response as JSON.
//
// The movie ID is read from the request context under MovieKey. When that
// value is missing or not a string, or when the reviews cannot be fetched or
// encoded, a 500 error response is written instead.
func (reviewAPI *ReviewAPI) List(w http.ResponseWriter, r *http.Request) {
	logger := reviewAPI.logger.With("method", "list")

	// Use the comma-ok form: a plain type assertion would panic the handler
	// when the context value is absent.
	movieID, ok := r.Context().Value(MovieKey).(string)
	if !ok {
		Error(w, http.StatusInternalServerError, "could not get reviews", fmt.Errorf("request context holds no movie id"), logger)
		return
	}

	reviews, err := reviewAPI.repo.FindByMovieID(movieID)
	if err != nil {
		Error(w, http.StatusInternalServerError, "could not get reviews", err, logger)
		return
	}

	if err := json.NewEncoder(w).Encode(reviews); err != nil {
		Error(w, http.StatusInternalServerError, "could not encode reviews", err, logger)
		return
	}
}

View File

@ -1,71 +0,0 @@
package handler
import (
"log/slog"
"ewintr.nl/emdb/client"
movie2 "ewintr.nl/emdb/cmd/api-service/moviestore"
"github.com/google/uuid"
)
type Worker struct {
jq *movie2.JobQueue
movieRepo *movie2.MovieRepository
reviewRepo *movie2.ReviewRepository
imdb *client.IMDB
logger *slog.Logger
}
func NewWorker(jq *movie2.JobQueue, movieRepo *movie2.MovieRepository, reviewRepo *movie2.ReviewRepository, imdb *client.IMDB, logger *slog.Logger) *Worker {
return &Worker{
jq: jq,
movieRepo: movieRepo,
reviewRepo: reviewRepo,
imdb: imdb,
logger: logger.With("service", "worker"),
}
}
func (w *Worker) Run() {
w.logger.Info("starting worker")
for job := range w.jq.Next() {
w.logger.Info("got a new job", "jobID", job.ID, "movieID", job.MovieID, "action", job.Action)
switch job.Action {
case movie2.ActionFetchIMDBReviews:
w.fetchReviews(job)
default:
w.logger.Warn("unknown job action", "action", job.Action)
}
}
}
func (w *Worker) fetchReviews(job movie2.Job) {
logger := w.logger.With("method", "fetchReviews", "jobID", job.ID, "movieID", job.MovieID)
m, err := w.movieRepo.FindOne(job.MovieID)
if err != nil {
logger.Error("could not get movie", "error", err)
return
}
reviews, err := w.imdb.GetReviews(m.IMDBID)
if err != nil {
logger.Error("could not get reviews", "error", err)
return
}
for url, review := range reviews {
if err := w.reviewRepo.Store(movie2.Review{
ID: uuid.New().String(),
MovieID: m.ID,
Source: movie2.ReviewSourceIMDB,
URL: url,
Review: review,
}); err != nil {
logger.Error("could not store review", "error", err)
return
}
}
logger.Info("fetched reviews", "count", len(reviews))
}

View File

@ -0,0 +1,29 @@
package job
import (
"time"
)
// JobStatus is the string-typed status of a Job.
type JobStatus string
// The job statuses defined by this package.
const (
JobStatusToDo JobStatus = "todo"
JobStatusDoing JobStatus = "doing"
JobStatusDone JobStatus = "done"
)
// Action is the string-typed name of the work a Job asks for.
type Action string

// interval is a fixed duration of ten seconds.
// NOTE(review): presumably the polling interval of the job queue; it is not
// used in this file, confirm against the queue implementation.
const interval = 10 * time.Second

// The actions defined by this package.
const (
	ActionRefreshIMDBReviews    Action = "refresh-imdb-reviews"
	ActionRefreshAllIMDBReviews Action = "refresh-all-imdb-reviews"
)
// Job describes one unit of work: its numeric ID, the movie it applies to,
// the action to perform and its current status. The fields carry no struct
// tags.
type Job struct {
ID int
MovieID string
Action Action
Status JobStatus
}

View File

@ -1,42 +1,21 @@
package moviestore package job
import ( import (
"database/sql" "database/sql"
"errors" "errors"
"log/slog" "log/slog"
"time" "time"
"ewintr.nl/emdb/cmd/api-service/moviestore"
) )
type JobStatus string
const (
JobStatusToDo JobStatus = "todo"
JobStatusDoing JobStatus = "doing"
JobStatusDone JobStatus = "done"
)
type Action string
const (
interval = 10 * time.Second
ActionFetchIMDBReviews Action = "fetch-imdb-reviews"
)
type Job struct {
ID int
MovieID string
Action Action
Status JobStatus
}
type JobQueue struct { type JobQueue struct {
db *SQLite db *moviestore.SQLite
out chan Job out chan Job
logger *slog.Logger logger *slog.Logger
} }
func NewJobQueue(db *SQLite, logger *slog.Logger) *JobQueue { func NewJobQueue(db *moviestore.SQLite, logger *slog.Logger) *JobQueue {
return &JobQueue{ return &JobQueue{
db: db, db: db,
out: make(chan Job), out: make(chan Job),

View File

@ -0,0 +1,92 @@
package job
import (
"log/slog"
"ewintr.nl/emdb/client"
"ewintr.nl/emdb/cmd/api-service/moviestore"
"github.com/google/uuid"
)
// Worker processes the jobs handed out by a JobQueue. It holds the movie and
// review repositories, an IMDB client and a logger.
type Worker struct {
// jq is the queue the worker takes its jobs from.
jq *JobQueue
movieRepo *moviestore.MovieRepository
reviewRepo *moviestore.ReviewRepository
// imdb is the client used to fetch reviews.
imdb *client.IMDB
logger *slog.Logger
}
// NewWorker assembles a Worker from the given queue, repositories and IMDB
// client. The logger it keeps is derived from the passed one, with the
// attribute "service" set to "worker".
func NewWorker(jq *JobQueue, movieRepo *moviestore.MovieRepository, reviewRepo *moviestore.ReviewRepository, imdb *client.IMDB, logger *slog.Logger) *Worker {
	workerLogger := logger.With("service", "worker")

	worker := &Worker{
		jq:         jq,
		movieRepo:  movieRepo,
		reviewRepo: reviewRepo,
		imdb:       imdb,
		logger:     workerLogger,
	}

	return worker
}
// Run takes jobs from the queue, one at a time, and dispatches each on its
// action. Jobs with an action this worker does not know are logged with a
// warning and otherwise ignored. Run keeps going for as long as the channel
// returned by the queue yields jobs.
func (w *Worker) Run() {
	w.logger.Info("starting worker")

	for next := range w.jq.Next() {
		w.logger.Info("got a new job", "jobID", next.ID, "movieID", next.MovieID, "action", next.Action)

		if next.Action == ActionRefreshIMDBReviews {
			w.RefreshReviews(next.ID, next.MovieID)
			continue
		}
		if next.Action == ActionRefreshAllIMDBReviews {
			w.RefreshAllReviews(next.ID)
			continue
		}

		w.logger.Warn("unknown job action", "action", next.Action)
	}
}
// RefreshAllReviews refreshes the reviews of every movie in the movie
// repository by calling RefreshReviews for each of them in turn.
//
// When the movies cannot be listed the error is logged and nothing is
// refreshed. A failure for a single movie is handled inside RefreshReviews
// and does not stop the loop.
func (w *Worker) RefreshAllReviews(jobID int) {
	// Label log lines with this method's own name, so they can be told apart
	// from the ones written by RefreshReviews.
	logger := w.logger.With("method", "refreshAllReviews", "jobID", jobID)

	movies, err := w.movieRepo.FindAll()
	if err != nil {
		logger.Error("could not get movies", "error", err)
		return
	}

	for _, m := range movies {
		w.RefreshReviews(jobID, m.ID)
	}
}
// RefreshReviews replaces the stored reviews of one movie with the ones
// currently returned by the IMDB client.
//
// The new reviews are fetched before the stored ones are deleted, so a failed
// fetch leaves the existing reviews untouched. Every error is logged and ends
// the refresh; a failure while storing leaves only the reviews stored so far.
func (w *Worker) RefreshReviews(jobID int, movieID string) {
	logger := w.logger.With("method", "refreshReviews", "jobID", jobID, "movieID", movieID)

	m, err := w.movieRepo.FindOne(movieID)
	if err != nil {
		logger.Error("could not get movie", "error", err)
		return
	}

	// Fetch first: deleting up front would throw away the stored reviews
	// whenever the fetch fails.
	reviews, err := w.imdb.GetReviews(m.IMDBID)
	if err != nil {
		logger.Error("could not get reviews", "error", err)
		return
	}

	if err := w.reviewRepo.DeleteByMovieID(m.ID); err != nil {
		logger.Error("could not delete reviews", "error", err)
		return
	}

	for url, review := range reviews {
		if err := w.reviewRepo.Store(moviestore.Review{
			ID:      uuid.New().String(),
			MovieID: m.ID,
			Source:  moviestore.ReviewSourceIMDB,
			URL:     url,
			Review:  review,
		}); err != nil {
			logger.Error("could not store review", "error", err)
			return
		}
	}

	logger.Info("refresh reviews", "count", len(reviews))
}

View File

@ -65,3 +65,11 @@ func (rr *ReviewRepository) FindByMovieID(movieID string) ([]Review, error) {
return reviews, nil return reviews, nil
} }
// DeleteByMovieID deletes all rows from the review table whose movie_id
// equals id. It returns the error of the delete statement, if any.
func (rr *ReviewRepository) DeleteByMovieID(id string) error {
	_, err := rr.db.Exec(`DELETE FROM review WHERE movie_id=?`, id)
	return err
}

View File

@ -11,6 +11,7 @@ import (
"ewintr.nl/emdb/client" "ewintr.nl/emdb/client"
"ewintr.nl/emdb/cmd/api-service/handler" "ewintr.nl/emdb/cmd/api-service/handler"
"ewintr.nl/emdb/cmd/api-service/job"
"ewintr.nl/emdb/cmd/api-service/moviestore" "ewintr.nl/emdb/cmd/api-service/moviestore"
) )
@ -31,12 +32,13 @@ func main() {
os.Exit(1) os.Exit(1)
} }
jobQueue := moviestore.NewJobQueue(db, logger) jobQueue := job.NewJobQueue(db, logger)
go jobQueue.Run() go jobQueue.Run()
worker := handler.NewWorker(jobQueue, moviestore.NewMovieRepository(db), moviestore.NewReviewRepository(db), client.NewIMDB(), logger) worker := job.NewWorker(jobQueue, moviestore.NewMovieRepository(db), moviestore.NewReviewRepository(db), client.NewIMDB(), logger)
go worker.Run() go worker.Run()
apis := handler.APIIndex{ apis := handler.APIIndex{
"admin": handler.NewAdminAPI(jobQueue, logger),
"movie": handler.NewMovieAPI(handler.APIIndex{ "movie": handler.NewMovieAPI(handler.APIIndex{
"review": handler.NewReviewAPI(moviestore.NewReviewRepository(db), logger), "review": handler.NewReviewAPI(moviestore.NewReviewRepository(db), logger),
}, moviestore.NewMovieRepository(db), jobQueue, logger), }, moviestore.NewMovieRepository(db), jobQueue, logger),