scrub review, reset all reviews
This commit is contained in:
parent
946b05a9a1
commit
e2b4233be2
|
@ -1,4 +1,6 @@
|
|||
*.db
|
||||
*.db-shm
|
||||
*.db-wal
|
||||
emdb
|
||||
emdb-api
|
||||
public
|
|
@ -3,6 +3,8 @@ package client
|
|||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"strings"
|
||||
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
)
|
||||
|
@ -57,8 +59,25 @@ func (i *IMDB) GetReviews(imdbID string) (map[string]string, error) {
|
|||
return
|
||||
}
|
||||
|
||||
reviews[permaLink] = reviewNode.Text()
|
||||
reviews[permaLink] = ScrubIMDBReview(reviewNode.Text())
|
||||
})
|
||||
|
||||
return reviews, nil
|
||||
}
|
||||
|
||||
var (
	// imdbReviewFooters are the boilerplate fragments that are stripped from
	// every scraped review.
	imdbReviewFooters = []string{"Was this review helpful?", "Sign in to vote.", "Permalink"}

	// imdbReviewNewlineWS matches a newline followed by any run of whitespace
	// (including further newlines). Compiled once instead of on every call.
	imdbReviewNewlineWS = regexp.MustCompile(`\n\s+`)

	// imdbReviewNewlineRun matches three or more consecutive newlines.
	// Compiled once instead of on every call.
	imdbReviewNewlineRun = regexp.MustCompile(`\n{3,}`)
)

// ScrubIMDBReview cleans the raw text of a scraped IMDB review. It removes the
// footer fragments ("Was this review helpful?", "Sign in to vote.",
// "Permalink") and then normalises whitespace that follows a newline.
//
// The cleaned review text is returned; the input is not modified.
func ScrubIMDBReview(review string) string {
	// remove footer
	for _, footer := range imdbReviewFooters {
		review = strings.ReplaceAll(review, footer, "")
	}

	// remove superfluous whitespace: a newline plus all whitespace after it
	// becomes a single newline
	review = imdbReviewNewlineWS.ReplaceAllString(review, "\n")

	// remove superfluous newlines
	// NOTE(review): after the previous step every newline is followed by a
	// non-whitespace character or the end of the string, so this pattern can
	// no longer match. Kept to preserve the original sequence of steps;
	// confirm whether paragraph breaks were meant to survive.
	review = imdbReviewNewlineRun.ReplaceAllString(review, "\n\n")

	return review
}
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
package handler
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
|
||||
"ewintr.nl/emdb/cmd/api-service/job"
|
||||
)
|
||||
|
||||
// AdminAPI is the HTTP handler for administrative requests. It holds the job
// queue that Add enqueues onto and the logger used for its error reporting.
type AdminAPI struct {
|
||||
jq *job.JobQueue
|
||||
logger *slog.Logger
|
||||
}
|
||||
|
||||
func NewAdminAPI(jq *job.JobQueue, logger *slog.Logger) *AdminAPI {
|
||||
return &AdminAPI{
|
||||
jq: jq,
|
||||
logger: logger.With("api", "admin"),
|
||||
}
|
||||
}
|
||||
|
||||
func (adminAPI *AdminAPI) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
logger := adminAPI.logger.With("method", "serveHTTP")
|
||||
|
||||
subPath, _ := ShiftPath(r.URL.Path)
|
||||
switch {
|
||||
case r.Method == http.MethodPost && subPath == "":
|
||||
adminAPI.Add(w, r)
|
||||
default:
|
||||
Error(w, http.StatusNotFound, "unregistered path", nil, logger)
|
||||
}
|
||||
}
|
||||
|
||||
func (adminAPI *AdminAPI) Add(w http.ResponseWriter, r *http.Request) {
|
||||
logger := adminAPI.logger.With("method", "add")
|
||||
|
||||
var job job.Job
|
||||
if err := json.NewDecoder(r.Body).Decode(&job); err != nil {
|
||||
Error(w, http.StatusBadRequest, "could not decode job", err, logger)
|
||||
return
|
||||
}
|
||||
|
||||
if err := adminAPI.jq.Add(job.MovieID, job.Action); err != nil {
|
||||
Error(w, http.StatusInternalServerError, "could not add job", err, logger)
|
||||
return
|
||||
}
|
||||
|
||||
if err := json.NewEncoder(w).Encode(job); err != nil {
|
||||
Error(w, http.StatusInternalServerError, "could not encode job", err, logger)
|
||||
return
|
||||
}
|
||||
}
|
|
@ -10,6 +10,7 @@ import (
|
|||
"log/slog"
|
||||
"net/http"
|
||||
|
||||
"ewintr.nl/emdb/cmd/api-service/job"
|
||||
"ewintr.nl/emdb/cmd/api-service/moviestore"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
@ -17,11 +18,11 @@ import (
|
|||
type MovieAPI struct {
|
||||
apis APIIndex
|
||||
repo *moviestore.MovieRepository
|
||||
jq *moviestore.JobQueue
|
||||
jq *job.JobQueue
|
||||
logger *slog.Logger
|
||||
}
|
||||
|
||||
func NewMovieAPI(apis APIIndex, repo *moviestore.MovieRepository, jq *moviestore.JobQueue, logger *slog.Logger) *MovieAPI {
|
||||
func NewMovieAPI(apis APIIndex, repo *moviestore.MovieRepository, jq *job.JobQueue, logger *slog.Logger) *MovieAPI {
|
||||
return &MovieAPI{
|
||||
apis: apis,
|
||||
repo: repo,
|
||||
|
@ -33,29 +34,30 @@ func NewMovieAPI(apis APIIndex, repo *moviestore.MovieRepository, jq *moviestore
|
|||
func (movieAPI *MovieAPI) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
||||
logger := movieAPI.logger.With("method", "serveHTTP")
|
||||
|
||||
subPath, subTail := ShiftPath(r.URL.Path)
|
||||
head, tail := ShiftPath(r.URL.Path)
|
||||
subHead, subTail := ShiftPath(tail)
|
||||
for aPath, api := range movieAPI.apis {
|
||||
if subPath == aPath {
|
||||
if head != "" && subHead == fmt.Sprintf("%s", aPath) {
|
||||
r.URL.Path = subTail
|
||||
r = r.Clone(context.WithValue(r.Context(), MovieKey, subPath))
|
||||
r = r.Clone(context.WithValue(r.Context(), MovieKey, head))
|
||||
api.ServeHTTP(w, r)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
switch {
|
||||
case r.Method == http.MethodGet && subPath != "":
|
||||
movieAPI.Read(w, r, subPath)
|
||||
case r.Method == http.MethodPut && subPath != "":
|
||||
movieAPI.Store(w, r, subPath)
|
||||
case r.Method == http.MethodPost && subPath == "":
|
||||
case r.Method == http.MethodGet && head != "":
|
||||
movieAPI.Read(w, r, head)
|
||||
case r.Method == http.MethodPut && head != "":
|
||||
movieAPI.Store(w, r, head)
|
||||
case r.Method == http.MethodPost && head == "":
|
||||
movieAPI.Store(w, r, "")
|
||||
case r.Method == http.MethodDelete && subPath != "":
|
||||
movieAPI.Delete(w, r, subPath)
|
||||
case r.Method == http.MethodGet && subPath == "":
|
||||
case r.Method == http.MethodDelete && head != "":
|
||||
movieAPI.Delete(w, r, head)
|
||||
case r.Method == http.MethodGet && head == "":
|
||||
movieAPI.List(w, r)
|
||||
default:
|
||||
Error(w, http.StatusNotFound, "unregistered path", fmt.Errorf("method %q with subpath %q was not registered in /movie", r.Method, subPath), logger)
|
||||
Error(w, http.StatusNotFound, "unregistered path", fmt.Errorf("method %q with subpath %q was not registered in /movie", r.Method, head), logger)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -113,7 +115,7 @@ func (movieAPI *MovieAPI) Store(w http.ResponseWriter, r *http.Request, urlID st
|
|||
return
|
||||
}
|
||||
|
||||
if err := movieAPI.jq.Add(m.ID, moviestore.ActionFetchIMDBReviews); err != nil {
|
||||
if err := movieAPI.jq.Add(m.ID, job.ActionRefreshIMDBReviews); err != nil {
|
||||
Error(w, http.StatusInternalServerError, "could not add job to queue", err, logger)
|
||||
return
|
||||
}
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
package handler
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"net/http"
|
||||
|
@ -25,17 +26,25 @@ func (reviewAPI *ReviewAPI) ServeHTTP(w http.ResponseWriter, r *http.Request) {
|
|||
|
||||
subPath, _ := ShiftPath(r.URL.Path)
|
||||
switch {
|
||||
//case r.Method == http.MethodGet && subPath != "":
|
||||
// reviewAPI.Read(w, r, subPath)
|
||||
//case r.Method == http.MethodPut && subPath != "":
|
||||
// reviewAPI.Store(w, r, subPath)
|
||||
//case r.Method == http.MethodPost && subPath == "":
|
||||
// reviewAPI.Store(w, r, "")
|
||||
//case r.Method == http.MethodDelete && subPath != "":
|
||||
// reviewAPI.Delete(w, r, subPath)
|
||||
//case r.Method == http.MethodGet && subPath == "":
|
||||
// reviewAPI.List(w, r)
|
||||
case r.Method == http.MethodGet && subPath == "":
|
||||
reviewAPI.List(w, r)
|
||||
default:
|
||||
Error(w, http.StatusNotFound, "unregistered path", fmt.Errorf("method %q with subpath %q was not registered in /review", r.Method, subPath), logger)
|
||||
}
|
||||
}
|
||||
|
||||
func (reviewAPI *ReviewAPI) List(w http.ResponseWriter, r *http.Request) {
|
||||
logger := reviewAPI.logger.With("method", "list")
|
||||
|
||||
movieID := r.Context().Value(MovieKey).(string)
|
||||
reviews, err := reviewAPI.repo.FindByMovieID(movieID)
|
||||
if err != nil {
|
||||
Error(w, http.StatusInternalServerError, "could not get reviews", err, logger)
|
||||
return
|
||||
}
|
||||
|
||||
if err := json.NewEncoder(w).Encode(reviews); err != nil {
|
||||
Error(w, http.StatusInternalServerError, "could not encode reviews", err, logger)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,71 +0,0 @@
|
|||
package handler
|
||||
|
||||
import (
|
||||
"log/slog"
|
||||
|
||||
"ewintr.nl/emdb/client"
|
||||
movie2 "ewintr.nl/emdb/cmd/api-service/moviestore"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
type Worker struct {
|
||||
jq *movie2.JobQueue
|
||||
movieRepo *movie2.MovieRepository
|
||||
reviewRepo *movie2.ReviewRepository
|
||||
imdb *client.IMDB
|
||||
logger *slog.Logger
|
||||
}
|
||||
|
||||
func NewWorker(jq *movie2.JobQueue, movieRepo *movie2.MovieRepository, reviewRepo *movie2.ReviewRepository, imdb *client.IMDB, logger *slog.Logger) *Worker {
|
||||
return &Worker{
|
||||
jq: jq,
|
||||
movieRepo: movieRepo,
|
||||
reviewRepo: reviewRepo,
|
||||
imdb: imdb,
|
||||
logger: logger.With("service", "worker"),
|
||||
}
|
||||
}
|
||||
|
||||
func (w *Worker) Run() {
|
||||
w.logger.Info("starting worker")
|
||||
for job := range w.jq.Next() {
|
||||
w.logger.Info("got a new job", "jobID", job.ID, "movieID", job.MovieID, "action", job.Action)
|
||||
switch job.Action {
|
||||
case movie2.ActionFetchIMDBReviews:
|
||||
w.fetchReviews(job)
|
||||
default:
|
||||
w.logger.Warn("unknown job action", "action", job.Action)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (w *Worker) fetchReviews(job movie2.Job) {
|
||||
logger := w.logger.With("method", "fetchReviews", "jobID", job.ID, "movieID", job.MovieID)
|
||||
|
||||
m, err := w.movieRepo.FindOne(job.MovieID)
|
||||
if err != nil {
|
||||
logger.Error("could not get movie", "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
reviews, err := w.imdb.GetReviews(m.IMDBID)
|
||||
if err != nil {
|
||||
logger.Error("could not get reviews", "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
for url, review := range reviews {
|
||||
if err := w.reviewRepo.Store(movie2.Review{
|
||||
ID: uuid.New().String(),
|
||||
MovieID: m.ID,
|
||||
Source: movie2.ReviewSourceIMDB,
|
||||
URL: url,
|
||||
Review: review,
|
||||
}); err != nil {
|
||||
logger.Error("could not store review", "error", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
logger.Info("fetched reviews", "count", len(reviews))
|
||||
}
|
|
@ -0,0 +1,29 @@
|
|||
package job
|
||||
|
||||
import (
|
||||
"time"
|
||||
)
|
||||
|
||||
// JobStatus is the type of a Job's Status field.
type JobStatus string
|
||||
|
||||
// The job states declared by this package.
const (
|
||||
JobStatusToDo JobStatus = "todo"
|
||||
JobStatusDoing JobStatus = "doing"
|
||||
JobStatusDone JobStatus = "done"
|
||||
)
|
||||
|
||||
// Action names the kind of work a Job asks for.
type Action string
|
||||
|
||||
const (
|
||||
// NOTE(review): interval is not referenced in the visible code; presumably
// the job queue's polling period — confirm against the queue implementation.
interval = 10 * time.Second
|
||||
|
||||
// ActionRefreshIMDBReviews requests a review refresh for a single movie.
ActionRefreshIMDBReviews Action = "refresh-imdb-reviews"
|
||||
// ActionRefreshAllIMDBReviews requests a review refresh for all movies.
ActionRefreshAllIMDBReviews Action = "refresh-all-imdb-reviews"
|
||||
)
|
||||
|
||||
// Job describes one queued action: its ID, the movie it refers to, the action
// to perform and its current status.
type Job struct {
|
||||
ID int
|
||||
MovieID string
|
||||
Action Action
|
||||
Status JobStatus
|
||||
}
|
|
@ -1,42 +1,21 @@
|
|||
package moviestore
|
||||
package job
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"errors"
|
||||
"log/slog"
|
||||
"time"
|
||||
|
||||
"ewintr.nl/emdb/cmd/api-service/moviestore"
|
||||
)
|
||||
|
||||
type JobStatus string
|
||||
|
||||
const (
|
||||
JobStatusToDo JobStatus = "todo"
|
||||
JobStatusDoing JobStatus = "doing"
|
||||
JobStatusDone JobStatus = "done"
|
||||
)
|
||||
|
||||
type Action string
|
||||
|
||||
const (
|
||||
interval = 10 * time.Second
|
||||
|
||||
ActionFetchIMDBReviews Action = "fetch-imdb-reviews"
|
||||
)
|
||||
|
||||
type Job struct {
|
||||
ID int
|
||||
MovieID string
|
||||
Action Action
|
||||
Status JobStatus
|
||||
}
|
||||
|
||||
type JobQueue struct {
|
||||
db *SQLite
|
||||
db *moviestore.SQLite
|
||||
out chan Job
|
||||
logger *slog.Logger
|
||||
}
|
||||
|
||||
func NewJobQueue(db *SQLite, logger *slog.Logger) *JobQueue {
|
||||
func NewJobQueue(db *moviestore.SQLite, logger *slog.Logger) *JobQueue {
|
||||
return &JobQueue{
|
||||
db: db,
|
||||
out: make(chan Job),
|
|
@ -0,0 +1,92 @@
|
|||
package job
|
||||
|
||||
import (
|
||||
"log/slog"
|
||||
|
||||
"ewintr.nl/emdb/client"
|
||||
"ewintr.nl/emdb/cmd/api-service/moviestore"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// Worker takes jobs from a JobQueue and executes them. It holds the movie and
// review repositories and the IMDB client that its refresh methods use, plus
// a logger.
type Worker struct {
|
||||
jq *JobQueue
|
||||
movieRepo *moviestore.MovieRepository
|
||||
reviewRepo *moviestore.ReviewRepository
|
||||
imdb *client.IMDB
|
||||
logger *slog.Logger
|
||||
}
|
||||
|
||||
func NewWorker(jq *JobQueue, movieRepo *moviestore.MovieRepository, reviewRepo *moviestore.ReviewRepository, imdb *client.IMDB, logger *slog.Logger) *Worker {
|
||||
return &Worker{
|
||||
jq: jq,
|
||||
movieRepo: movieRepo,
|
||||
reviewRepo: reviewRepo,
|
||||
imdb: imdb,
|
||||
logger: logger.With("service", "worker"),
|
||||
}
|
||||
}
|
||||
|
||||
func (w *Worker) Run() {
|
||||
w.logger.Info("starting worker")
|
||||
for j := range w.jq.Next() {
|
||||
w.logger.Info("got a new job", "jobID", j.ID, "movieID", j.MovieID, "action", j.Action)
|
||||
switch j.Action {
|
||||
case ActionRefreshIMDBReviews:
|
||||
w.RefreshReviews(j.ID, j.MovieID)
|
||||
case ActionRefreshAllIMDBReviews:
|
||||
w.RefreshAllReviews(j.ID)
|
||||
default:
|
||||
w.logger.Warn("unknown job action", "action", j.Action)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (w *Worker) RefreshAllReviews(jobID int) {
|
||||
logger := w.logger.With("method", "fetchReviews", "jobID", jobID)
|
||||
|
||||
movies, err := w.movieRepo.FindAll()
|
||||
if err != nil {
|
||||
logger.Error("could not get movies", "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
for _, m := range movies {
|
||||
w.RefreshReviews(jobID, m.ID)
|
||||
}
|
||||
}
|
||||
|
||||
func (w *Worker) RefreshReviews(jobID int, movieID string) {
|
||||
logger := w.logger.With("method", "fetchReviews", "jobID", jobID, "movieID", movieID)
|
||||
|
||||
m, err := w.movieRepo.FindOne(movieID)
|
||||
if err != nil {
|
||||
logger.Error("could not get movie", "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
if err := w.reviewRepo.DeleteByMovieID(m.ID); err != nil {
|
||||
logger.Error("could not delete reviews", "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
reviews, err := w.imdb.GetReviews(m.IMDBID)
|
||||
if err != nil {
|
||||
logger.Error("could not get reviews", "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
for url, review := range reviews {
|
||||
if err := w.reviewRepo.Store(moviestore.Review{
|
||||
ID: uuid.New().String(),
|
||||
MovieID: m.ID,
|
||||
Source: moviestore.ReviewSourceIMDB,
|
||||
URL: url,
|
||||
Review: review,
|
||||
}); err != nil {
|
||||
logger.Error("could not store review", "error", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
logger.Info("refresh reviews", "count", len(reviews))
|
||||
}
|
|
@ -65,3 +65,11 @@ func (rr *ReviewRepository) FindByMovieID(movieID string) ([]Review, error) {
|
|||
|
||||
return reviews, nil
|
||||
}
|
||||
|
||||
func (rr *ReviewRepository) DeleteByMovieID(id string) error {
|
||||
if _, err := rr.db.Exec(`DELETE FROM review WHERE movie_id=?`, id); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
|
|
@ -11,6 +11,7 @@ import (
|
|||
|
||||
"ewintr.nl/emdb/client"
|
||||
"ewintr.nl/emdb/cmd/api-service/handler"
|
||||
"ewintr.nl/emdb/cmd/api-service/job"
|
||||
"ewintr.nl/emdb/cmd/api-service/moviestore"
|
||||
)
|
||||
|
||||
|
@ -31,12 +32,13 @@ func main() {
|
|||
os.Exit(1)
|
||||
}
|
||||
|
||||
jobQueue := moviestore.NewJobQueue(db, logger)
|
||||
jobQueue := job.NewJobQueue(db, logger)
|
||||
go jobQueue.Run()
|
||||
worker := handler.NewWorker(jobQueue, moviestore.NewMovieRepository(db), moviestore.NewReviewRepository(db), client.NewIMDB(), logger)
|
||||
worker := job.NewWorker(jobQueue, moviestore.NewMovieRepository(db), moviestore.NewReviewRepository(db), client.NewIMDB(), logger)
|
||||
go worker.Run()
|
||||
|
||||
apis := handler.APIIndex{
|
||||
"admin": handler.NewAdminAPI(jobQueue, logger),
|
||||
"movie": handler.NewMovieAPI(handler.APIIndex{
|
||||
"review": handler.NewReviewAPI(moviestore.NewReviewRepository(db), logger),
|
||||
}, moviestore.NewMovieRepository(db), jobQueue, logger),
|
||||
|
|
Loading…
Reference in New Issue