diff --git a/.gitignore b/.gitignore index 382bd48..07766d4 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ *.db +*.db-shm +*.db-wal emdb emdb-api public \ No newline at end of file diff --git a/client/imdb.go b/client/imdb.go index afd7884..43a23cc 100644 --- a/client/imdb.go +++ b/client/imdb.go @@ -3,6 +3,8 @@ package client import ( "fmt" "net/http" + "regexp" + "strings" "github.com/PuerkitoBio/goquery" ) @@ -57,8 +59,25 @@ func (i *IMDB) GetReviews(imdbID string) (map[string]string, error) { return } - reviews[permaLink] = reviewNode.Text() + reviews[permaLink] = ScrubIMDBReview(reviewNode.Text()) }) return reviews, nil } + +func ScrubIMDBReview(review string) string { + // remove footer + for _, text := range []string{"Was this review helpful?", "Sign in to vote.", "Permalink"} { + review = strings.ReplaceAll(review, text, "") + } + + // remove superfluous whitespace + reWS := regexp.MustCompile(`\n\s+`) + review = reWS.ReplaceAllString(review, "\n") + + // remove superfluous newlines + re := regexp.MustCompile(`\n{3,}`) + review = re.ReplaceAllString(review, "\n\n") + + return review +} diff --git a/cmd/api-service/handler/admin.go b/cmd/api-service/handler/admin.go new file mode 100644 index 0000000..3b5e923 --- /dev/null +++ b/cmd/api-service/handler/admin.go @@ -0,0 +1,53 @@ +package handler + +import ( + "encoding/json" + "log/slog" + "net/http" + + "ewintr.nl/emdb/cmd/api-service/job" +) + +type AdminAPI struct { + jq *job.JobQueue + logger *slog.Logger +} + +func NewAdminAPI(jq *job.JobQueue, logger *slog.Logger) *AdminAPI { + return &AdminAPI{ + jq: jq, + logger: logger.With("api", "admin"), + } +} + +func (adminAPI *AdminAPI) ServeHTTP(w http.ResponseWriter, r *http.Request) { + logger := adminAPI.logger.With("method", "serveHTTP") + + subPath, _ := ShiftPath(r.URL.Path) + switch { + case r.Method == http.MethodPost && subPath == "": + adminAPI.Add(w, r) + default: + Error(w, http.StatusNotFound, "unregistered path", nil, logger) + } +} + +func (adminAPI *AdminAPI) Add(w http.ResponseWriter, r *http.Request) { + logger := adminAPI.logger.With("method", "add") + + var job job.Job + if err := json.NewDecoder(r.Body).Decode(&job); err != nil { + Error(w, http.StatusBadRequest, "could not decode job", err, logger) + return + } + + if err := adminAPI.jq.Add(job.MovieID, job.Action); err != nil { + Error(w, http.StatusInternalServerError, "could not add job", err, logger) + return + } + + if err := json.NewEncoder(w).Encode(job); err != nil { + Error(w, http.StatusInternalServerError, "could not encode job", err, logger) + return + } +} diff --git a/cmd/api-service/handler/movie.go b/cmd/api-service/handler/movie.go index 9e47c7a..b822920 100644 --- a/cmd/api-service/handler/movie.go +++ b/cmd/api-service/handler/movie.go @@ -10,6 +10,7 @@ import ( "log/slog" "net/http" + "ewintr.nl/emdb/cmd/api-service/job" "ewintr.nl/emdb/cmd/api-service/moviestore" "github.com/google/uuid" ) @@ -17,11 +18,11 @@ import ( type MovieAPI struct { apis APIIndex repo *moviestore.MovieRepository - jq *moviestore.JobQueue + jq *job.JobQueue logger *slog.Logger } -func NewMovieAPI(apis APIIndex, repo *moviestore.MovieRepository, jq *moviestore.JobQueue, logger *slog.Logger) *MovieAPI { +func NewMovieAPI(apis APIIndex, repo *moviestore.MovieRepository, jq *job.JobQueue, logger *slog.Logger) *MovieAPI { return &MovieAPI{ apis: apis, repo: repo, @@ -33,29 +34,30 @@ func NewMovieAPI(apis APIIndex, repo *moviestore.MovieRepository, jq *moviestore func (movieAPI *MovieAPI) ServeHTTP(w http.ResponseWriter, r *http.Request) { logger := movieAPI.logger.With("method", "serveHTTP") - subPath, subTail := ShiftPath(r.URL.Path) + head, tail := ShiftPath(r.URL.Path) + subHead, subTail := ShiftPath(tail) for aPath, api := range movieAPI.apis { - if subPath == aPath { + if head != "" && subHead == fmt.Sprintf("%s", aPath) { r.URL.Path = subTail - r = r.Clone(context.WithValue(r.Context(), MovieKey, subPath)) + r = r.Clone(context.WithValue(r.Context(), MovieKey, head)) api.ServeHTTP(w, r) return } } switch { - case r.Method == http.MethodGet && subPath != "": - movieAPI.Read(w, r, subPath) - case r.Method == http.MethodPut && subPath != "": - movieAPI.Store(w, r, subPath) - case r.Method == http.MethodPost && subPath == "": + case r.Method == http.MethodGet && head != "": + movieAPI.Read(w, r, head) + case r.Method == http.MethodPut && head != "": + movieAPI.Store(w, r, head) + case r.Method == http.MethodPost && head == "": movieAPI.Store(w, r, "") - case r.Method == http.MethodDelete && subPath != "": - movieAPI.Delete(w, r, subPath) - case r.Method == http.MethodGet && subPath == "": + case r.Method == http.MethodDelete && head != "": + movieAPI.Delete(w, r, head) + case r.Method == http.MethodGet && head == "": movieAPI.List(w, r) default: - Error(w, http.StatusNotFound, "unregistered path", fmt.Errorf("method %q with subpath %q was not registered in /movie", r.Method, subPath), logger) + Error(w, http.StatusNotFound, "unregistered path", fmt.Errorf("method %q with subpath %q was not registered in /movie", r.Method, head), logger) } } @@ -113,7 +115,7 @@ func (movieAPI *MovieAPI) Store(w http.ResponseWriter, r *http.Request, urlID st return } - if err := movieAPI.jq.Add(m.ID, moviestore.ActionFetchIMDBReviews); err != nil { + if err := movieAPI.jq.Add(m.ID, job.ActionRefreshIMDBReviews); err != nil { Error(w, http.StatusInternalServerError, "could not add job to queue", err, logger) return } diff --git a/cmd/api-service/handler/review.go b/cmd/api-service/handler/review.go index 9a40d82..f095d4c 100644 --- a/cmd/api-service/handler/review.go +++ b/cmd/api-service/handler/review.go @@ -1,6 +1,7 @@ package handler import ( + "encoding/json" "fmt" "log/slog" "net/http" @@ -25,17 +26,25 @@ func (reviewAPI *ReviewAPI) ServeHTTP(w http.ResponseWriter, r *http.Request) { subPath, _ := ShiftPath(r.URL.Path) switch { - //case r.Method == http.MethodGet && subPath != "": - // reviewAPI.Read(w, r, subPath) - //case r.Method == http.MethodPut && subPath != "": - // reviewAPI.Store(w, r, subPath) - //case r.Method == http.MethodPost && subPath == "": - // reviewAPI.Store(w, r, "") - //case r.Method == http.MethodDelete && subPath != "": - // reviewAPI.Delete(w, r, subPath) - //case r.Method == http.MethodGet && subPath == "": - // reviewAPI.List(w, r) + case r.Method == http.MethodGet && subPath == "": + reviewAPI.List(w, r) default: Error(w, http.StatusNotFound, "unregistered path", fmt.Errorf("method %q with subpath %q was not registered in /review", r.Method, subPath), logger) } } + +func (reviewAPI *ReviewAPI) List(w http.ResponseWriter, r *http.Request) { + logger := reviewAPI.logger.With("method", "list") + + movieID := r.Context().Value(MovieKey).(string) + reviews, err := reviewAPI.repo.FindByMovieID(movieID) + if err != nil { + Error(w, http.StatusInternalServerError, "could not get reviews", err, logger) + return + } + + if err := json.NewEncoder(w).Encode(reviews); err != nil { + Error(w, http.StatusInternalServerError, "could not encode reviews", err, logger) + return + } +} diff --git a/cmd/api-service/handler/worker.go b/cmd/api-service/handler/worker.go deleted file mode 100644 index 3b52ef1..0000000 --- a/cmd/api-service/handler/worker.go +++ /dev/null @@ -1,71 +0,0 @@ -package handler - -import ( - "log/slog" - - "ewintr.nl/emdb/client" - movie2 "ewintr.nl/emdb/cmd/api-service/moviestore" - "github.com/google/uuid" -) - -type Worker struct { - jq *movie2.JobQueue - movieRepo *movie2.MovieRepository - reviewRepo *movie2.ReviewRepository - imdb *client.IMDB - logger *slog.Logger -} - -func NewWorker(jq *movie2.JobQueue, movieRepo *movie2.MovieRepository, reviewRepo *movie2.ReviewRepository, imdb *client.IMDB, logger *slog.Logger) *Worker { - return &Worker{ - jq: jq, - movieRepo: movieRepo, - reviewRepo: reviewRepo, - imdb: imdb, - logger: logger.With("service", "worker"), - } -} - -func (w *Worker) Run() { - w.logger.Info("starting worker") - for job := range w.jq.Next() { - w.logger.Info("got a new job", "jobID", job.ID, "movieID", job.MovieID, "action", job.Action) - switch job.Action { - case movie2.ActionFetchIMDBReviews: - w.fetchReviews(job) - default: - w.logger.Warn("unknown job action", "action", job.Action) - } - } -} - -func (w *Worker) fetchReviews(job movie2.Job) { - logger := w.logger.With("method", "fetchReviews", "jobID", job.ID, "movieID", job.MovieID) - - m, err := w.movieRepo.FindOne(job.MovieID) - if err != nil { - logger.Error("could not get movie", "error", err) - return - } - - reviews, err := w.imdb.GetReviews(m.IMDBID) - if err != nil { - logger.Error("could not get reviews", "error", err) - return - } - - for url, review := range reviews { - if err := w.reviewRepo.Store(movie2.Review{ - ID: uuid.New().String(), - MovieID: m.ID, - Source: movie2.ReviewSourceIMDB, - URL: url, - Review: review, - }); err != nil { - logger.Error("could not store review", "error", err) - return - } - } - - logger.Info("fetched reviews", "count", len(reviews)) -} diff --git a/cmd/api-service/job/job.go b/cmd/api-service/job/job.go new file mode 100644 index 0000000..1fbf28e --- /dev/null +++ b/cmd/api-service/job/job.go @@ -0,0 +1,29 @@ +package job + +import ( + "time" +) + +type JobStatus string + +const ( + JobStatusToDo JobStatus = "todo" + JobStatusDoing JobStatus = "doing" + JobStatusDone JobStatus = "done" +) + +type Action string + +const ( + interval = 10 * time.Second + + ActionRefreshIMDBReviews Action = "refresh-imdb-reviews" + ActionRefreshAllIMDBReviews Action = "refresh-all-imdb-reviews" +) + +type Job struct { + ID int + MovieID string + Action Action + Status JobStatus +} diff --git a/cmd/api-service/moviestore/job.go b/cmd/api-service/job/queue.go similarity index 77% rename from cmd/api-service/moviestore/job.go rename to cmd/api-service/job/queue.go index acc7dd2..39524b1 100644 --- a/cmd/api-service/moviestore/job.go +++ b/cmd/api-service/job/queue.go @@ -1,42 +1,21 @@ -package moviestore +package job import ( "database/sql" "errors" "log/slog" "time" + + "ewintr.nl/emdb/cmd/api-service/moviestore" ) -type JobStatus string - -const ( - JobStatusToDo JobStatus = "todo" - JobStatusDoing JobStatus = "doing" - JobStatusDone JobStatus = "done" -) - -type Action string - -const ( - interval = 10 * time.Second - - ActionFetchIMDBReviews Action = "fetch-imdb-reviews" -) - -type Job struct { - ID int - MovieID string - Action Action - Status JobStatus -} - type JobQueue struct { - db *SQLite + db *moviestore.SQLite out chan Job logger *slog.Logger } -func NewJobQueue(db *SQLite, logger *slog.Logger) *JobQueue { +func NewJobQueue(db *moviestore.SQLite, logger *slog.Logger) *JobQueue { return &JobQueue{ db: db, out: make(chan Job), diff --git a/cmd/api-service/job/worker.go b/cmd/api-service/job/worker.go new file mode 100644 index 0000000..8328469 --- /dev/null +++ b/cmd/api-service/job/worker.go @@ -0,0 +1,92 @@ +package job + +import ( + "log/slog" + + "ewintr.nl/emdb/client" + "ewintr.nl/emdb/cmd/api-service/moviestore" + "github.com/google/uuid" +) + +type Worker struct { + jq *JobQueue + movieRepo *moviestore.MovieRepository + reviewRepo *moviestore.ReviewRepository + imdb *client.IMDB + logger *slog.Logger +} + +func NewWorker(jq *JobQueue, movieRepo *moviestore.MovieRepository, reviewRepo *moviestore.ReviewRepository, imdb *client.IMDB, logger *slog.Logger) *Worker { + return &Worker{ + jq: jq, + movieRepo: movieRepo, + reviewRepo: reviewRepo, + imdb: imdb, + logger: logger.With("service", "worker"), + } +} + +func (w *Worker) Run() { + w.logger.Info("starting worker") + for j := range w.jq.Next() { + w.logger.Info("got a new job", "jobID", j.ID, "movieID", j.MovieID, "action", j.Action) + switch j.Action { + case ActionRefreshIMDBReviews: + w.RefreshReviews(j.ID, j.MovieID) + case ActionRefreshAllIMDBReviews: + w.RefreshAllReviews(j.ID) + default: + w.logger.Warn("unknown job action", "action", j.Action) + } + } +} + +func (w *Worker) RefreshAllReviews(jobID int) { + logger := w.logger.With("method", "fetchReviews", "jobID", jobID) + + movies, err := w.movieRepo.FindAll() + if err != nil { + logger.Error("could not get movies", "error", err) + return + } + + for _, m := range movies { + w.RefreshReviews(jobID, m.ID) + } +} + +func (w *Worker) RefreshReviews(jobID int, movieID string) { + logger := w.logger.With("method", "fetchReviews", "jobID", jobID, "movieID", movieID) + + m, err := w.movieRepo.FindOne(movieID) + if err != nil { + logger.Error("could not get movie", "error", err) + return + } + + if err := w.reviewRepo.DeleteByMovieID(m.ID); err != nil { + logger.Error("could not delete reviews", "error", err) + return + } + + reviews, err := w.imdb.GetReviews(m.IMDBID) + if err != nil { + logger.Error("could not get reviews", "error", err) + return + } + + for url, review := range reviews { + if err := w.reviewRepo.Store(moviestore.Review{ + ID: uuid.New().String(), + MovieID: m.ID, + Source: moviestore.ReviewSourceIMDB, + URL: url, + Review: review, + }); err != nil { + logger.Error("could not store review", "error", err) + return + } + } + + logger.Info("refresh reviews", "count", len(reviews)) +} diff --git a/cmd/api-service/moviestore/review.go b/cmd/api-service/moviestore/review.go index 3df8177..5e7ea70 100644 --- a/cmd/api-service/moviestore/review.go +++ b/cmd/api-service/moviestore/review.go @@ -65,3 +65,11 @@ func (rr *ReviewRepository) FindByMovieID(movieID string) ([]Review, error) { return reviews, nil } + +func (rr *ReviewRepository) DeleteByMovieID(id string) error { + if _, err := rr.db.Exec(`DELETE FROM review WHERE movie_id=?`, id); err != nil { + return err + } + + return nil +} diff --git a/cmd/api-service/service.go b/cmd/api-service/service.go index f91c740..c724b86 100644 --- a/cmd/api-service/service.go +++ b/cmd/api-service/service.go @@ -11,6 +11,7 @@ import ( "ewintr.nl/emdb/client" "ewintr.nl/emdb/cmd/api-service/handler" + "ewintr.nl/emdb/cmd/api-service/job" "ewintr.nl/emdb/cmd/api-service/moviestore" ) @@ -31,12 +32,13 @@ func main() { os.Exit(1) } - jobQueue := moviestore.NewJobQueue(db, logger) + jobQueue := job.NewJobQueue(db, logger) go jobQueue.Run() - worker := handler.NewWorker(jobQueue, moviestore.NewMovieRepository(db), moviestore.NewReviewRepository(db), client.NewIMDB(), logger) + worker := job.NewWorker(jobQueue, moviestore.NewMovieRepository(db), moviestore.NewReviewRepository(db), client.NewIMDB(), logger) go worker.Run() apis := handler.APIIndex{ + "admin": handler.NewAdminAPI(jobQueue, logger), "movie": handler.NewMovieAPI(handler.APIIndex{ "review": handler.NewReviewAPI(moviestore.NewReviewRepository(db), logger), }, moviestore.NewMovieRepository(db), jobQueue, logger),