diff --git a/client/imdb.go b/client/imdb.go index 43a23cc..b9b90d9 100644 --- a/client/imdb.go +++ b/client/imdb.go @@ -4,16 +4,14 @@ import ( "fmt" "net/http" "regexp" + "strconv" "strings" + "ewintr.nl/emdb/cmd/api-service/moviestore" "github.com/PuerkitoBio/goquery" + "github.com/google/uuid" ) -type Review struct { - Source string - Review string -} - type IMDB struct { } @@ -21,8 +19,8 @@ func NewIMDB() *IMDB { return &IMDB{} } -func (i *IMDB) GetReviews(imdbID string) (map[string]string, error) { - url := fmt.Sprintf("https://www.imdb.com/title/%s/reviews", imdbID) +func (i *IMDB) GetReviews(m moviestore.Movie) ([]moviestore.Review, error) { + url := fmt.Sprintf("https://www.imdb.com/title/%s/reviews", m.IMDBID) req, err := http.NewRequest(http.MethodGet, url, nil) if err != nil { return nil, err @@ -42,7 +40,7 @@ func (i *IMDB) GetReviews(imdbID string) (map[string]string, error) { } defer res.Body.Close() - reviews := make(map[string]string) + reviews := make([]moviestore.Review, 0) doc.Find(".lister-item-content").Each(func(i int, reviewNode *goquery.Selection) { var permaLink string @@ -59,13 +57,21 @@ func (i *IMDB) GetReviews(imdbID string) (map[string]string, error) { return } - reviews[permaLink] = ScrubIMDBReview(reviewNode.Text()) + rat, rev := ScrubIMDBReview(reviewNode.Text()) + reviews = append(reviews, moviestore.Review{ + ID: uuid.New().String(), + MovieID: m.ID, + Source: moviestore.ReviewSourceIMDB, + URL: fmt.Sprintf("https://www.imdb.com%s", permaLink), + Review: rev, + MovieRating: rat, + }) }) return reviews, nil } -func ScrubIMDBReview(review string) string { +func ScrubIMDBReview(review string) (int, string) { // remove footer for _, text := range []string{"Was this review helpful?", "Sign in to vote.", "Permalink"} { review = strings.ReplaceAll(review, text, "") @@ -76,8 +82,16 @@ func ScrubIMDBReview(review string) string { review = reWS.ReplaceAllString(review, "\n") // remove superfluous newlines - re := regexp.MustCompile(`\n{3,}`) - review = re.ReplaceAllString(review, "\n\n") + reRev := regexp.MustCompile(`\n{3,}`) + review = reRev.ReplaceAllString(review, "\n\n") - return review + reRat := regexp.MustCompile(`(\d+)/10\n`) + reMatch := reRat.FindStringSubmatch(review) + var rating int + if len(reMatch) > 0 { + rating, _ = strconv.Atoi(reMatch[1]) + review = strings.ReplaceAll(review, reMatch[0], "") + } + + return rating, review } diff --git a/cmd/api-service/handler/movie.go b/cmd/api-service/handler/movie.go index b822920..04ae4dd 100644 --- a/cmd/api-service/handler/movie.go +++ b/cmd/api-service/handler/movie.go @@ -115,11 +115,6 @@ func (movieAPI *MovieAPI) Store(w http.ResponseWriter, r *http.Request, urlID st return } - if err := movieAPI.jq.Add(m.ID, job.ActionRefreshIMDBReviews); err != nil { - Error(w, http.StatusInternalServerError, "could not add job to queue", err, logger) - return - } - resBody, err := json.Marshal(m) if err != nil { Error(w, http.StatusInternalServerError, "could not marshal movie", err, logger) diff --git a/cmd/api-service/job/job.go b/cmd/api-service/job/job.go index 1fbf28e..43ff511 100644 --- a/cmd/api-service/job/job.go +++ b/cmd/api-service/job/job.go @@ -6,12 +6,6 @@ import ( type JobStatus string -const ( - JobStatusToDo JobStatus = "todo" - JobStatusDoing JobStatus = "doing" - JobStatusDone JobStatus = "done" -) - type Action string const ( diff --git a/cmd/api-service/job/worker.go b/cmd/api-service/job/worker.go index 13273e5..97516d5 100644 --- a/cmd/api-service/job/worker.go +++ b/cmd/api-service/job/worker.go @@ -5,7 +5,6 @@ import ( "ewintr.nl/emdb/client" "ewintr.nl/emdb/cmd/api-service/moviestore" - "github.com/google/uuid" ) type Worker struct { @@ -41,6 +40,10 @@ func (w *Worker) Run() { } } +func (w *Worker) FindNewJobs() { + +} + func (w *Worker) RefreshAllReviews(jobID int) { logger := w.logger.With("method", "fetchReviews", "jobID", jobID) @@ -72,20 +75,14 @@ func (w *Worker) RefreshReviews(jobID int, movieID string) { return } - reviews, err := w.imdb.GetReviews(m.IMDBID) + reviews, err := w.imdb.GetReviews(m) if err != nil { logger.Error("could not get reviews", "error", err) return } - for url, review := range reviews { - if err := w.reviewRepo.Store(moviestore.Review{ - ID: uuid.New().String(), - MovieID: m.ID, - Source: moviestore.ReviewSourceIMDB, - URL: url, - Review: review, - }); err != nil { + for _, review := range reviews { + if err := w.reviewRepo.Store(review); err != nil { logger.Error("could not store review", "error", err) return } diff --git a/cmd/api-service/moviestore/review.go b/cmd/api-service/moviestore/review.go index e24d40f..c7282f3 100644 --- a/cmd/api-service/moviestore/review.go +++ b/cmd/api-service/moviestore/review.go @@ -11,13 +11,14 @@ const ( type ReviewSource string type Review struct { - ID string - MovieID string - Source ReviewSource - URL string - Review string - Quality int - Mentions []string + ID string + MovieID string + Source ReviewSource + URL string + Review string + MovieRating int + Quality int + Mentions []string } type ReviewRepository struct { @@ -31,8 +32,8 @@ func NewReviewRepository(db *SQLite) *ReviewRepository { } func (rr *ReviewRepository) Store(r Review) error { - if _, err := rr.db.Exec(`REPLACE INTO review (id, movie_id, source, url, review, quality, mentions) VALUES (?, ?, ?, ?, ?, ?, ?)`, - r.ID, r.MovieID, r.Source, r.URL, r.Review, r.Quality, strings.Join(r.Mentions, MentionsSeparator)); err != nil { + if _, err := rr.db.Exec(`REPLACE INTO review (id, movie_id, source, url, review, movie_rating, quality, mentions) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`, + r.ID, r.MovieID, r.Source, r.URL, r.Review, r.MovieRating, r.Quality, strings.Join(r.Mentions, MentionsSeparator)); err != nil { return err } @@ -40,14 +41,14 @@ func (rr *ReviewRepository) Store(r Review) error { } func (rr *ReviewRepository) FindOne(id string) (Review, error) { - row := rr.db.QueryRow(`SELECT id, movie_id, source, url, review, quality, mentions FROM review WHERE id=?`, id) + row := rr.db.QueryRow(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review WHERE id=?`, id) if row.Err() != nil { return Review{}, row.Err() } r := Review{} var mentions string - if err := row.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil { + if err := row.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil { return Review{}, err } r.Mentions = make([]string, 0) @@ -58,7 +59,7 @@ func (rr *ReviewRepository) FindOne(id string) (Review, error) { } func (rr *ReviewRepository) FindByMovieID(movieID string) ([]Review, error) { - rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, quality, mentions FROM review WHERE movie_id=?`, movieID) + rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review WHERE movie_id=?`, movieID) if err != nil { return nil, err } @@ -67,7 +68,7 @@ func (rr *ReviewRepository) FindByMovieID(movieID string) ([]Review, error) { var mentions string for rows.Next() { r := Review{} - if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil { + if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil { return nil, err } r.Mentions = make([]string, 0) @@ -82,14 +83,14 @@ func (rr *ReviewRepository) FindByMovieID(movieID string) ([]Review, error) { } func (rr *ReviewRepository) FindNextUnrated() (Review, error) { - row := rr.db.QueryRow(`SELECT id, movie_id, source, url, review, quality, mentions FROM review WHERE quality=0 LIMIT 1`) + row := rr.db.QueryRow(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review WHERE quality=0 LIMIT 1`) if row.Err() != nil { return Review{}, row.Err() } r := Review{} var mentions string - if err := row.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil { + if err := row.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil { return Review{}, err } r.Mentions = make([]string, 0) @@ -101,7 +102,7 @@ func (rr *ReviewRepository) FindNextUnrated() (Review, error) { } func (rr *ReviewRepository) FindUnrated() ([]Review, error) { - rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, quality, mentions FROM review WHERE quality=0`) + rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review WHERE quality=0`) if err != nil { return nil, err } @@ -110,7 +111,7 @@ func (rr *ReviewRepository) FindUnrated() ([]Review, error) { var mentions string for rows.Next() { r := Review{} - if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil { + if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil { return nil, err } r.Mentions = make([]string, 0) @@ -125,7 +126,7 @@ func (rr *ReviewRepository) FindUnrated() ([]Review, error) { } func (rr *ReviewRepository) FindAll() ([]Review, error) { - rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, quality, mentions FROM review`) + rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review`) if err != nil { return nil, err } @@ -134,7 +135,7 @@ func (rr *ReviewRepository) FindAll() ([]Review, error) { var mentions string for rows.Next() { r := Review{} - if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil { + if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil { return nil, err } r.Mentions = make([]string, 0) diff --git a/cmd/api-service/moviestore/sqlite.go b/cmd/api-service/moviestore/sqlite.go index 70e4963..b5f6ce8 100644 --- a/cmd/api-service/moviestore/sqlite.go +++ b/cmd/api-service/moviestore/sqlite.go @@ -67,6 +67,7 @@ var sqliteMigrations = []sqliteMigration{ `ALTER TABLE review ADD COLUMN "quality" INTEGER NOT NULL DEFAULT 0`, `ALTER TABLE review DROP COLUMN "references"`, `ALTER TABLE review ADD COLUMN "mentions" TEXT NOT NULL DEFAULT ""`, + `ALTER TABLE review ADD COLUMN "movie_rating" INTEGER NOT NULL DEFAULT 0`, } var (