get rating from imdb review

This commit is contained in:
Erik Winter 2024-01-17 07:57:52 +01:00
parent bcdee2bdb1
commit 14b34f6d0d
6 changed files with 55 additions and 53 deletions

View File

@ -4,16 +4,14 @@ import (
"fmt" "fmt"
"net/http" "net/http"
"regexp" "regexp"
"strconv"
"strings" "strings"
"ewintr.nl/emdb/cmd/api-service/moviestore"
"github.com/PuerkitoBio/goquery" "github.com/PuerkitoBio/goquery"
"github.com/google/uuid"
) )
type Review struct {
Source string
Review string
}
type IMDB struct { type IMDB struct {
} }
@ -21,8 +19,8 @@ func NewIMDB() *IMDB {
return &IMDB{} return &IMDB{}
} }
func (i *IMDB) GetReviews(imdbID string) (map[string]string, error) { func (i *IMDB) GetReviews(m moviestore.Movie) ([]moviestore.Review, error) {
url := fmt.Sprintf("https://www.imdb.com/title/%s/reviews", imdbID) url := fmt.Sprintf("https://www.imdb.com/title/%s/reviews", m.IMDBID)
req, err := http.NewRequest(http.MethodGet, url, nil) req, err := http.NewRequest(http.MethodGet, url, nil)
if err != nil { if err != nil {
return nil, err return nil, err
@ -42,7 +40,7 @@ func (i *IMDB) GetReviews(imdbID string) (map[string]string, error) {
} }
defer res.Body.Close() defer res.Body.Close()
reviews := make(map[string]string) reviews := make([]moviestore.Review, 0)
doc.Find(".lister-item-content").Each(func(i int, reviewNode *goquery.Selection) { doc.Find(".lister-item-content").Each(func(i int, reviewNode *goquery.Selection) {
var permaLink string var permaLink string
@ -59,13 +57,21 @@ func (i *IMDB) GetReviews(imdbID string) (map[string]string, error) {
return return
} }
reviews[permaLink] = ScrubIMDBReview(reviewNode.Text()) rat, rev := ScrubIMDBReview(reviewNode.Text())
reviews = append(reviews, moviestore.Review{
ID: uuid.New().String(),
MovieID: m.ID,
Source: moviestore.ReviewSourceIMDB,
URL: fmt.Sprintf("https://www.imdb.com%s", permaLink),
Review: rev,
MovieRating: rat,
})
}) })
return reviews, nil return reviews, nil
} }
func ScrubIMDBReview(review string) string { func ScrubIMDBReview(review string) (int, string) {
// remove footer // remove footer
for _, text := range []string{"Was this review helpful?", "Sign in to vote.", "Permalink"} { for _, text := range []string{"Was this review helpful?", "Sign in to vote.", "Permalink"} {
review = strings.ReplaceAll(review, text, "") review = strings.ReplaceAll(review, text, "")
@ -76,8 +82,16 @@ func ScrubIMDBReview(review string) string {
review = reWS.ReplaceAllString(review, "\n") review = reWS.ReplaceAllString(review, "\n")
// remove superfluous newlines // remove superfluous newlines
re := regexp.MustCompile(`\n{3,}`) reRev := regexp.MustCompile(`\n{3,}`)
review = re.ReplaceAllString(review, "\n\n") review = reRev.ReplaceAllString(review, "\n\n")
return review reRat := regexp.MustCompile(`(\d+)/10\n`)
reMatch := reRat.FindStringSubmatch(review)
var rating int
if len(reMatch) > 0 {
rating, _ = strconv.Atoi(reMatch[1])
review = strings.ReplaceAll(review, reMatch[0], "")
}
return rating, review
} }

View File

@ -115,11 +115,6 @@ func (movieAPI *MovieAPI) Store(w http.ResponseWriter, r *http.Request, urlID st
return return
} }
if err := movieAPI.jq.Add(m.ID, job.ActionRefreshIMDBReviews); err != nil {
Error(w, http.StatusInternalServerError, "could not add job to queue", err, logger)
return
}
resBody, err := json.Marshal(m) resBody, err := json.Marshal(m)
if err != nil { if err != nil {
Error(w, http.StatusInternalServerError, "could not marshal movie", err, logger) Error(w, http.StatusInternalServerError, "could not marshal movie", err, logger)

View File

@ -6,12 +6,6 @@ import (
type JobStatus string type JobStatus string
const (
JobStatusToDo JobStatus = "todo"
JobStatusDoing JobStatus = "doing"
JobStatusDone JobStatus = "done"
)
type Action string type Action string
const ( const (

View File

@ -5,7 +5,6 @@ import (
"ewintr.nl/emdb/client" "ewintr.nl/emdb/client"
"ewintr.nl/emdb/cmd/api-service/moviestore" "ewintr.nl/emdb/cmd/api-service/moviestore"
"github.com/google/uuid"
) )
type Worker struct { type Worker struct {
@ -41,6 +40,10 @@ func (w *Worker) Run() {
} }
} }
func (w *Worker) FindNewJobs() {
}
func (w *Worker) RefreshAllReviews(jobID int) { func (w *Worker) RefreshAllReviews(jobID int) {
logger := w.logger.With("method", "fetchReviews", "jobID", jobID) logger := w.logger.With("method", "fetchReviews", "jobID", jobID)
@ -72,20 +75,14 @@ func (w *Worker) RefreshReviews(jobID int, movieID string) {
return return
} }
reviews, err := w.imdb.GetReviews(m.IMDBID) reviews, err := w.imdb.GetReviews(m)
if err != nil { if err != nil {
logger.Error("could not get reviews", "error", err) logger.Error("could not get reviews", "error", err)
return return
} }
for url, review := range reviews { for _, review := range reviews {
if err := w.reviewRepo.Store(moviestore.Review{ if err := w.reviewRepo.Store(review); err != nil {
ID: uuid.New().String(),
MovieID: m.ID,
Source: moviestore.ReviewSourceIMDB,
URL: url,
Review: review,
}); err != nil {
logger.Error("could not store review", "error", err) logger.Error("could not store review", "error", err)
return return
} }

View File

@ -16,6 +16,7 @@ type Review struct {
Source ReviewSource Source ReviewSource
URL string URL string
Review string Review string
MovieRating int
Quality int Quality int
Mentions []string Mentions []string
} }
@ -31,8 +32,8 @@ func NewReviewRepository(db *SQLite) *ReviewRepository {
} }
func (rr *ReviewRepository) Store(r Review) error { func (rr *ReviewRepository) Store(r Review) error {
if _, err := rr.db.Exec(`REPLACE INTO review (id, movie_id, source, url, review, quality, mentions) VALUES (?, ?, ?, ?, ?, ?, ?)`, if _, err := rr.db.Exec(`REPLACE INTO review (id, movie_id, source, url, review, movie_rating, quality, mentions) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
r.ID, r.MovieID, r.Source, r.URL, r.Review, r.Quality, strings.Join(r.Mentions, MentionsSeparator)); err != nil { r.ID, r.MovieID, r.Source, r.URL, r.Review, r.MovieRating, r.Quality, strings.Join(r.Mentions, MentionsSeparator)); err != nil {
return err return err
} }
@ -40,14 +41,14 @@ func (rr *ReviewRepository) Store(r Review) error {
} }
func (rr *ReviewRepository) FindOne(id string) (Review, error) { func (rr *ReviewRepository) FindOne(id string) (Review, error) {
row := rr.db.QueryRow(`SELECT id, movie_id, source, url, review, quality, mentions FROM review WHERE id=?`, id) row := rr.db.QueryRow(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review WHERE id=?`, id)
if row.Err() != nil { if row.Err() != nil {
return Review{}, row.Err() return Review{}, row.Err()
} }
r := Review{} r := Review{}
var mentions string var mentions string
if err := row.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil { if err := row.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil {
return Review{}, err return Review{}, err
} }
r.Mentions = make([]string, 0) r.Mentions = make([]string, 0)
@ -58,7 +59,7 @@ func (rr *ReviewRepository) FindOne(id string) (Review, error) {
} }
func (rr *ReviewRepository) FindByMovieID(movieID string) ([]Review, error) { func (rr *ReviewRepository) FindByMovieID(movieID string) ([]Review, error) {
rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, quality, mentions FROM review WHERE movie_id=?`, movieID) rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review WHERE movie_id=?`, movieID)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -67,7 +68,7 @@ func (rr *ReviewRepository) FindByMovieID(movieID string) ([]Review, error) {
var mentions string var mentions string
for rows.Next() { for rows.Next() {
r := Review{} r := Review{}
if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil { if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil {
return nil, err return nil, err
} }
r.Mentions = make([]string, 0) r.Mentions = make([]string, 0)
@ -82,14 +83,14 @@ func (rr *ReviewRepository) FindByMovieID(movieID string) ([]Review, error) {
} }
func (rr *ReviewRepository) FindNextUnrated() (Review, error) { func (rr *ReviewRepository) FindNextUnrated() (Review, error) {
row := rr.db.QueryRow(`SELECT id, movie_id, source, url, review, quality, mentions FROM review WHERE quality=0 LIMIT 1`) row := rr.db.QueryRow(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review WHERE quality=0 LIMIT 1`)
if row.Err() != nil { if row.Err() != nil {
return Review{}, row.Err() return Review{}, row.Err()
} }
r := Review{} r := Review{}
var mentions string var mentions string
if err := row.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil { if err := row.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil {
return Review{}, err return Review{}, err
} }
r.Mentions = make([]string, 0) r.Mentions = make([]string, 0)
@ -101,7 +102,7 @@ func (rr *ReviewRepository) FindNextUnrated() (Review, error) {
} }
func (rr *ReviewRepository) FindUnrated() ([]Review, error) { func (rr *ReviewRepository) FindUnrated() ([]Review, error) {
rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, quality, mentions FROM review WHERE quality=0`) rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review WHERE quality=0`)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -110,7 +111,7 @@ func (rr *ReviewRepository) FindUnrated() ([]Review, error) {
var mentions string var mentions string
for rows.Next() { for rows.Next() {
r := Review{} r := Review{}
if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil { if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil {
return nil, err return nil, err
} }
r.Mentions = make([]string, 0) r.Mentions = make([]string, 0)
@ -125,7 +126,7 @@ func (rr *ReviewRepository) FindUnrated() ([]Review, error) {
} }
func (rr *ReviewRepository) FindAll() ([]Review, error) { func (rr *ReviewRepository) FindAll() ([]Review, error) {
rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, quality, mentions FROM review`) rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review`)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@ -134,7 +135,7 @@ func (rr *ReviewRepository) FindAll() ([]Review, error) {
var mentions string var mentions string
for rows.Next() { for rows.Next() {
r := Review{} r := Review{}
if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil { if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil {
return nil, err return nil, err
} }
r.Mentions = make([]string, 0) r.Mentions = make([]string, 0)

View File

@ -67,6 +67,7 @@ var sqliteMigrations = []sqliteMigration{
`ALTER TABLE review ADD COLUMN "quality" INTEGER NOT NULL DEFAULT 0`, `ALTER TABLE review ADD COLUMN "quality" INTEGER NOT NULL DEFAULT 0`,
`ALTER TABLE review DROP COLUMN "references"`, `ALTER TABLE review DROP COLUMN "references"`,
`ALTER TABLE review ADD COLUMN "mentions" TEXT NOT NULL DEFAULT ""`, `ALTER TABLE review ADD COLUMN "mentions" TEXT NOT NULL DEFAULT ""`,
`ALTER TABLE review ADD COLUMN "movie_rating" INTEGER NOT NULL DEFAULT 0`,
} }
var ( var (