get rating from imdb review

This commit is contained in:
Erik Winter 2024-01-17 07:57:52 +01:00
parent bcdee2bdb1
commit 14b34f6d0d
6 changed files with 55 additions and 53 deletions

View File

@ -4,16 +4,14 @@ import (
"fmt"
"net/http"
"regexp"
"strconv"
"strings"
"ewintr.nl/emdb/cmd/api-service/moviestore"
"github.com/PuerkitoBio/goquery"
"github.com/google/uuid"
)
type Review struct {
Source string
Review string
}
// IMDB is the receiver type for the IMDb scraping methods (see GetReviews).
// It declares no fields, so a value carries no state of its own.
type IMDB struct {
}
@ -21,8 +19,8 @@ func NewIMDB() *IMDB {
return &IMDB{}
}
func (i *IMDB) GetReviews(imdbID string) (map[string]string, error) {
url := fmt.Sprintf("https://www.imdb.com/title/%s/reviews", imdbID)
func (i *IMDB) GetReviews(m moviestore.Movie) ([]moviestore.Review, error) {
url := fmt.Sprintf("https://www.imdb.com/title/%s/reviews", m.IMDBID)
req, err := http.NewRequest(http.MethodGet, url, nil)
if err != nil {
return nil, err
@ -42,7 +40,7 @@ func (i *IMDB) GetReviews(imdbID string) (map[string]string, error) {
}
defer res.Body.Close()
reviews := make(map[string]string)
reviews := make([]moviestore.Review, 0)
doc.Find(".lister-item-content").Each(func(i int, reviewNode *goquery.Selection) {
var permaLink string
@ -59,13 +57,21 @@ func (i *IMDB) GetReviews(imdbID string) (map[string]string, error) {
return
}
reviews[permaLink] = ScrubIMDBReview(reviewNode.Text())
rat, rev := ScrubIMDBReview(reviewNode.Text())
reviews = append(reviews, moviestore.Review{
ID: uuid.New().String(),
MovieID: m.ID,
Source: moviestore.ReviewSourceIMDB,
URL: fmt.Sprintf("https://www.imdb.com%s", permaLink),
Review: rev,
MovieRating: rat,
})
})
return reviews, nil
}
func ScrubIMDBReview(review string) string {
func ScrubIMDBReview(review string) (int, string) {
// remove footer
for _, text := range []string{"Was this review helpful?", "Sign in to vote.", "Permalink"} {
review = strings.ReplaceAll(review, text, "")
@ -76,8 +82,16 @@ func ScrubIMDBReview(review string) string {
review = reWS.ReplaceAllString(review, "\n")
// remove superfluous newlines
re := regexp.MustCompile(`\n{3,}`)
review = re.ReplaceAllString(review, "\n\n")
reRev := regexp.MustCompile(`\n{3,}`)
review = reRev.ReplaceAllString(review, "\n\n")
return review
reRat := regexp.MustCompile(`(\d+)/10\n`)
reMatch := reRat.FindStringSubmatch(review)
var rating int
if len(reMatch) > 0 {
rating, _ = strconv.Atoi(reMatch[1])
review = strings.ReplaceAll(review, reMatch[0], "")
}
return rating, review
}

View File

@ -115,11 +115,6 @@ func (movieAPI *MovieAPI) Store(w http.ResponseWriter, r *http.Request, urlID st
return
}
if err := movieAPI.jq.Add(m.ID, job.ActionRefreshIMDBReviews); err != nil {
Error(w, http.StatusInternalServerError, "could not add job to queue", err, logger)
return
}
resBody, err := json.Marshal(m)
if err != nil {
Error(w, http.StatusInternalServerError, "could not marshal movie", err, logger)

View File

@ -6,12 +6,6 @@ import (
type JobStatus string
const (
JobStatusToDo JobStatus = "todo"
JobStatusDoing JobStatus = "doing"
JobStatusDone JobStatus = "done"
)
type Action string
const (

View File

@ -5,7 +5,6 @@ import (
"ewintr.nl/emdb/client"
"ewintr.nl/emdb/cmd/api-service/moviestore"
"github.com/google/uuid"
)
type Worker struct {
@ -41,6 +40,10 @@ func (w *Worker) Run() {
}
}
// FindNewJobs is currently an empty stub: it has no body and performs no work.
// NOTE(review): presumably intended to discover jobs for the worker to run —
// confirm the intended behavior before relying on it.
func (w *Worker) FindNewJobs() {
}
func (w *Worker) RefreshAllReviews(jobID int) {
logger := w.logger.With("method", "fetchReviews", "jobID", jobID)
@ -72,20 +75,14 @@ func (w *Worker) RefreshReviews(jobID int, movieID string) {
return
}
reviews, err := w.imdb.GetReviews(m.IMDBID)
reviews, err := w.imdb.GetReviews(m)
if err != nil {
logger.Error("could not get reviews", "error", err)
return
}
for url, review := range reviews {
if err := w.reviewRepo.Store(moviestore.Review{
ID: uuid.New().String(),
MovieID: m.ID,
Source: moviestore.ReviewSourceIMDB,
URL: url,
Review: review,
}); err != nil {
for _, review := range reviews {
if err := w.reviewRepo.Store(review); err != nil {
logger.Error("could not store review", "error", err)
return
}

View File

@ -16,6 +16,7 @@ type Review struct {
Source ReviewSource
URL string
Review string
MovieRating int
Quality int
Mentions []string
}
@ -31,8 +32,8 @@ func NewReviewRepository(db *SQLite) *ReviewRepository {
}
func (rr *ReviewRepository) Store(r Review) error {
if _, err := rr.db.Exec(`REPLACE INTO review (id, movie_id, source, url, review, quality, mentions) VALUES (?, ?, ?, ?, ?, ?, ?)`,
r.ID, r.MovieID, r.Source, r.URL, r.Review, r.Quality, strings.Join(r.Mentions, MentionsSeparator)); err != nil {
if _, err := rr.db.Exec(`REPLACE INTO review (id, movie_id, source, url, review, movie_rating, quality, mentions) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
r.ID, r.MovieID, r.Source, r.URL, r.Review, r.MovieRating, r.Quality, strings.Join(r.Mentions, MentionsSeparator)); err != nil {
return err
}
@ -40,14 +41,14 @@ func (rr *ReviewRepository) Store(r Review) error {
}
func (rr *ReviewRepository) FindOne(id string) (Review, error) {
row := rr.db.QueryRow(`SELECT id, movie_id, source, url, review, quality, mentions FROM review WHERE id=?`, id)
row := rr.db.QueryRow(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review WHERE id=?`, id)
if row.Err() != nil {
return Review{}, row.Err()
}
r := Review{}
var mentions string
if err := row.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil {
if err := row.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil {
return Review{}, err
}
r.Mentions = make([]string, 0)
@ -58,7 +59,7 @@ func (rr *ReviewRepository) FindOne(id string) (Review, error) {
}
func (rr *ReviewRepository) FindByMovieID(movieID string) ([]Review, error) {
rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, quality, mentions FROM review WHERE movie_id=?`, movieID)
rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review WHERE movie_id=?`, movieID)
if err != nil {
return nil, err
}
@ -67,7 +68,7 @@ func (rr *ReviewRepository) FindByMovieID(movieID string) ([]Review, error) {
var mentions string
for rows.Next() {
r := Review{}
if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil {
if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil {
return nil, err
}
r.Mentions = make([]string, 0)
@ -82,14 +83,14 @@ func (rr *ReviewRepository) FindByMovieID(movieID string) ([]Review, error) {
}
func (rr *ReviewRepository) FindNextUnrated() (Review, error) {
row := rr.db.QueryRow(`SELECT id, movie_id, source, url, review, quality, mentions FROM review WHERE quality=0 LIMIT 1`)
row := rr.db.QueryRow(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review WHERE quality=0 LIMIT 1`)
if row.Err() != nil {
return Review{}, row.Err()
}
r := Review{}
var mentions string
if err := row.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil {
if err := row.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil {
return Review{}, err
}
r.Mentions = make([]string, 0)
@ -101,7 +102,7 @@ func (rr *ReviewRepository) FindNextUnrated() (Review, error) {
}
func (rr *ReviewRepository) FindUnrated() ([]Review, error) {
rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, quality, mentions FROM review WHERE quality=0`)
rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review WHERE quality=0`)
if err != nil {
return nil, err
}
@ -110,7 +111,7 @@ func (rr *ReviewRepository) FindUnrated() ([]Review, error) {
var mentions string
for rows.Next() {
r := Review{}
if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil {
if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil {
return nil, err
}
r.Mentions = make([]string, 0)
@ -125,7 +126,7 @@ func (rr *ReviewRepository) FindUnrated() ([]Review, error) {
}
func (rr *ReviewRepository) FindAll() ([]Review, error) {
rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, quality, mentions FROM review`)
rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review`)
if err != nil {
return nil, err
}
@ -134,7 +135,7 @@ func (rr *ReviewRepository) FindAll() ([]Review, error) {
var mentions string
for rows.Next() {
r := Review{}
if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil {
if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil {
return nil, err
}
r.Mentions = make([]string, 0)

View File

@ -67,6 +67,7 @@ var sqliteMigrations = []sqliteMigration{
`ALTER TABLE review ADD COLUMN "quality" INTEGER NOT NULL DEFAULT 0`,
`ALTER TABLE review DROP COLUMN "references"`,
`ALTER TABLE review ADD COLUMN "mentions" TEXT NOT NULL DEFAULT ""`,
`ALTER TABLE review ADD COLUMN "movie_rating" INTEGER NOT NULL DEFAULT 0`,
}
var (