get rating from imdb review
This commit is contained in:
parent
bcdee2bdb1
commit
14b34f6d0d
|
@ -4,16 +4,14 @@ import (
|
|||
"fmt"
|
||||
"net/http"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"ewintr.nl/emdb/cmd/api-service/moviestore"
|
||||
"github.com/PuerkitoBio/goquery"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// Review is a plain struct with two string fields, Source and Review.
// NOTE(review): presumably Source names where the review came from and Review
// holds its text — confirm against callers.
type Review struct {
|
||||
Source string
|
||||
Review string
|
||||
}
|
||||
|
||||
// IMDB is the receiver type for GetReviews, which targets the reviews page of
// a title on www.imdb.com. It has no fields.
type IMDB struct {
|
||||
}
|
||||
|
||||
|
@ -21,8 +19,8 @@ func NewIMDB() *IMDB {
|
|||
return &IMDB{}
|
||||
}
|
||||
|
||||
func (i *IMDB) GetReviews(imdbID string) (map[string]string, error) {
|
||||
url := fmt.Sprintf("https://www.imdb.com/title/%s/reviews", imdbID)
|
||||
func (i *IMDB) GetReviews(m moviestore.Movie) ([]moviestore.Review, error) {
|
||||
url := fmt.Sprintf("https://www.imdb.com/title/%s/reviews", m.IMDBID)
|
||||
req, err := http.NewRequest(http.MethodGet, url, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -42,7 +40,7 @@ func (i *IMDB) GetReviews(imdbID string) (map[string]string, error) {
|
|||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
reviews := make(map[string]string)
|
||||
reviews := make([]moviestore.Review, 0)
|
||||
doc.Find(".lister-item-content").Each(func(i int, reviewNode *goquery.Selection) {
|
||||
|
||||
var permaLink string
|
||||
|
@ -59,13 +57,21 @@ func (i *IMDB) GetReviews(imdbID string) (map[string]string, error) {
|
|||
return
|
||||
}
|
||||
|
||||
reviews[permaLink] = ScrubIMDBReview(reviewNode.Text())
|
||||
rat, rev := ScrubIMDBReview(reviewNode.Text())
|
||||
reviews = append(reviews, moviestore.Review{
|
||||
ID: uuid.New().String(),
|
||||
MovieID: m.ID,
|
||||
Source: moviestore.ReviewSourceIMDB,
|
||||
URL: fmt.Sprintf("https://www.imdb.com%s", permaLink),
|
||||
Review: rev,
|
||||
MovieRating: rat,
|
||||
})
|
||||
})
|
||||
|
||||
return reviews, nil
|
||||
}
|
||||
|
||||
func ScrubIMDBReview(review string) string {
|
||||
func ScrubIMDBReview(review string) (int, string) {
|
||||
// remove footer
|
||||
for _, text := range []string{"Was this review helpful?", "Sign in to vote.", "Permalink"} {
|
||||
review = strings.ReplaceAll(review, text, "")
|
||||
|
@ -76,8 +82,16 @@ func ScrubIMDBReview(review string) string {
|
|||
review = reWS.ReplaceAllString(review, "\n")
|
||||
|
||||
// remove superfluous newlines
|
||||
re := regexp.MustCompile(`\n{3,}`)
|
||||
review = re.ReplaceAllString(review, "\n\n")
|
||||
reRev := regexp.MustCompile(`\n{3,}`)
|
||||
review = reRev.ReplaceAllString(review, "\n\n")
|
||||
|
||||
return review
|
||||
reRat := regexp.MustCompile(`(\d+)/10\n`)
|
||||
reMatch := reRat.FindStringSubmatch(review)
|
||||
var rating int
|
||||
if len(reMatch) > 0 {
|
||||
rating, _ = strconv.Atoi(reMatch[1])
|
||||
review = strings.ReplaceAll(review, reMatch[0], "")
|
||||
}
|
||||
|
||||
return rating, review
|
||||
}
|
||||
|
|
|
@ -115,11 +115,6 @@ func (movieAPI *MovieAPI) Store(w http.ResponseWriter, r *http.Request, urlID st
|
|||
return
|
||||
}
|
||||
|
||||
if err := movieAPI.jq.Add(m.ID, job.ActionRefreshIMDBReviews); err != nil {
|
||||
Error(w, http.StatusInternalServerError, "could not add job to queue", err, logger)
|
||||
return
|
||||
}
|
||||
|
||||
resBody, err := json.Marshal(m)
|
||||
if err != nil {
|
||||
Error(w, http.StatusInternalServerError, "could not marshal movie", err, logger)
|
||||
|
|
|
@ -6,12 +6,6 @@ import (
|
|||
|
||||
// JobStatus is a string type describing the state of a job.
type JobStatus string
|
||||
|
||||
// The declared job states: "todo", "doing" and "done".
const (
|
||||
JobStatusToDo JobStatus = "todo"
|
||||
JobStatusDoing JobStatus = "doing"
|
||||
JobStatusDone JobStatus = "done"
|
||||
)
|
||||
|
||||
// Action is a string type.
// NOTE(review): presumably it names the kind of work a job performs (values
// are declared in the const block that follows) — confirm against that block.
type Action string
|
||||
|
||||
const (
|
||||
|
|
|
@ -5,7 +5,6 @@ import (
|
|||
|
||||
"ewintr.nl/emdb/client"
|
||||
"ewintr.nl/emdb/cmd/api-service/moviestore"
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
type Worker struct {
|
||||
|
@ -41,6 +40,10 @@ func (w *Worker) Run() {
|
|||
}
|
||||
}
|
||||
|
||||
// FindNewJobs currently has an empty body and does nothing when called.
func (w *Worker) FindNewJobs() {
|
||||
|
||||
}
|
||||
|
||||
func (w *Worker) RefreshAllReviews(jobID int) {
|
||||
logger := w.logger.With("method", "fetchReviews", "jobID", jobID)
|
||||
|
||||
|
@ -72,20 +75,14 @@ func (w *Worker) RefreshReviews(jobID int, movieID string) {
|
|||
return
|
||||
}
|
||||
|
||||
reviews, err := w.imdb.GetReviews(m.IMDBID)
|
||||
reviews, err := w.imdb.GetReviews(m)
|
||||
if err != nil {
|
||||
logger.Error("could not get reviews", "error", err)
|
||||
return
|
||||
}
|
||||
|
||||
for url, review := range reviews {
|
||||
if err := w.reviewRepo.Store(moviestore.Review{
|
||||
ID: uuid.New().String(),
|
||||
MovieID: m.ID,
|
||||
Source: moviestore.ReviewSourceIMDB,
|
||||
URL: url,
|
||||
Review: review,
|
||||
}); err != nil {
|
||||
for _, review := range reviews {
|
||||
if err := w.reviewRepo.Store(review); err != nil {
|
||||
logger.Error("could not store review", "error", err)
|
||||
return
|
||||
}
|
||||
|
|
|
@ -11,13 +11,14 @@ const (
|
|||
// ReviewSource is the string type used for the Source field of Review.
type ReviewSource string
|
||||
|
||||
// Review is the record that ReviewRepository.Store writes to the review table
// and that the repository's Find* methods scan rows back into.
//
// MovieRating is stored in the movie_rating column. Quality is stored in the
// quality column; the "unrated" queries select rows where it is 0. Mentions is
// persisted as a single string joined with MentionsSeparator.
//
// NOTE(review): the field list appears twice below (once without and once with
// MovieRating). This looks like the before/after sides of a diff shown
// together — confirm against the actual file.
type Review struct {
|
||||
ID string
|
||||
MovieID string
|
||||
Source ReviewSource
|
||||
URL string
|
||||
Review string
|
||||
Quality int
|
||||
Mentions []string
|
||||
ID string
|
||||
MovieID string
|
||||
Source ReviewSource
|
||||
URL string
|
||||
Review string
|
||||
MovieRating int
|
||||
Quality int
|
||||
Mentions []string
|
||||
}
|
||||
|
||||
type ReviewRepository struct {
|
||||
|
@ -31,8 +32,8 @@ func NewReviewRepository(db *SQLite) *ReviewRepository {
|
|||
}
|
||||
|
||||
func (rr *ReviewRepository) Store(r Review) error {
|
||||
if _, err := rr.db.Exec(`REPLACE INTO review (id, movie_id, source, url, review, quality, mentions) VALUES (?, ?, ?, ?, ?, ?, ?)`,
|
||||
r.ID, r.MovieID, r.Source, r.URL, r.Review, r.Quality, strings.Join(r.Mentions, MentionsSeparator)); err != nil {
|
||||
if _, err := rr.db.Exec(`REPLACE INTO review (id, movie_id, source, url, review, movie_rating, quality, mentions) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||
r.ID, r.MovieID, r.Source, r.URL, r.Review, r.MovieRating, r.Quality, strings.Join(r.Mentions, MentionsSeparator)); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
|
@ -40,14 +41,14 @@ func (rr *ReviewRepository) Store(r Review) error {
|
|||
}
|
||||
|
||||
func (rr *ReviewRepository) FindOne(id string) (Review, error) {
|
||||
row := rr.db.QueryRow(`SELECT id, movie_id, source, url, review, quality, mentions FROM review WHERE id=?`, id)
|
||||
row := rr.db.QueryRow(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review WHERE id=?`, id)
|
||||
if row.Err() != nil {
|
||||
return Review{}, row.Err()
|
||||
}
|
||||
|
||||
r := Review{}
|
||||
var mentions string
|
||||
if err := row.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil {
|
||||
if err := row.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil {
|
||||
return Review{}, err
|
||||
}
|
||||
r.Mentions = make([]string, 0)
|
||||
|
@ -58,7 +59,7 @@ func (rr *ReviewRepository) FindOne(id string) (Review, error) {
|
|||
}
|
||||
|
||||
func (rr *ReviewRepository) FindByMovieID(movieID string) ([]Review, error) {
|
||||
rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, quality, mentions FROM review WHERE movie_id=?`, movieID)
|
||||
rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review WHERE movie_id=?`, movieID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -67,7 +68,7 @@ func (rr *ReviewRepository) FindByMovieID(movieID string) ([]Review, error) {
|
|||
var mentions string
|
||||
for rows.Next() {
|
||||
r := Review{}
|
||||
if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil {
|
||||
if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
r.Mentions = make([]string, 0)
|
||||
|
@ -82,14 +83,14 @@ func (rr *ReviewRepository) FindByMovieID(movieID string) ([]Review, error) {
|
|||
}
|
||||
|
||||
func (rr *ReviewRepository) FindNextUnrated() (Review, error) {
|
||||
row := rr.db.QueryRow(`SELECT id, movie_id, source, url, review, quality, mentions FROM review WHERE quality=0 LIMIT 1`)
|
||||
row := rr.db.QueryRow(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review WHERE quality=0 LIMIT 1`)
|
||||
if row.Err() != nil {
|
||||
return Review{}, row.Err()
|
||||
}
|
||||
|
||||
r := Review{}
|
||||
var mentions string
|
||||
if err := row.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil {
|
||||
if err := row.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil {
|
||||
return Review{}, err
|
||||
}
|
||||
r.Mentions = make([]string, 0)
|
||||
|
@ -101,7 +102,7 @@ func (rr *ReviewRepository) FindNextUnrated() (Review, error) {
|
|||
}
|
||||
|
||||
func (rr *ReviewRepository) FindUnrated() ([]Review, error) {
|
||||
rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, quality, mentions FROM review WHERE quality=0`)
|
||||
rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review WHERE quality=0`)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -110,7 +111,7 @@ func (rr *ReviewRepository) FindUnrated() ([]Review, error) {
|
|||
var mentions string
|
||||
for rows.Next() {
|
||||
r := Review{}
|
||||
if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil {
|
||||
if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
r.Mentions = make([]string, 0)
|
||||
|
@ -125,7 +126,7 @@ func (rr *ReviewRepository) FindUnrated() ([]Review, error) {
|
|||
}
|
||||
|
||||
func (rr *ReviewRepository) FindAll() ([]Review, error) {
|
||||
rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, quality, mentions FROM review`)
|
||||
rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review`)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
@ -134,7 +135,7 @@ func (rr *ReviewRepository) FindAll() ([]Review, error) {
|
|||
var mentions string
|
||||
for rows.Next() {
|
||||
r := Review{}
|
||||
if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil {
|
||||
if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
r.Mentions = make([]string, 0)
|
||||
|
|
|
@ -67,6 +67,7 @@ var sqliteMigrations = []sqliteMigration{
|
|||
`ALTER TABLE review ADD COLUMN "quality" INTEGER NOT NULL DEFAULT 0`,
|
||||
`ALTER TABLE review DROP COLUMN "references"`,
|
||||
`ALTER TABLE review ADD COLUMN "mentions" TEXT NOT NULL DEFAULT ""`,
|
||||
`ALTER TABLE review ADD COLUMN "movie_rating" INTEGER NOT NULL DEFAULT 0`,
|
||||
}
|
||||
|
||||
var (
|
||||
|
|
Loading…
Reference in New Issue