get rating from imdb review
This commit is contained in:
parent
bcdee2bdb1
commit
14b34f6d0d
|
@ -4,16 +4,14 @@ import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"net/http"
|
"net/http"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
|
"ewintr.nl/emdb/cmd/api-service/moviestore"
|
||||||
"github.com/PuerkitoBio/goquery"
|
"github.com/PuerkitoBio/goquery"
|
||||||
|
"github.com/google/uuid"
|
||||||
)
|
)
|
||||||
|
|
||||||
type Review struct {
|
|
||||||
Source string
|
|
||||||
Review string
|
|
||||||
}
|
|
||||||
|
|
||||||
type IMDB struct {
|
type IMDB struct {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -21,8 +19,8 @@ func NewIMDB() *IMDB {
|
||||||
return &IMDB{}
|
return &IMDB{}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *IMDB) GetReviews(imdbID string) (map[string]string, error) {
|
func (i *IMDB) GetReviews(m moviestore.Movie) ([]moviestore.Review, error) {
|
||||||
url := fmt.Sprintf("https://www.imdb.com/title/%s/reviews", imdbID)
|
url := fmt.Sprintf("https://www.imdb.com/title/%s/reviews", m.IMDBID)
|
||||||
req, err := http.NewRequest(http.MethodGet, url, nil)
|
req, err := http.NewRequest(http.MethodGet, url, nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
|
@ -42,7 +40,7 @@ func (i *IMDB) GetReviews(imdbID string) (map[string]string, error) {
|
||||||
}
|
}
|
||||||
defer res.Body.Close()
|
defer res.Body.Close()
|
||||||
|
|
||||||
reviews := make(map[string]string)
|
reviews := make([]moviestore.Review, 0)
|
||||||
doc.Find(".lister-item-content").Each(func(i int, reviewNode *goquery.Selection) {
|
doc.Find(".lister-item-content").Each(func(i int, reviewNode *goquery.Selection) {
|
||||||
|
|
||||||
var permaLink string
|
var permaLink string
|
||||||
|
@ -59,13 +57,21 @@ func (i *IMDB) GetReviews(imdbID string) (map[string]string, error) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
reviews[permaLink] = ScrubIMDBReview(reviewNode.Text())
|
rat, rev := ScrubIMDBReview(reviewNode.Text())
|
||||||
|
reviews = append(reviews, moviestore.Review{
|
||||||
|
ID: uuid.New().String(),
|
||||||
|
MovieID: m.ID,
|
||||||
|
Source: moviestore.ReviewSourceIMDB,
|
||||||
|
URL: fmt.Sprintf("https://www.imdb.com%s", permaLink),
|
||||||
|
Review: rev,
|
||||||
|
MovieRating: rat,
|
||||||
|
})
|
||||||
})
|
})
|
||||||
|
|
||||||
return reviews, nil
|
return reviews, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func ScrubIMDBReview(review string) string {
|
func ScrubIMDBReview(review string) (int, string) {
|
||||||
// remove footer
|
// remove footer
|
||||||
for _, text := range []string{"Was this review helpful?", "Sign in to vote.", "Permalink"} {
|
for _, text := range []string{"Was this review helpful?", "Sign in to vote.", "Permalink"} {
|
||||||
review = strings.ReplaceAll(review, text, "")
|
review = strings.ReplaceAll(review, text, "")
|
||||||
|
@ -76,8 +82,16 @@ func ScrubIMDBReview(review string) string {
|
||||||
review = reWS.ReplaceAllString(review, "\n")
|
review = reWS.ReplaceAllString(review, "\n")
|
||||||
|
|
||||||
// remove superfluous newlines
|
// remove superfluous newlines
|
||||||
re := regexp.MustCompile(`\n{3,}`)
|
reRev := regexp.MustCompile(`\n{3,}`)
|
||||||
review = re.ReplaceAllString(review, "\n\n")
|
review = reRev.ReplaceAllString(review, "\n\n")
|
||||||
|
|
||||||
return review
|
reRat := regexp.MustCompile(`(\d+)/10\n`)
|
||||||
|
reMatch := reRat.FindStringSubmatch(review)
|
||||||
|
var rating int
|
||||||
|
if len(reMatch) > 0 {
|
||||||
|
rating, _ = strconv.Atoi(reMatch[1])
|
||||||
|
review = strings.ReplaceAll(review, reMatch[0], "")
|
||||||
|
}
|
||||||
|
|
||||||
|
return rating, review
|
||||||
}
|
}
|
||||||
|
|
|
@ -115,11 +115,6 @@ func (movieAPI *MovieAPI) Store(w http.ResponseWriter, r *http.Request, urlID st
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := movieAPI.jq.Add(m.ID, job.ActionRefreshIMDBReviews); err != nil {
|
|
||||||
Error(w, http.StatusInternalServerError, "could not add job to queue", err, logger)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
resBody, err := json.Marshal(m)
|
resBody, err := json.Marshal(m)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
Error(w, http.StatusInternalServerError, "could not marshal movie", err, logger)
|
Error(w, http.StatusInternalServerError, "could not marshal movie", err, logger)
|
||||||
|
|
|
@ -6,12 +6,6 @@ import (
|
||||||
|
|
||||||
type JobStatus string
|
type JobStatus string
|
||||||
|
|
||||||
const (
|
|
||||||
JobStatusToDo JobStatus = "todo"
|
|
||||||
JobStatusDoing JobStatus = "doing"
|
|
||||||
JobStatusDone JobStatus = "done"
|
|
||||||
)
|
|
||||||
|
|
||||||
type Action string
|
type Action string
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
|
|
@ -5,7 +5,6 @@ import (
|
||||||
|
|
||||||
"ewintr.nl/emdb/client"
|
"ewintr.nl/emdb/client"
|
||||||
"ewintr.nl/emdb/cmd/api-service/moviestore"
|
"ewintr.nl/emdb/cmd/api-service/moviestore"
|
||||||
"github.com/google/uuid"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type Worker struct {
|
type Worker struct {
|
||||||
|
@ -41,6 +40,10 @@ func (w *Worker) Run() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (w *Worker) FindNewJobs() {
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
func (w *Worker) RefreshAllReviews(jobID int) {
|
func (w *Worker) RefreshAllReviews(jobID int) {
|
||||||
logger := w.logger.With("method", "fetchReviews", "jobID", jobID)
|
logger := w.logger.With("method", "fetchReviews", "jobID", jobID)
|
||||||
|
|
||||||
|
@ -72,20 +75,14 @@ func (w *Worker) RefreshReviews(jobID int, movieID string) {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
reviews, err := w.imdb.GetReviews(m.IMDBID)
|
reviews, err := w.imdb.GetReviews(m)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error("could not get reviews", "error", err)
|
logger.Error("could not get reviews", "error", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
for url, review := range reviews {
|
for _, review := range reviews {
|
||||||
if err := w.reviewRepo.Store(moviestore.Review{
|
if err := w.reviewRepo.Store(review); err != nil {
|
||||||
ID: uuid.New().String(),
|
|
||||||
MovieID: m.ID,
|
|
||||||
Source: moviestore.ReviewSourceIMDB,
|
|
||||||
URL: url,
|
|
||||||
Review: review,
|
|
||||||
}); err != nil {
|
|
||||||
logger.Error("could not store review", "error", err)
|
logger.Error("could not store review", "error", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
|
@ -11,13 +11,14 @@ const (
|
||||||
type ReviewSource string
|
type ReviewSource string
|
||||||
|
|
||||||
type Review struct {
|
type Review struct {
|
||||||
ID string
|
ID string
|
||||||
MovieID string
|
MovieID string
|
||||||
Source ReviewSource
|
Source ReviewSource
|
||||||
URL string
|
URL string
|
||||||
Review string
|
Review string
|
||||||
Quality int
|
MovieRating int
|
||||||
Mentions []string
|
Quality int
|
||||||
|
Mentions []string
|
||||||
}
|
}
|
||||||
|
|
||||||
type ReviewRepository struct {
|
type ReviewRepository struct {
|
||||||
|
@ -31,8 +32,8 @@ func NewReviewRepository(db *SQLite) *ReviewRepository {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rr *ReviewRepository) Store(r Review) error {
|
func (rr *ReviewRepository) Store(r Review) error {
|
||||||
if _, err := rr.db.Exec(`REPLACE INTO review (id, movie_id, source, url, review, quality, mentions) VALUES (?, ?, ?, ?, ?, ?, ?)`,
|
if _, err := rr.db.Exec(`REPLACE INTO review (id, movie_id, source, url, review, movie_rating, quality, mentions) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`,
|
||||||
r.ID, r.MovieID, r.Source, r.URL, r.Review, r.Quality, strings.Join(r.Mentions, MentionsSeparator)); err != nil {
|
r.ID, r.MovieID, r.Source, r.URL, r.Review, r.MovieRating, r.Quality, strings.Join(r.Mentions, MentionsSeparator)); err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -40,14 +41,14 @@ func (rr *ReviewRepository) Store(r Review) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rr *ReviewRepository) FindOne(id string) (Review, error) {
|
func (rr *ReviewRepository) FindOne(id string) (Review, error) {
|
||||||
row := rr.db.QueryRow(`SELECT id, movie_id, source, url, review, quality, mentions FROM review WHERE id=?`, id)
|
row := rr.db.QueryRow(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review WHERE id=?`, id)
|
||||||
if row.Err() != nil {
|
if row.Err() != nil {
|
||||||
return Review{}, row.Err()
|
return Review{}, row.Err()
|
||||||
}
|
}
|
||||||
|
|
||||||
r := Review{}
|
r := Review{}
|
||||||
var mentions string
|
var mentions string
|
||||||
if err := row.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil {
|
if err := row.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil {
|
||||||
return Review{}, err
|
return Review{}, err
|
||||||
}
|
}
|
||||||
r.Mentions = make([]string, 0)
|
r.Mentions = make([]string, 0)
|
||||||
|
@ -58,7 +59,7 @@ func (rr *ReviewRepository) FindOne(id string) (Review, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rr *ReviewRepository) FindByMovieID(movieID string) ([]Review, error) {
|
func (rr *ReviewRepository) FindByMovieID(movieID string) ([]Review, error) {
|
||||||
rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, quality, mentions FROM review WHERE movie_id=?`, movieID)
|
rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review WHERE movie_id=?`, movieID)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -67,7 +68,7 @@ func (rr *ReviewRepository) FindByMovieID(movieID string) ([]Review, error) {
|
||||||
var mentions string
|
var mentions string
|
||||||
for rows.Next() {
|
for rows.Next() {
|
||||||
r := Review{}
|
r := Review{}
|
||||||
if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil {
|
if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
r.Mentions = make([]string, 0)
|
r.Mentions = make([]string, 0)
|
||||||
|
@ -82,14 +83,14 @@ func (rr *ReviewRepository) FindByMovieID(movieID string) ([]Review, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rr *ReviewRepository) FindNextUnrated() (Review, error) {
|
func (rr *ReviewRepository) FindNextUnrated() (Review, error) {
|
||||||
row := rr.db.QueryRow(`SELECT id, movie_id, source, url, review, quality, mentions FROM review WHERE quality=0 LIMIT 1`)
|
row := rr.db.QueryRow(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review WHERE quality=0 LIMIT 1`)
|
||||||
if row.Err() != nil {
|
if row.Err() != nil {
|
||||||
return Review{}, row.Err()
|
return Review{}, row.Err()
|
||||||
}
|
}
|
||||||
|
|
||||||
r := Review{}
|
r := Review{}
|
||||||
var mentions string
|
var mentions string
|
||||||
if err := row.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil {
|
if err := row.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil {
|
||||||
return Review{}, err
|
return Review{}, err
|
||||||
}
|
}
|
||||||
r.Mentions = make([]string, 0)
|
r.Mentions = make([]string, 0)
|
||||||
|
@ -101,7 +102,7 @@ func (rr *ReviewRepository) FindNextUnrated() (Review, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rr *ReviewRepository) FindUnrated() ([]Review, error) {
|
func (rr *ReviewRepository) FindUnrated() ([]Review, error) {
|
||||||
rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, quality, mentions FROM review WHERE quality=0`)
|
rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review WHERE quality=0`)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -110,7 +111,7 @@ func (rr *ReviewRepository) FindUnrated() ([]Review, error) {
|
||||||
var mentions string
|
var mentions string
|
||||||
for rows.Next() {
|
for rows.Next() {
|
||||||
r := Review{}
|
r := Review{}
|
||||||
if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil {
|
if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
r.Mentions = make([]string, 0)
|
r.Mentions = make([]string, 0)
|
||||||
|
@ -125,7 +126,7 @@ func (rr *ReviewRepository) FindUnrated() ([]Review, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func (rr *ReviewRepository) FindAll() ([]Review, error) {
|
func (rr *ReviewRepository) FindAll() ([]Review, error) {
|
||||||
rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, quality, mentions FROM review`)
|
rows, err := rr.db.Query(`SELECT id, movie_id, source, url, review, movie_rating, quality, mentions FROM review`)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
@ -134,7 +135,7 @@ func (rr *ReviewRepository) FindAll() ([]Review, error) {
|
||||||
var mentions string
|
var mentions string
|
||||||
for rows.Next() {
|
for rows.Next() {
|
||||||
r := Review{}
|
r := Review{}
|
||||||
if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.Quality, &mentions); err != nil {
|
if err := rows.Scan(&r.ID, &r.MovieID, &r.Source, &r.URL, &r.Review, &r.MovieRating, &r.Quality, &mentions); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
r.Mentions = make([]string, 0)
|
r.Mentions = make([]string, 0)
|
||||||
|
|
|
@ -67,6 +67,7 @@ var sqliteMigrations = []sqliteMigration{
|
||||||
`ALTER TABLE review ADD COLUMN "quality" INTEGER NOT NULL DEFAULT 0`,
|
`ALTER TABLE review ADD COLUMN "quality" INTEGER NOT NULL DEFAULT 0`,
|
||||||
`ALTER TABLE review DROP COLUMN "references"`,
|
`ALTER TABLE review DROP COLUMN "references"`,
|
||||||
`ALTER TABLE review ADD COLUMN "mentions" TEXT NOT NULL DEFAULT ""`,
|
`ALTER TABLE review ADD COLUMN "mentions" TEXT NOT NULL DEFAULT ""`,
|
||||||
|
`ALTER TABLE review ADD COLUMN "movie_rating" INTEGER NOT NULL DEFAULT 0`,
|
||||||
}
|
}
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
|
Loading…
Reference in New Issue