98 lines
2.1 KiB
Go
98 lines
2.1 KiB
Go
package client
|
|
|
|
import (
|
|
"fmt"
|
|
"net/http"
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"code.ewintr.nl/emdb/storage"
|
|
"github.com/PuerkitoBio/goquery"
|
|
"github.com/google/uuid"
|
|
)
|
|
|
|
// IMDB is a client for retrieving movie reviews from imdb.com.
// It has no fields, so the zero value is ready to use.
type IMDB struct{}
|
|
|
|
func NewIMDB() *IMDB {
|
|
return &IMDB{}
|
|
}
|
|
|
|
func (i *IMDB) GetReviews(m storage.Movie) ([]storage.Review, error) {
|
|
url := fmt.Sprintf("https://www.imdb.com/title/%s/reviews", m.IMDBID)
|
|
req, err := http.NewRequest(http.MethodGet, url, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
res, err := http.DefaultClient.Do(req)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if res.StatusCode != http.StatusOK {
|
|
return nil, fmt.Errorf("unexpected status code: %d", res.StatusCode)
|
|
}
|
|
|
|
doc, err := goquery.NewDocumentFromReader(res.Body)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer res.Body.Close()
|
|
|
|
reviews := make([]storage.Review, 0)
|
|
doc.Find(".lister-item-content").Each(func(i int, reviewNode *goquery.Selection) {
|
|
|
|
var permaLink string
|
|
reviewNode.Find("a").Each(func(i int, s *goquery.Selection) {
|
|
if s.Text() == "Permalink" {
|
|
link, exists := s.Attr("href")
|
|
if exists {
|
|
permaLink = link
|
|
}
|
|
}
|
|
})
|
|
|
|
if permaLink == "" {
|
|
return
|
|
}
|
|
|
|
rat, rev := ScrubIMDBReview(reviewNode.Text())
|
|
reviews = append(reviews, storage.Review{
|
|
ID: uuid.New().String(),
|
|
MovieID: m.ID,
|
|
Source: storage.ReviewSourceIMDB,
|
|
URL: fmt.Sprintf("https://www.imdb.com%s", permaLink),
|
|
Review: rev,
|
|
MovieRating: rat,
|
|
})
|
|
})
|
|
|
|
return reviews, nil
|
|
}
|
|
|
|
// Patterns used by ScrubIMDBReview, compiled once at package initialisation
// instead of on every call.
var (
	// imdbWhitespaceRE matches a newline followed by any run of whitespace
	// (which may itself contain further newlines).
	imdbWhitespaceRE = regexp.MustCompile(`\n\s+`)
	// imdbNewlinesRE matches three or more consecutive newlines.
	imdbNewlinesRE = regexp.MustCompile(`\n{3,}`)
	// imdbRatingRE matches a "<digits>/10" rating terminated by a newline.
	imdbRatingRE = regexp.MustCompile(`(\d+)/10\n`)
)

// ScrubIMDBReview cleans the raw text of an IMDb review element and extracts
// the rating from it.
//
// It removes the footer phrases "Was this review helpful?", "Sign in to
// vote." and "Permalink", collapses every newline-plus-whitespace run into a
// single newline, and then looks for the first "<digits>/10" followed by a
// newline. When found, the digits are returned as the rating and that one
// occurrence is cut out of the text; any later, identical-looking text in the
// body of the review is preserved. When no rating is found, the rating is 0.
func ScrubIMDBReview(review string) (int, string) {
	// remove footer
	for _, text := range []string{"Was this review helpful?", "Sign in to vote.", "Permalink"} {
		review = strings.ReplaceAll(review, text, "")
	}

	// remove superfluous whitespace
	review = imdbWhitespaceRE.ReplaceAllString(review, "\n")

	// remove superfluous newlines; the previous step already collapses newline
	// runs because \s matches "\n", so this is kept only as a safeguard.
	review = imdbNewlinesRE.ReplaceAllString(review, "\n\n")

	loc := imdbRatingRE.FindStringSubmatchIndex(review)
	if loc == nil {
		return 0, review
	}

	// The capture group contains digits only, so Atoi can fail solely on an
	// out-of-range value; that error is deliberately ignored, as before.
	rating, _ := strconv.Atoi(review[loc[2]:loc[3]])

	// Cut out exactly the matched span rather than every textual occurrence,
	// so rating-like text written by the reviewer is left intact.
	return rating, review[:loc[0]] + review[loc[1]:]
}
|