2023-12-29 19:10:31 +01:00
|
|
|
package client
|
|
|
|
|
|
|
|
import (
|
|
|
|
"fmt"
|
|
|
|
"net/http"
|
2023-12-30 09:19:53 +01:00
|
|
|
"regexp"
|
|
|
|
"strings"
|
2023-12-29 19:10:31 +01:00
|
|
|
|
|
|
|
"github.com/PuerkitoBio/goquery"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Review holds two string fields, Source and Review.
//
// NOTE(review): presumably Source identifies where a review came from and
// Review carries its text; nothing in this file constructs the type, so
// confirm against its callers.
type Review struct {
	Source, Review string
}
|
|
|
|
|
|
|
|
// IMDB is a stateless client for www.imdb.com. It has no fields; its methods
// issue their requests through http.DefaultClient.
type IMDB struct{}
|
|
|
|
|
|
|
|
func NewIMDB() *IMDB {
|
|
|
|
return &IMDB{}
|
|
|
|
}
|
|
|
|
|
|
|
|
func (i *IMDB) GetReviews(imdbID string) (map[string]string, error) {
|
|
|
|
url := fmt.Sprintf("https://www.imdb.com/title/%s/reviews", imdbID)
|
|
|
|
req, err := http.NewRequest(http.MethodGet, url, nil)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
res, err := http.DefaultClient.Do(req)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
if res.StatusCode != http.StatusOK {
|
|
|
|
return nil, fmt.Errorf("unexpected status code: %d", res.StatusCode)
|
|
|
|
}
|
|
|
|
|
|
|
|
doc, err := goquery.NewDocumentFromReader(res.Body)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
defer res.Body.Close()
|
|
|
|
|
|
|
|
reviews := make(map[string]string)
|
|
|
|
doc.Find(".lister-item-content").Each(func(i int, reviewNode *goquery.Selection) {
|
|
|
|
|
|
|
|
var permaLink string
|
|
|
|
reviewNode.Find("a").Each(func(i int, s *goquery.Selection) {
|
|
|
|
if s.Text() == "Permalink" {
|
|
|
|
link, exists := s.Attr("href")
|
|
|
|
if exists {
|
|
|
|
permaLink = link
|
|
|
|
}
|
|
|
|
}
|
|
|
|
})
|
|
|
|
|
|
|
|
if permaLink == "" {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
2023-12-30 09:19:53 +01:00
|
|
|
reviews[permaLink] = ScrubIMDBReview(reviewNode.Text())
|
2023-12-29 19:10:31 +01:00
|
|
|
})
|
|
|
|
|
|
|
|
return reviews, nil
|
|
|
|
}
|
2023-12-30 09:19:53 +01:00
|
|
|
|
|
|
|
// Patterns used by ScrubIMDBReview, compiled once at package initialization
// rather than on every call.
var (
	// imdbLeadingSpaceRE matches a newline followed by horizontal whitespace.
	// Newlines are deliberately excluded from the class: matching them here
	// (as `\s` would) swallows blank lines and leaves nothing for
	// imdbExtraNewlinesRE to collapse.
	imdbLeadingSpaceRE = regexp.MustCompile(`\n[ \t\r\f]+`)

	// imdbExtraNewlinesRE matches runs of three or more newlines.
	imdbExtraNewlinesRE = regexp.MustCompile(`\n{3,}`)
)

// ScrubIMDBReview cleans up the raw text of a scraped review.
//
// It performs three steps, in order:
//  1. removes every occurrence of the footer phrases "Was this review
//     helpful?", "Sign in to vote." and "Permalink";
//  2. strips spaces, tabs, carriage returns and form feeds that directly
//     follow a newline;
//  3. collapses runs of three or more newlines into exactly two, so at most
//     one blank line separates paragraphs.
//
// The cleaned text is returned; leading and trailing newlines are not trimmed.
func ScrubIMDBReview(review string) string {
	// remove footer
	for _, text := range []string{"Was this review helpful?", "Sign in to vote.", "Permalink"} {
		review = strings.ReplaceAll(review, text, "")
	}

	// remove superfluous whitespace
	review = imdbLeadingSpaceRE.ReplaceAllString(review, "\n")

	// remove superfluous newlines
	review = imdbExtraNewlinesRE.ReplaceAllString(review, "\n\n")

	return review
}
|