feat: Add redirect tracking and reporting for link checker

This commit is contained in:
Erik Winter (aider) 2024-11-25 10:04:07 +01:00
parent d49e358c58
commit 3f7cde44a2
2 changed files with 45 additions and 6 deletions

View File

@ -23,10 +23,16 @@ func NewLinkChecker() *LinkChecker {
} }
} }
// RedirectInfo describes a single redirect observed while checking a link.
type RedirectInfo struct {
	// FromURL is the URL that was requested; ToURL is where the server's
	// redirect response pointed.
	FromURL, ToURL string
}
// BrokenLink is one entry in the link checker's report.
// URL is the link that was checked. Error holds the request error text when
// the request itself failed, otherwise StatusCode holds the HTTP status.
// Redirect is set for links that answered with a redirect rather than a
// failure, so despite the type's name an entry is not necessarily broken.
type BrokenLink struct { type BrokenLink struct {
URL string URL string
StatusCode int StatusCode int
Error string Error string
Redirect *RedirectInfo
} }
func (lc *LinkChecker) normalizeURL(rawURL string) string { func (lc *LinkChecker) normalizeURL(rawURL string) string {
@ -81,7 +87,8 @@ func (lc *LinkChecker) checkLinksRecursive(pageURL string, brokenLinks []BrokenL
} }
for _, link := range links { for _, link := range links {
if status, err := lc.isLinkValid(link); status >= 400 || err != nil { status, redirect, err := lc.isLinkValid(link)
if status >= 400 || err != nil {
broken := BrokenLink{URL: link} broken := BrokenLink{URL: link}
if err != nil { if err != nil {
broken.Error = err.Error() broken.Error = err.Error()
@ -89,6 +96,13 @@ func (lc *LinkChecker) checkLinksRecursive(pageURL string, brokenLinks []BrokenL
broken.StatusCode = status broken.StatusCode = status
} }
brokenLinks = append(brokenLinks, broken) brokenLinks = append(brokenLinks, broken)
} else if redirect != nil {
broken := BrokenLink{
URL: link,
StatusCode: status,
Redirect: redirect,
}
brokenLinks = append(brokenLinks, broken)
} }
// Recursively check links from the same domain // Recursively check links from the same domain
@ -154,12 +168,35 @@ func (lc *LinkChecker) getLinks(pageURL string) ([]string, error) {
return links, nil return links, nil
} }
func (lc *LinkChecker) isLinkValid(link string) (int, error) { func (lc *LinkChecker) isLinkValid(link string) (int, *RedirectInfo, error) {
resp, err := lc.client.Get(link) client := &http.Client{
CheckRedirect: func(req *http.Request, via []*http.Request) error {
return http.ErrUseLastResponse
},
}
resp, err := client.Get(link)
if err != nil { if err != nil {
return 0, err return 0, nil, err
} }
defer resp.Body.Close() defer resp.Body.Close()
return resp.StatusCode, nil if resp.StatusCode >= 300 && resp.StatusCode < 400 {
location := resp.Header.Get("Location")
if location != "" {
redirectURL := location
if !strings.HasPrefix(location, "http") {
baseURL, _ := url.Parse(link)
if relative, err := baseURL.Parse(location); err == nil {
redirectURL = relative.String()
}
}
return resp.StatusCode, &RedirectInfo{
FromURL: link,
ToURL: redirectURL,
}, nil
}
}
return resp.StatusCode, nil, nil
} }

View File

@ -27,10 +27,12 @@ func main() {
return return
} }
fmt.Printf("Found %d broken links:\n", len(brokenLinks)) fmt.Printf("Found %d issues:\n", len(brokenLinks))
for _, link := range brokenLinks { for _, link := range brokenLinks {
if link.Error != "" { if link.Error != "" {
fmt.Printf("- %s (Error: %s)\n", link.URL, link.Error) fmt.Printf("- %s (Error: %s)\n", link.URL, link.Error)
} else if link.Redirect != nil {
fmt.Printf("- %s (Redirect %d -> %s)\n", link.URL, link.StatusCode, link.Redirect.ToURL)
} else { } else {
fmt.Printf("- %s (Status: %d)\n", link.URL, link.StatusCode) fmt.Printf("- %s (Status: %d)\n", link.URL, link.StatusCode)
} }