feat: Add redirect tracking and reporting for link checker
This commit is contained in:
parent
d49e358c58
commit
3f7cde44a2
|
@ -23,10 +23,16 @@ func NewLinkChecker() *LinkChecker {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// RedirectInfo describes a single HTTP redirect observed while checking a link.
type RedirectInfo struct {
	// FromURL is the link that was requested.
	FromURL string
	// ToURL is the redirect target taken from the response's Location header.
	ToURL string
}
|
||||||
|
|
||||||
type BrokenLink struct {
|
type BrokenLink struct {
|
||||||
URL string
|
URL string
|
||||||
StatusCode int
|
StatusCode int
|
||||||
Error string
|
Error string
|
||||||
|
Redirect *RedirectInfo
|
||||||
}
|
}
|
||||||
|
|
||||||
func (lc *LinkChecker) normalizeURL(rawURL string) string {
|
func (lc *LinkChecker) normalizeURL(rawURL string) string {
|
||||||
|
@ -81,7 +87,8 @@ func (lc *LinkChecker) checkLinksRecursive(pageURL string, brokenLinks []BrokenL
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, link := range links {
|
for _, link := range links {
|
||||||
if status, err := lc.isLinkValid(link); status >= 400 || err != nil {
|
status, redirect, err := lc.isLinkValid(link)
|
||||||
|
if status >= 400 || err != nil {
|
||||||
broken := BrokenLink{URL: link}
|
broken := BrokenLink{URL: link}
|
||||||
if err != nil {
|
if err != nil {
|
||||||
broken.Error = err.Error()
|
broken.Error = err.Error()
|
||||||
|
@ -89,6 +96,13 @@ func (lc *LinkChecker) checkLinksRecursive(pageURL string, brokenLinks []BrokenL
|
||||||
broken.StatusCode = status
|
broken.StatusCode = status
|
||||||
}
|
}
|
||||||
brokenLinks = append(brokenLinks, broken)
|
brokenLinks = append(brokenLinks, broken)
|
||||||
|
} else if redirect != nil {
|
||||||
|
broken := BrokenLink{
|
||||||
|
URL: link,
|
||||||
|
StatusCode: status,
|
||||||
|
Redirect: redirect,
|
||||||
|
}
|
||||||
|
brokenLinks = append(brokenLinks, broken)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Recursively check links from the same domain
|
// Recursively check links from the same domain
|
||||||
|
@ -154,12 +168,35 @@ func (lc *LinkChecker) getLinks(pageURL string) ([]string, error) {
|
||||||
return links, nil
|
return links, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (lc *LinkChecker) isLinkValid(link string) (int, error) {
|
func (lc *LinkChecker) isLinkValid(link string) (int, *RedirectInfo, error) {
|
||||||
resp, err := lc.client.Get(link)
|
client := &http.Client{
|
||||||
|
CheckRedirect: func(req *http.Request, via []*http.Request) error {
|
||||||
|
return http.ErrUseLastResponse
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
resp, err := client.Get(link)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return 0, err
|
return 0, nil, err
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
|
||||||
return resp.StatusCode, nil
|
if resp.StatusCode >= 300 && resp.StatusCode < 400 {
|
||||||
|
location := resp.Header.Get("Location")
|
||||||
|
if location != "" {
|
||||||
|
redirectURL := location
|
||||||
|
if !strings.HasPrefix(location, "http") {
|
||||||
|
baseURL, _ := url.Parse(link)
|
||||||
|
if relative, err := baseURL.Parse(location); err == nil {
|
||||||
|
redirectURL = relative.String()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return resp.StatusCode, &RedirectInfo{
|
||||||
|
FromURL: link,
|
||||||
|
ToURL: redirectURL,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return resp.StatusCode, nil, nil
|
||||||
}
|
}
|
||||||
|
|
4
main.go
4
main.go
|
@ -27,10 +27,12 @@ func main() {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
fmt.Printf("Found %d broken links:\n", len(brokenLinks))
|
fmt.Printf("Found %d issues:\n", len(brokenLinks))
|
||||||
for _, link := range brokenLinks {
|
for _, link := range brokenLinks {
|
||||||
if link.Error != "" {
|
if link.Error != "" {
|
||||||
fmt.Printf("- %s (Error: %s)\n", link.URL, link.Error)
|
fmt.Printf("- %s (Error: %s)\n", link.URL, link.Error)
|
||||||
|
} else if link.Redirect != nil {
|
||||||
|
fmt.Printf("- %s (Redirect %d -> %s)\n", link.URL, link.StatusCode, link.Redirect.ToURL)
|
||||||
} else {
|
} else {
|
||||||
fmt.Printf("- %s (Status: %d)\n", link.URL, link.StatusCode)
|
fmt.Printf("- %s (Status: %d)\n", link.URL, link.StatusCode)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue