diff --git a/linkchecker.go b/linkchecker.go index 992ffe2..2a0651a 100644 --- a/linkchecker.go +++ b/linkchecker.go @@ -29,6 +29,24 @@ type BrokenLink struct { Error string } +func (lc *LinkChecker) normalizeURL(rawURL string) string { + u, err := url.Parse(rawURL) + if err != nil { + return rawURL + } + + // Remove fragment + u.Fragment = "" + + // Ensure consistent trailing slash handling + u.Path = strings.TrimSuffix(u.Path, "/") + if u.Path == "" { + u.Path = "/" + } + + return u.String() +} + func (lc *LinkChecker) isSameDomain(baseURL, link string) bool { base, err := url.Parse(baseURL) if err != nil { @@ -48,10 +66,11 @@ func (lc *LinkChecker) CheckLinks(baseURL string) ([]BrokenLink, error) { } func (lc *LinkChecker) checkLinksRecursive(pageURL string, brokenLinks []BrokenLink) ([]BrokenLink, error) { - if lc.visited[pageURL] { + normalizedURL := lc.normalizeURL(pageURL) + if lc.visited[normalizedURL] { return brokenLinks, nil } - lc.visited[pageURL] = true + lc.visited[normalizedURL] = true lc.pagesChecked++ fmt.Printf("Checking page %d: %s\n", lc.pagesChecked, pageURL) @@ -73,7 +92,8 @@ func (lc *LinkChecker) checkLinksRecursive(pageURL string, brokenLinks []BrokenL } // Recursively check links from the same domain - if lc.isSameDomain(pageURL, link) && !lc.visited[link] { + normalizedLink := lc.normalizeURL(link) + if lc.isSameDomain(pageURL, link) && !lc.visited[normalizedLink] { recursiveLinks, err := lc.checkLinksRecursive(link, brokenLinks) if err != nil { continue // Skip this page if there's an error, but continue checking others