package main import ( "fmt" "net/http" "net/url" "strings" "golang.org/x/net/html" ) type LinkChecker struct { client *http.Client visited map[string]bool } func NewLinkChecker() *LinkChecker { return &LinkChecker{ client: &http.Client{}, visited: make(map[string]bool), } } type BrokenLink struct { URL string StatusCode int Error string } func (lc *LinkChecker) isSameDomain(baseURL, link string) bool { base, err := url.Parse(baseURL) if err != nil { return false } target, err := url.Parse(link) if err != nil { return false } return base.Host == target.Host } func (lc *LinkChecker) CheckLinks(baseURL string) ([]BrokenLink, error) { return lc.checkLinksRecursive(baseURL, make([]BrokenLink, 0)) } func (lc *LinkChecker) checkLinksRecursive(pageURL string, brokenLinks []BrokenLink) ([]BrokenLink, error) { if lc.visited[pageURL] { return brokenLinks, nil } lc.visited[pageURL] = true links, err := lc.getLinks(pageURL) if err != nil { return nil, fmt.Errorf("error getting links: %w", err) } for _, link := range links { if status, err := lc.isLinkValid(link); status >= 400 || err != nil { broken := BrokenLink{URL: link} if err != nil { broken.Error = err.Error() } else { broken.StatusCode = status } brokenLinks = append(brokenLinks, broken) } // Recursively check links from the same domain if lc.isSameDomain(pageURL, link) && !lc.visited[link] { recursiveLinks, err := lc.checkLinksRecursive(link, brokenLinks) if err != nil { continue // Skip this page if there's an error, but continue checking others } brokenLinks = recursiveLinks } } return brokenLinks, nil } func (lc *LinkChecker) getLinks(pageURL string) ([]string, error) { resp, err := lc.client.Get(pageURL) if err != nil { return nil, err } defer resp.Body.Close() if resp.StatusCode != http.StatusOK { return nil, fmt.Errorf("failed to fetch page: %s", resp.Status) } doc, err := html.Parse(resp.Body) if err != nil { return nil, err } var links []string baseURL, err := url.Parse(pageURL) if err != nil { return nil, err } var traverse func(*html.Node) traverse = func(n *html.Node) { if n.Type == html.ElementNode && n.Data == "a" { for _, attr := range n.Attr { if attr.Key == "href" { link := attr.Val if !strings.HasPrefix(link, "http") { // Convert relative URLs to absolute if absURL, err := baseURL.Parse(link); err == nil { link = absURL.String() } } if strings.HasPrefix(link, "http") { links = append(links, link) } } } } for c := n.FirstChild; c != nil; c = c.NextSibling { traverse(c) } } traverse(doc) return links, nil } func (lc *LinkChecker) isLinkValid(link string) (int, error) { resp, err := lc.client.Get(link) if err != nil { return 0, err } defer resp.Body.Close() return resp.StatusCode, nil }