package main

import (
	"fmt"
	"io"
	"net/http"
	"net/url"
	"strings"
	"time"

	"golang.org/x/net/html"
)

const (
	// requestTimeout bounds every HTTP request (connect, redirects and body
	// read) so that a single unresponsive host cannot stall a whole run.
	requestTimeout = 30 * time.Second

	// maxDrainBytes caps how much of an unread response body is discarded
	// before closing it. Draining lets the transport reuse the connection;
	// the cap avoids downloading large files just to check a status code.
	maxDrainBytes = 64 << 10
)

// LinkChecker fetches a web page and reports which of its links are broken.
type LinkChecker struct {
	client *http.Client
}

// NewLinkChecker returns a LinkChecker whose HTTP client applies
// requestTimeout to every request.
func NewLinkChecker() *LinkChecker {
	return &LinkChecker{
		client: &http.Client{Timeout: requestTimeout},
	}
}

// BrokenLink describes one link that could not be fetched successfully.
// Exactly one of StatusCode and Error is set: StatusCode when the server
// answered with a status of 400 or above, Error when the request itself failed.
type BrokenLink struct {
	URL        string
	StatusCode int
	Error      string
}

// CheckLinks fetches the page at baseURL, requests every distinct http(s)
// link found in its <a href> attributes, and returns the ones that are broken.
//
// A link is broken when the request fails or the response status is >= 400.
// The returned slice is nil when no broken link is found. An error is
// returned only when the page itself cannot be fetched or parsed.
func (lc *LinkChecker) CheckLinks(baseURL string) ([]BrokenLink, error) {
	links, err := lc.getLinks(baseURL)
	if err != nil {
		return nil, fmt.Errorf("error getting links: %w", err)
	}

	var brokenLinks []BrokenLink
	for _, link := range links {
		status, err := lc.isLinkValid(link)
		switch {
		case err != nil:
			brokenLinks = append(brokenLinks, BrokenLink{URL: link, Error: err.Error()})
		case status >= http.StatusBadRequest:
			brokenLinks = append(brokenLinks, BrokenLink{URL: link, StatusCode: status})
		}
	}
	return brokenLinks, nil
}

// getLinks downloads pageURL and returns the absolute http(s) URLs referenced
// by its <a href> attributes, de-duplicated and in document order.
//
// It fails when the request fails, the status is not 200 OK, or the body
// cannot be parsed as HTML.
func (lc *LinkChecker) getLinks(pageURL string) ([]string, error) {
	resp, err := lc.client.Get(pageURL)
	if err != nil {
		return nil, err
	}
	defer drainAndClose(resp.Body)

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("failed to fetch page: %s", resp.Status)
	}

	doc, err := html.Parse(resp.Body)
	if err != nil {
		return nil, err
	}

	// Relative links must be resolved against the URL the document was
	// actually served from, which differs from pageURL after a redirect.
	var base *url.URL
	if resp.Request != nil && resp.Request.URL != nil {
		base = resp.Request.URL
	} else {
		base, err = url.Parse(pageURL)
		if err != nil {
			return nil, err
		}
	}

	var links []string
	seen := make(map[string]struct{})

	var traverse func(*html.Node)
	traverse = func(n *html.Node) {
		if n.Type == html.ElementNode && n.Data == "a" {
			for _, attr := range n.Attr {
				if attr.Key != "href" {
					continue
				}
				link, ok := resolveLink(base, attr.Val)
				if !ok {
					continue
				}
				// Navigation menus and footers repeat the same targets;
				// request and report each distinct URL only once.
				if _, dup := seen[link]; dup {
					continue
				}
				seen[link] = struct{}{}
				links = append(links, link)
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			traverse(c)
		}
	}
	traverse(doc)

	return links, nil
}

// resolveLink turns an href value into an absolute URL relative to base.
// The boolean is false when the link should not be checked: its scheme is
// not http or https (mailto:, javascript:, tel:, ...) or it is an
// unparseable relative reference.
func resolveLink(base *url.URL, href string) (string, bool) {
	href = strings.TrimSpace(href)

	abs, err := base.Parse(href)
	if err != nil {
		// A malformed absolute http(s) URL is still a broken link worth
		// reporting, so pass it through and let the request fail.
		lower := strings.ToLower(href)
		if strings.HasPrefix(lower, "http://") || strings.HasPrefix(lower, "https://") {
			return href, true
		}
		return "", false
	}

	// Decide on the parsed scheme rather than a string prefix: a relative
	// path such as "http-guide.html" starts with "http" but is not absolute.
	if abs.Scheme != "http" && abs.Scheme != "https" {
		return "", false
	}
	return abs.String(), true
}

// isLinkValid issues a GET request for link and returns the response status
// code, or an error when the request could not be completed.
func (lc *LinkChecker) isLinkValid(link string) (int, error) {
	resp, err := lc.client.Get(link)
	if err != nil {
		return 0, err
	}
	defer drainAndClose(resp.Body)

	return resp.StatusCode, nil
}

// drainAndClose discards up to maxDrainBytes of body and then closes it.
// Errors are ignored on purpose: this is best-effort cleanup performed after
// the caller has already obtained everything it needs from the response.
func drainAndClose(body io.ReadCloser) {
	_, _ = io.Copy(io.Discard, io.LimitReader(body, maxDrainBytes))
	_ = body.Close()
}