From 5d646091a279ecc136bda0ba4b5cb47d9dc58381 Mon Sep 17 00:00:00 2001 From: "Erik Winter (aider)" Date: Mon, 25 Nov 2024 09:49:01 +0100 Subject: [PATCH] feat: Implement basic link checker in Go with URL parsing and link validation --- linkchecker.go | 98 ++++++++++++++++++++++++++++++++++++++++++++++++++ main.go | 32 +++++++++++++++++ 2 files changed, 130 insertions(+) create mode 100644 linkchecker.go create mode 100644 main.go diff --git a/linkchecker.go b/linkchecker.go new file mode 100644 index 0000000..97ef2ba --- /dev/null +++ b/linkchecker.go @@ -0,0 +1,98 @@ +package main + +import ( + "fmt" + "net/http" + "net/url" + "strings" + + "golang.org/x/net/html" +) + +type LinkChecker struct { + client *http.Client +} + +func NewLinkChecker() *LinkChecker { + return &LinkChecker{ + client: &http.Client{}, + } +} + +func (lc *LinkChecker) CheckLinks(baseURL string) ([]string, error) { + // Get all links from the page + links, err := lc.getLinks(baseURL) + if err != nil { + return nil, fmt.Errorf("error getting links: %w", err) + } + + var brokenLinks []string + + // Check each link + for _, link := range links { + if !lc.isLinkValid(link) { + brokenLinks = append(brokenLinks, link) + } + } + + return brokenLinks, nil +} + +func (lc *LinkChecker) getLinks(pageURL string) ([]string, error) { + resp, err := lc.client.Get(pageURL) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("failed to fetch page: %s", resp.Status) + } + + doc, err := html.Parse(resp.Body) + if err != nil { + return nil, err + } + + var links []string + baseURL, err := url.Parse(pageURL) + if err != nil { + return nil, err + } + + var traverse func(*html.Node) + traverse = func(n *html.Node) { + if n.Type == html.ElementNode && n.Data == "a" { + for _, attr := range n.Attr { + if attr.Key == "href" { + link := attr.Val + if !strings.HasPrefix(link, "http") { + // Convert relative URLs to absolute + if absURL, err := baseURL.Parse(link); err == nil { + link = absURL.String() + } + } + if strings.HasPrefix(link, "http") { + links = append(links, link) + } + } + } + } + for c := n.FirstChild; c != nil; c = c.NextSibling { + traverse(c) + } + } + traverse(doc) + + return links, nil +} + +func (lc *LinkChecker) isLinkValid(link string) bool { + resp, err := lc.client.Get(link) + if err != nil { + return false + } + defer resp.Body.Close() + + return resp.StatusCode >= 200 && resp.StatusCode < 400 +} diff --git a/main.go b/main.go new file mode 100644 index 0000000..43f84de --- /dev/null +++ b/main.go @@ -0,0 +1,32 @@ +package main + +import ( + "flag" + "fmt" + "log" +) + +func main() { + url := flag.String("url", "", "URL to check for broken links") + flag.Parse() + + if *url == "" { + log.Fatal("Please provide a URL using the -url flag") + } + + checker := NewLinkChecker() + brokenLinks, err := checker.CheckLinks(*url) + if err != nil { + log.Fatal(err) + } + + if len(brokenLinks) == 0 { + fmt.Println("No broken links found!") + return + } + + fmt.Println("Found broken links:") + for _, link := range brokenLinks { + fmt.Printf("- %s\n", link) + } +}