feat: Implement basic link checker in Go with URL parsing and link validation

This commit is contained in:
Erik Winter (aider) 2024-11-25 09:49:01 +01:00
commit 5d646091a2
2 changed files with 130 additions and 0 deletions

98
linkchecker.go Normal file
View File

@ -0,0 +1,98 @@
package main
import (
	"fmt"
	"net/http"
	"net/url"
	"strings"
	"time"

	"golang.org/x/net/html"
)
// LinkChecker finds broken links on a web page.
//
// All HTTP traffic (fetching the page itself and probing each link) goes
// through the single client stored here.
type LinkChecker struct {
	// client performs every HTTP request issued by the checker.
	client *http.Client
}
// defaultRequestTimeout bounds each HTTP request made by a LinkChecker
// (connection, redirects and reading the response). Without a limit a single
// unresponsive server would block the whole run indefinitely.
const defaultRequestTimeout = 30 * time.Second

// NewLinkChecker returns a LinkChecker whose HTTP client gives up on any
// request that takes longer than defaultRequestTimeout.
func NewLinkChecker() *LinkChecker {
	return &LinkChecker{
		client: &http.Client{Timeout: defaultRequestTimeout},
	}
}
// CheckLinks fetches the page at baseURL, probes every link found on it and
// returns the ones that are broken.
//
// Each distinct URL is probed and reported at most once, in the order of its
// first appearance on the page, so a link repeated in a navigation bar or
// footer does not cause repeated requests or duplicate entries in the result.
// The returned slice is nil when no broken links were found. A non-nil error
// means the page itself could not be retrieved or parsed; it wraps the
// underlying cause.
func (lc *LinkChecker) CheckLinks(baseURL string) ([]string, error) {
	links, err := lc.getLinks(baseURL)
	if err != nil {
		return nil, fmt.Errorf("error getting links: %w", err)
	}

	var brokenLinks []string
	seen := make(map[string]struct{}, len(links))
	for _, link := range links {
		// Skip URLs that were already probed earlier in the page.
		if _, dup := seen[link]; dup {
			continue
		}
		seen[link] = struct{}{}

		if !lc.isLinkValid(link) {
			brokenLinks = append(brokenLinks, link)
		}
	}
	return brokenLinks, nil
}
// getLinks downloads pageURL and returns the absolute http(s) URL of every
// href attribute on an <a> element, in document order (duplicates included).
//
// Relative hrefs are resolved against the URL the page was finally served
// from, so redirects are honoured; if the response does not carry that
// information, pageURL itself is used. Hrefs that cannot be parsed, or that
// resolve to a scheme other than http/https (mailto:, javascript:, ...), are
// skipped.
//
// An error is returned when pageURL is malformed, the request fails, the
// response status is not 200 OK, or the body cannot be parsed as HTML.
func (lc *LinkChecker) getLinks(pageURL string) ([]string, error) {
	// Validate the URL before touching the network.
	base, err := url.Parse(pageURL)
	if err != nil {
		return nil, err
	}

	resp, err := lc.client.Get(pageURL)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("failed to fetch page: %s", resp.Status)
	}

	// After redirects, relative links belong to the final location, not to
	// the URL that was originally requested.
	if resp.Request != nil && resp.Request.URL != nil {
		base = resp.Request.URL
	}

	doc, err := html.Parse(resp.Body)
	if err != nil {
		return nil, err
	}

	var links []string
	var traverse func(*html.Node)
	traverse = func(n *html.Node) {
		if n.Type == html.ElementNode && n.Data == "a" {
			for _, attr := range n.Attr {
				if attr.Key != "href" {
					continue
				}
				if link, ok := resolveHTTPLink(base, attr.Val); ok {
					links = append(links, link)
				}
			}
		}
		for c := n.FirstChild; c != nil; c = c.NextSibling {
			traverse(c)
		}
	}
	traverse(doc)

	return links, nil
}

// resolveHTTPLink resolves href against base and reports whether the result
// is an http or https URL. The decision is made on the parsed scheme rather
// than on a textual "http" prefix, so a relative path such as
// "http-guide.html" is resolved correctly and schemes such as "httpx:" are
// rejected. Surrounding whitespace in href is ignored.
func resolveHTTPLink(base *url.URL, href string) (string, bool) {
	resolved, err := base.Parse(strings.TrimSpace(href))
	if err != nil {
		return "", false
	}
	if resolved.Scheme != "http" && resolved.Scheme != "https" {
		return "", false
	}
	return resolved.String(), true
}
// isLinkValid reports whether a GET request for link succeeds with a status
// code in the 2xx or 3xx range. Any transport-level failure counts as an
// invalid link.
func (lc *LinkChecker) isLinkValid(link string) bool {
	response, getErr := lc.client.Get(link)
	if getErr != nil {
		return false
	}
	defer response.Body.Close()

	switch code := response.StatusCode; {
	case code < 200, code >= 400:
		return false
	default:
		return true
	}
}

32
main.go Normal file
View File

@ -0,0 +1,32 @@
package main
import (
"flag"
"fmt"
"log"
)
// main reads the page address from the -url flag, checks every link on that
// page and prints the broken ones to standard output. It terminates via
// log.Fatal when the flag is missing or the page cannot be checked.
func main() {
	var target string
	flag.StringVar(&target, "url", "", "URL to check for broken links")
	flag.Parse()

	if target == "" {
		log.Fatal("Please provide a URL using the -url flag")
	}

	broken, err := NewLinkChecker().CheckLinks(target)
	if err != nil {
		log.Fatal(err)
	}

	if len(broken) > 0 {
		fmt.Println("Found broken links:")
		for i := range broken {
			fmt.Printf("- %s\n", broken[i])
		}
		return
	}

	fmt.Println("No broken links found!")
}