feat: Implement recursive link checking within the same domain
This commit is contained in:
parent
43d7a785ab
commit
7020dbd47d
|
@ -11,11 +11,13 @@ import (
|
||||||
|
|
||||||
type LinkChecker struct {
|
type LinkChecker struct {
|
||||||
client *http.Client
|
client *http.Client
|
||||||
|
visited map[string]bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewLinkChecker() *LinkChecker {
|
func NewLinkChecker() *LinkChecker {
|
||||||
return &LinkChecker{
|
return &LinkChecker{
|
||||||
client: &http.Client{},
|
client: &http.Client{},
|
||||||
|
visited: make(map[string]bool),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -25,16 +27,35 @@ type BrokenLink struct {
|
||||||
Error string
|
Error string
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (lc *LinkChecker) isSameDomain(baseURL, link string) bool {
|
||||||
|
base, err := url.Parse(baseURL)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
target, err := url.Parse(link)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
return base.Host == target.Host
|
||||||
|
}
|
||||||
|
|
||||||
func (lc *LinkChecker) CheckLinks(baseURL string) ([]BrokenLink, error) {
|
func (lc *LinkChecker) CheckLinks(baseURL string) ([]BrokenLink, error) {
|
||||||
// Get all links from the page
|
return lc.checkLinksRecursive(baseURL, make([]BrokenLink, 0))
|
||||||
links, err := lc.getLinks(baseURL)
|
}
|
||||||
|
|
||||||
|
func (lc *LinkChecker) checkLinksRecursive(pageURL string, brokenLinks []BrokenLink) ([]BrokenLink, error) {
|
||||||
|
if lc.visited[pageURL] {
|
||||||
|
return brokenLinks, nil
|
||||||
|
}
|
||||||
|
lc.visited[pageURL] = true
|
||||||
|
|
||||||
|
links, err := lc.getLinks(pageURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("error getting links: %w", err)
|
return nil, fmt.Errorf("error getting links: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
var brokenLinks []BrokenLink
|
|
||||||
|
|
||||||
// Check each link
|
|
||||||
for _, link := range links {
|
for _, link := range links {
|
||||||
if status, err := lc.isLinkValid(link); status >= 400 || err != nil {
|
if status, err := lc.isLinkValid(link); status >= 400 || err != nil {
|
||||||
broken := BrokenLink{URL: link}
|
broken := BrokenLink{URL: link}
|
||||||
|
@ -45,6 +66,15 @@ func (lc *LinkChecker) CheckLinks(baseURL string) ([]BrokenLink, error) {
|
||||||
}
|
}
|
||||||
brokenLinks = append(brokenLinks, broken)
|
brokenLinks = append(brokenLinks, broken)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Recursively check links from the same domain
|
||||||
|
if lc.isSameDomain(pageURL, link) && !lc.visited[link] {
|
||||||
|
recursiveLinks, err := lc.checkLinksRecursive(link, brokenLinks)
|
||||||
|
if err != nil {
|
||||||
|
continue // Skip this page if there's an error, but continue checking others
|
||||||
|
}
|
||||||
|
brokenLinks = recursiveLinks
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return brokenLinks, nil
|
return brokenLinks, nil
|
||||||
|
|
Loading…
Reference in New Issue