Hi guys, I’m wondering if you could give me some feedback on the following code.
I’m willing to know if this beginner approach meets the goal of fetching concurrent URLs while syncing visited sites in a map. This is related to this exercise 10 in A tour of Go.
Should I use a structure to hold a map with fetched URLs?
Should I use channels (how would it apply here?)
Is the lock in the correct place?
Something curious is that commenting the time.Sleep(time.Second)
makes the code to get interrupted in the first URL and never fetches the other URLs. Why time.Sleep
is needed here?
package main
import (
"fmt"
"sync"
"time"
"github.com/juniormayhe/currentFilename"
)
type Fetcher interface {
// Fetch returns the body of URL and
// a slice of URLs found on that page.
Fetch(url string) (body string, urls []string, err error)
}
func setVisited(url string, visited map[string]bool, lock sync.Mutex) {
lock.Lock()
visited[url] = true
lock.Unlock()
}
// Crawl uses fetcher to recursively crawl
// pages starting with url, to a maximum of depth.
func Crawl(url string, depth int, fetcher Fetcher, fetched map[string]bool) {
var lock sync.Mutex
_, exist := fetched[url]
if depth <= 0 || exist {
return
}
body, urls, err := fetcher.Fetch(url)
if err != nil {
setVisited(url, fetched, lock)
fmt.Println(err)
return
}
fmt.Printf("found: %s %q\n", url, body)
for _, u := range urls {
go Crawl(u, depth-1, fetcher, fetched)
}
setVisited(url, fetched, lock)
time.Sleep(time.Second)
}
// fakeFetcher is Fetcher that returns canned results.
type fakeFetcher map[string]*fakeResult
type fakeResult struct {
body string
urls []string
}
func (f fakeFetcher) Fetch(url string) (string, []string, error) {
if res, ok := f[url]; ok {
return res.body, res.urls, nil
}
return "", nil, fmt.Errorf("not found: %s", url)
}
// fetcher is a populated fakeFetcher.
var fetcher = fakeFetcher{
"https://golang.org/": &fakeResult{
"The Go Programming Language",
[]string{
"https://golang.org/pkg/",
"https://golang.org/cmd/",
},
},
"https://golang.org/pkg/": &fakeResult{
"Packages",
[]string{
"https://golang.org/",
"https://golang.org/cmd/",
"https://golang.org/pkg/fmt/",
"https://golang.org/pkg/os/",
},
},
"https://golang.org/pkg/fmt/": &fakeResult{
"Package fmt",
[]string{
"https://golang.org/",
"https://golang.org/pkg/",
},
},
"https://golang.org/pkg/os/": &fakeResult{
"Package os",
[]string{
"https://golang.org/",
"https://golang.org/pkg/",
},
},
}
func main() {
fmt.Println(currentFilename.GetCurrentFileName(), "\n")
fetchedUrls := make(map[string]bool)
Crawl("https://golang.org/", 4, fetcher, fetchedUrls)
fmt.Println(fetchedUrls)
}