Cleaning up waitgroups and channels

Hi! I have a question about usage of waitgroups and channels. I have a simple command that takes a list of files and hashes them concurrently (with numRoutines). I feel like the code I have here can be made simpler but perhaps i’m overthinking it. Can anyone take alook and give me some feedback?
I’m particularly curious about how i can improve the check for how many hashes have been returned by the goroutine (in the go/for/select block).

func analyzeNode(log *logger.LoggerInstance, wg *sync.WaitGroup, in <-chan string, out chan<- models.Node, errored chan<- struct{}) {
	defer wg.Done()

	for filename := range in {
		fileStat, err := os.Stat(filename)
		if err != nil {
			log.Error("Can't read file", err, "file", filename)
			errored <- struct{}{}
			continue
		}

		f, err := os.Open(filename)
		if err != nil {
			log.Error("something went wrong opening file", err, "file", filename)
			errored <- struct{}{}
			continue
		}

		md5Hash := file_ops.MD5Hash(f)
		out <- models.Node{Name: filename, Size: fileStat.Size(), MD5: md5Hash}
		f.Close()
	}
}

var analyzeCmd = &cobra.Command{
	Use:   "analyze",
	Short: "Analyze a set of directories and/or files",
	Run: func(cmd *cobra.Command, args []string) {
		log := logger.CreateLogger("analyze")
		var nl models.NodeList

		log.Trace("analyze file called")

		numRoutines, err := cmd.Flags().GetInt("routines")
		if err != nil {
			log.Error("Can't get numRoutines. Defaulting to 1", err)
			numRoutines = 1
		}

		files := disk_inventory.BuildFileList(args...)
		var wg sync.WaitGroup
		supplier, receiver, errored, done := make(chan string), make(chan models.Node), make(chan struct{}), make(chan struct{})
		received := 0

		for i := 0; i < numRoutines; i++ {
			wg.Add(1)
			go analyzeNode(&log, &wg, supplier, receiver, errored)
		}

		go func() {
			for {
				select {
				case n := <-receiver:
					nl.Append(n)
					received++
				case <-errored:
					received++
				}

				if received == len(files) {
					done <- struct{}{}
					break
				}
			}
		}()

		for _, filename := range files {
			supplier <- filename
		}

		<-done
		close(supplier)
		close(receiver)
		wg.Wait()
	},
}

Thanks!

  • I wonder if you could get file size from the f variable directly.
  • checking len(out) should give you how many unread messages are at channel (note: this is not concurrent safe).
1 Like

Thanks for the first tip! Completely missed that one!

I don’t think I would be needing to find number of unread messages from the channel though. it seems like it would cause bugs relying on it