Is there a better way to do parallel programming than this?

Hey.
So I made this script for getting the follower count of “influencers” from Instagram.
The “runtime” number I am getting from it is between 550 and 750 ms. That is not bad, but I am wondering whether it could be better (I am a Go newbie – I have only been learning it for 3 weeks).

package main

import (
	"encoding/json"
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"sync"
	"time"
)

// Types mirroring the part of the profile JSON that is decoded:
//
//	{"user": {"followed_by": {"count": N}}}
type (
	// user is the top-level document; only its "user" object is read.
	user struct {
		User userData `json:"user"`
	}

	// userData is the "user" object; only "followed_by" is read.
	userData struct {
		Followers count `json:"followed_by"`
	}

	// count is an object wrapping a single integer "count" field.
	count struct {
		Count int `json:"count"`
	}
)

// getFollowerCount starts one worker goroutine that reads Instagram usernames
// from in, looks up each account's follower count and sends it on the
// returned channel. The returned channel is closed once in has been closed
// and drained, so callers can simply range over it.
//
// A username whose lookup fails (request error, non-200 status, unreadable or
// malformed body) is printed to standard output and skipped: no count is sent
// for it, so the output may contain fewer values than the input.
func getFollowerCount(in <-chan string) <-chan int {
	out := make(chan int)
	go func() {
		// Close out when the input is exhausted so downstream range loops end.
		defer close(out)
		for un := range in {
			n, err := fetchFollowerCount(un)
			if err != nil {
				fmt.Println(err)
				continue
			}
			out <- n
		}
	}()
	return out
}

// fetchFollowerCount performs a single profile request for username and
// returns the decoded follower count. Keeping the request in its own function
// lets the deferred Body.Close run after every request instead of piling up
// until the worker goroutine exits.
func fetchFollowerCount(username string) (int, error) {
	resp, err := http.Get("https://www.instagram.com/" + username + "/?__a=1")
	if err != nil {
		// resp is nil here; it must not be touched.
		return 0, err
	}
	defer resp.Body.Close()

	// Only a 200 reply is decoded; anything else is reported as a failure
	// rather than risking a bogus zero count from an error body.
	if resp.StatusCode != http.StatusOK {
		return 0, fmt.Errorf("%s: unexpected status %s", username, resp.Status)
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return 0, fmt.Errorf("%s: reading response: %v", username, err)
	}

	var u user
	if err := json.Unmarshal(body, &u); err != nil {
		return 0, fmt.Errorf("%s: decoding response: %v", username, err)
	}
	return u.User.Followers.Count, nil
}

// merge fans the given channels into a single channel. One forwarding
// goroutine is started per input; the returned channel is closed after every
// input channel has been closed and fully forwarded. Values from different
// inputs may interleave in any order.
func merge(cs ...<-chan int) <-chan int {
	merged := make(chan int)

	var pending sync.WaitGroup
	pending.Add(len(cs))
	for _, src := range cs {
		go func(src <-chan int) {
			defer pending.Done()
			for v := range src {
				merged <- v
			}
		}(src)
	}

	// Close the merged stream only after all forwarders have finished, since
	// sending on a closed channel would panic.
	go func() {
		pending.Wait()
		close(merged)
	}()

	return merged
}

// gen returns an unbuffered channel that yields each of users in order and is
// closed after the last one has been received.
func gen(users ...string) <-chan string {
	names := make(chan string)
	go func() {
		defer close(names)
		for i := range users {
			names <- users[i]
		}
	}()
	return names
}

// main feeds a fixed list of usernames to ten getFollowerCount workers that
// share one input channel, prints every count received from the merged
// worker outputs, and finally logs the elapsed wall-clock time.
func main() {
	began := time.Now()
	fmt.Println("STARTING UP")

	names := []string{"kanywest", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}
	queue := gen(names...)

	// Start the workers; each one competes for usernames on the same queue.
	const workerCount = 10
	workers := make([]<-chan int, 0, workerCount)
	for len(workers) < workerCount {
		workers = append(workers, getFollowerCount(queue))
	}

	for followers := range merge(workers...) {
		fmt.Println(followers)
	}

	log.Println("runtime", time.Since(began))
}

thank you, have a nice day

I wrote a tutorial that will take you through the steps and tools used for improving performance.

https://pocketgophers.com/concurrency-slower/

I agree with Nathan, you are complicating things a bit with wasteful use of channels.

Concurrency is fine to use here, if we assume that getting the followers count needs a network request (for example to Instagram API), and that is slow.

You should simply:

  1. Create one input channel for the names.
  2. Create one output channel for the resulting pairs (name, follower count). Each pair could be a struct.
  3. Set up a number of workers that all read from the same input channel. The number of workers is the parallelism you want for the slow (network) part of processing: getting the followers count.
  4. Loop on your names slice and put the names on the input channel.
  5. Collect and print all resulting pairs by looping on the output channel.

Remember that you also need error handling and a way to stop all workers when you have nothing more to put on the input channel. Also, you need to close the output channel when all workers are done to terminate the collector loop.

sooo like this?

package main

import (
	"encoding/json"
	"fmt"
	"net/http"
	"sync"
	"time"
)

// Types mirroring the part of the profile JSON that is decoded:
//
//	{"user": {"followed_by": {"count": N}}}
//
// plus the result record passed between goroutines.
type (
	// user is the top-level document; only its "user" object is read.
	user struct {
		User userData `json:"user"`
	}

	// userData is the "user" object; only "followed_by" is read.
	userData struct {
		Followers count `json:"followed_by"`
	}

	// count is an object wrapping a single integer "count" field.
	count struct {
		Count int `json:"count"`
	}

	// influencer pairs a username with its follower count.
	influencer struct {
		Name  string
		Count int
	}
)

// getFollowerCount looks up the follower count for username and, on success,
// sends influencer{username, count} on result. wg.Done is called on every
// return path so the caller's Wait cannot hang on a failed lookup.
//
// On failure (request error, non-200 status, malformed body) the username and
// the problem are printed to standard output and nothing is sent on result.
func getFollowerCount(username string, result chan<- influencer, wg *sync.WaitGroup) {
	defer wg.Done()

	reqURL := "https://www.instagram.com/" + username + "/?__a=1"
	resp, err := http.Get(reqURL)
	if err != nil {
		fmt.Println(username, err)
		return
	}
	defer resp.Body.Close()

	// Only a 200 reply is decoded; anything else is reported as a failure
	// rather than risking a bogus zero count from an error body that happens
	// to be valid JSON.
	if resp.StatusCode != http.StatusOK {
		fmt.Println(username, "unexpected status:", resp.Status)
		return
	}

	var u user
	if err := json.NewDecoder(resp.Body).Decode(&u); err != nil {
		fmt.Println(username, err)
		return
	}
	result <- influencer{username, u.User.Followers.Count}
}

// execute starts one getFollowerCount goroutine per entry in users, waits for
// all of them to finish, and then closes result to signal completion.
//
// Closing the channel replaces the former in-band influencer{"", 0} marker:
// a range loop over result ends on its own, and a plain receive after the
// close yields that same zero value, so consumers that still compare against
// it keep working. execute must be the only closer of result.
func execute(users []string, result chan<- influencer) {
	var wg sync.WaitGroup
	wg.Add(len(users))
	for _, username := range users {
		go getFollowerCount(username, result, &wg)
	}
	wg.Wait()
	// Every worker has returned, so nothing can send on result any more.
	close(result)
}

// main hands a fixed list of usernames to execute, prints each influencer
// received on the result channel, and finally prints the elapsed time.
// Receiving stops at the zero-value influencer or when the channel is closed.
func main() {
	began := time.Now()
	fmt.Println("STARTING UP")

	names := []string{"lildickygram", "kimkardashian", "groovyq", "kendricklamar", "barackobama", "asaprocky", "champagnepapi", "eminem", "drdre", "g_eazy", "skrillex"}
	results := make(chan influencer)
	go execute(names, results)

	// The zero value is identical to influencer{"", 0}, the end-of-stream marker.
	var sentinel influencer
	for {
		item, open := <-results
		if !open || item == sentinel {
			break
		}
		fmt.Println(item)
	}

	fmt.Println("runtime", time.Since(began))
}

The main changes are in the getFollowerCount func and a new execute func; the gen func was removed,
a type influencer was added,
and the main func was modified to match these changes.

Almost, you basically only followed points 4 and 5. Try to implement 1, 2, 3 too :slight_smile:

I think I don’t understand the third point then.

it’s basically the

d1 := getFollowerCount(in)
d2 := getFollowerCount(in)
d3 := getFollowerCount(in)
d4 := getFollowerCount(in)

but how do I get the results without the merge function?

You will need two channels: one where you send each user from the slice and the other one is “result”.

By the way, to end the range loops on channels, don’t send the empty value, just close the channel and the loop will terminate (you can remove the if-break.)

not sure whether I am missing something or not, but isn’t that basically my original code?
https://play.golang.org/p/7T9_5UL8Va < - here if I add multiple workers (w2,w3…) how can I merge them without using another chan?

ps.: sorry for the dumb questions :smile:

Basically this way: https://play.golang.org/p/oH7Os-RIBi

See how it uses two channels and a loop to start the workers.

This topic was automatically closed 90 days after the last reply. New replies are no longer allowed.