Help solving having to loop json twice (I want to loop once)

Hello, I recently decided to fetch all the ads from a job website’s API, which returns JSON. The JSON contains many job ads and also the number of pages available. I’m a trainee, so this is good practice.

The problem I am having is that it is very slow to loop through the JSON twice: first to get the totalPages variable, and then to store everything in structs. I am also writing the same code twice, which is really annoying because I hate repeating myself. I am getting some very long run times that I would like to shorten; I wrote the same thing in Python and it was actually quicker.

Thanks in advance for any responses! I’m here to learn and move on :slight_smile:
Code:

package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"log"
	"net/http"
	"os"
	"strconv"
	"strings"
)

// JsonData is the top-level object of one response page from the ads feed.
// main decodes every response body into it with json.Unmarshal.
type JsonData struct {
	// Content holds the ads carried by this page; lookForWork iterates it.
	Content       []JsonContent `json:"content"`
	TotalElements int           `json:"totalElements"`
	PageNumber    int           `json:"pageNumber"`
	PageSize      int           `json:"pageSize"`
	// TotalPages is decoded from the same response as the ads themselves.
	TotalPages    int           `json:"totalPages"`
	First         bool          `json:"first"`
	Last          bool          `json:"last"`
	Sort          string        `json:"sort"`
}

// JsonContent is one element of JsonData.Content, i.e. a single ad.
// In the code shown here only Description and Link are read (by
// lookForWork); the remaining fields are decoded but not used.
type JsonContent struct {
	Uuid           string                 `json:"uuid"`
	Published      string                 `json:"published"`
	Expires        string                 `json:"expires"`
	Updated        string                 `json:"updated"`
	WorkLoc        []WorkLocations        `json:"workLocations"`
	Title          string                 `json:"title"`
	// Description is the text lookForWork searches for keywords.
	Description    string                 `json:"description"`
	SourceUrl      string                 `json:"sourceurl"`
	Source         string                 `json:"source"`
	ApplicationDue string                 `json:"applicationDue"`
	OccupationCat  []OccupationCategories `json:"occupationCategories"`
	JobTitle       string                 `json:"jobtitle"`
	// Link is used by lookForWork as the key of its result map.
	Link           string                 `json:"link"`
	Employ         Employer               `json:"employer"`
	EngagementType string                 `json:"engagementtype"`
	Extent         string                 `json:"extent"`
	StartTime      string                 `json:"starttime"`
	// PositionCount is an interface{}, so any JSON value type is accepted.
	PositionCount  interface{}            `json:"positioncount"`
	Sector         string                 `json:"sector"`
}

// WorkLocations is one entry of an ad's "workLocations" array
// (JsonContent.WorkLoc). All fields are decoded as plain strings.
type WorkLocations struct {
	Country    string `json:"country"`
	Address    string `json:"address"`
	City       string `json:"city"`
	PostalCode string `json:"postalCode"`
	County     string `json:"county"`
	Municipal  string `json:"municipal"`
}

// OccupationCategories is one entry of an ad's "occupationCategories" array
// (JsonContent.OccupationCat), decoded from the "level1" and "level2" keys.
type OccupationCategories struct {
	Level1 string `json:"level1"`
	Level2 string `json:"level2"`
}

// Employer is the "employer" object of an ad (JsonContent.Employ).
type Employer struct {
	Name        string      `json:"name"`
	OrgNr       string      `json:"orgnr"`
	Description string      `json:"description"`
	// Homepage is an interface{}, so any JSON value type is accepted.
	Homepage    interface{} `json:"homepage"`
}

// TotalPageStruct captures only the "totalPages" key of a feed response;
// every other key in the payload is ignored when decoding into it.
// getTotalPages fills it in.
type TotalPageStruct struct {
	TotalPages    int           `json:"totalPages"`
}

func main() {
	fmt.Println("Starting..")
	var totalPages TotalPageStruct
	var datas JsonData
	var keyWords = []string{"Python", "Golang", "Ansible", "Powershell", "Support", "Kundeservice", "Servicekonsulent", "kundebehandling", "Medarbeider"}

	getTotalPages(&totalPages)

	i := 1
	for i < totalPages.TotalPages {
		url := "https://arbeidsplassen.nav.no/public-feed/api/v1/ads?page=" + strconv.Itoa(i) + "&size=5000&published=%2A&county=Oslo"

		// Create a Bearer string by appending string access token
		var bearer = "Bearer " + "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJwdWJsaWMudG9rZW4udjFAbmF2Lm5vIiwiYXVkIjoiZmVlZC1hcGktdjEiLCJpc3MiOiJuYXYubm8iLCJpYXQiOjE1NTc0NzM0MjJ9.jNGlLUF9HxoHo5JrQNMkweLj_91bgk97ZebLdfx3_UQ"

		// Create a new request using http
		req, err := http.NewRequest("GET", url, nil)

		// add authorization header to the req
		req.Header.Add("Authorization", bearer)

		// Send req using http Client
		client := &http.Client{}

		resp, err := client.Do(req)
		if err != nil {
			log.Fatal(err)
		}

		body, _ := ioutil.ReadAll(resp.Body)

		err = json.Unmarshal(body, &datas)
		if err != nil {
			log.Fatal(err)
		}

		dataMap := lookForWork(&datas, keyWords)
		for k, v := range dataMap {
			if v > 0 {
				fmt.Println(k, v)
			}
		}
		i++
	}
	fmt.Println("Ended..")
	bufio.NewReader(os.Stdin).ReadBytes('\n')

}

// getTotalPages requests page 1 of the ads feed and decodes the response
// into totalPages, which only picks up the "totalPages" key.
//
// Every failure (building the request, sending it, a non-200 status, reading
// the body, decoding the JSON) terminates the program through log.Fatal.
func getTotalPages(totalPages *TotalPageStruct) {
	url := "https://arbeidsplassen.nav.no/public-feed/api/v1/ads?page=1&size=5000&published=%2A&county=Oslo"

	// Create a Bearer string by appending string access token
	var bearer = "Bearer " + "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJwdWJsaWMudG9rZW4udjFAbmF2Lm5vIiwiYXVkIjoiZmVlZC1hcGktdjEiLCJpc3MiOiJuYXYubm8iLCJpYXQiOjE1NTc0NzM0MjJ9.jNGlLUF9HxoHo5JrQNMkweLj_91bgk97ZebLdfx3_UQ"

	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		log.Fatal(err)
	}
	req.Header.Add("Authorization", bearer)

	client := &http.Client{}
	resp, err := client.Do(req)
	if err != nil {
		log.Fatal(err)
	}

	body, err := ioutil.ReadAll(resp.Body)
	// Close explicitly instead of deferring: log.Fatal exits the process
	// without running deferred calls.
	resp.Body.Close()
	if err != nil {
		log.Fatal(err)
	}
	if resp.StatusCode != http.StatusOK {
		log.Fatalf("fetching total pages: unexpected status %s", resp.Status)
	}

	// totalPages is already a pointer, so it is passed to Unmarshal directly.
	if err := json.Unmarshal(body, totalPages); err != nil {
		log.Fatal(err)
	}
}

// lookForWork counts, for every ad in data.Content, how many of keyWords occur
// in the ad's description, ignoring case. The result maps each ad's Link to
// that count; ads with no match are present with a count of 0. If two ads
// share a Link, the count of the later one wins.
func lookForWork(data *JsonData, keyWords []string) map[string]int {
	// Lower-case the keywords once instead of once per ad.
	lowered := make([]string, len(keyWords))
	for i, kw := range keyWords {
		lowered[i] = strings.ToLower(kw)
	}

	dataMap := make(map[string]int, len(data.Content))
	for i := range data.Content {
		// Index instead of ranging by value to avoid copying each ad.
		ad := &data.Content[i]
		// Lower-case the description once per ad, not once per keyword.
		description := strings.ToLower(ad.Description)

		hits := 0
		for _, kw := range lowered {
			if strings.Contains(description, kw) {
				hits++
			}
		}
		dataMap[ad.Link] = hits
	}
	return dataMap
}

Hey, I don’t have time to look indepth at the moment but just wanted to suggest that you not include access tokens in your public snippets.

Edit: At a quick look I’m not sure you really need to get the total pages before the rest of the data at all? When I retrieve any page, whether it be 1 or 90, via the above API, I get the “TotalPages” data in the response. You’ve even included that in your JsonData struct. So you could likely just start from page 1 and grab TotalPages from there if you want, or just loop until there’s no listing data in the response.

2 Likes

Thank you for responding. I will take another look at my code and see if i can figure it out.

The API access key is a public one; it’s available to everyone, so it won’t do any harm.

There is a small mistake in your code. If you initialize i to 1, you then need to loop while i <= totalPages.TotalPages, not i < totalPages.TotalPages.

Another error is that you are missing a resp.Body.Close() just after the body, _ := ioutil.ReadAll(resp.Body) instruction.

You can optimize your code by moving var bearer = "Bearer " + "e... out of the loop, so that the variable is initialized only once. The same goes for client := &http.Client{}: you can use the same client for all calls. These are just micro-optimizations. You won’t see a significant gain in execution time with these changes, but it is a good habit to get into.


As suggested by Liza, you can avoid the call to get the total pages.
Here is how you could do the loop:

...
var datas JsonData
...
datas.TotalPages = 1
for datas.PageNumber < datas.TotalPages {
   url := "https://arbeidsplassen.nav.no/public-feed/api/v1/ads?page=" + strconv.Itoa(datas.PageNumber+1) + "&size=5000&published=%2A&county=Oslo"
   ...
   err = json.Unmarshal(body, &datas)
   ...
}

This will allow you to get rid of getTotalPages(&totalPages).

Note that the above code assumes that

  • datas.PageNumber is initialized to 0
  • pages are numbered from 1 to datas.TotalPages included
2 Likes
package main

import (
	"bufio"
	"bytes"
	"encoding/json"
	"fmt"
	"io"
	"log"
	"net/http"
	"os"
	"strconv"
	"strings"
)

// JsonData is the subset of a feed response page that this program uses;
// keys of the response without a matching field are ignored by the decoder.
type JsonData struct {
	// Content holds the ads carried by this page; lookForWork iterates it.
	Content    []JsonContent `json:"content"`
	// TotalPages is compared against the current page number in main to
	// decide when to stop requesting pages.
	TotalPages int           `json:"totalPages"`
}

// JsonContent is one element of JsonData.Content, reduced to the two fields
// lookForWork reads.
type JsonContent struct {
	// Description is the text searched for keywords.
	Description string `json:"description"`
	// Link is used as the key of lookForWork's result map.
	Link        string `json:"link"`
}

func main() {
	fmt.Println("Starting..")
	var datas JsonData
	var keyWords = []string{
		strings.ToLower("Python"),
		strings.ToLower("Golang"),
		strings.ToLower("Ansible"),
		strings.ToLower("Powershell"),
		strings.ToLower("Support"),
		strings.ToLower("Kundeservice"),
		strings.ToLower("Servicekonsulent"),
		strings.ToLower("kundebehandling"),
		strings.ToLower("Medarbeider"),
	}

	buffer := bytes.NewBuffer(make([]byte, 0, 4096))
	page := 1
	for {
		if err := request(page, buffer); err != nil {
			log.Fatal(err)
		}

		if err := json.Unmarshal(buffer.Bytes(), &datas); err != nil {
			log.Fatal(err)
		}

		dataMap := lookForWork(&datas, keyWords)
		for k, v := range dataMap {
			if v > 0 {
				fmt.Println(k, v)
			}
		}

		if page >= datas.TotalPages {
			break
		}

		page++
	}

	buffer = nil

	fmt.Println("Ended..")
	bufio.NewReader(os.Stdin).ReadBytes('\n')

}

// request fetches the given page of the ads feed with http.DefaultClient and
// stores the raw response body in buffer, replacing its previous contents.
//
// It returns an error when the request cannot be built or sent, when the
// server answers with a status other than 200 OK, or when reading the body
// fails. Without the status check an error payload would be handed to the
// JSON decoder as if it were a page of ads.
func request(page int, buffer *bytes.Buffer) error {
	url := "https://arbeidsplassen.nav.no/public-feed/api/v1/ads?page=" + strconv.Itoa(page) + "&size=5000&published=%2A&county=Oslo"
	// Create a Bearer string by appending string access token
	var bearer = "Bearer " + "eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJwdWJsaWMudG9rZW4udjFAbmF2Lm5vIiwiYXVkIjoiZmVlZC1hcGktdjEiLCJpc3MiOiJuYXYubm8iLCJpYXQiOjE1NTc0NzM0MjJ9.jNGlLUF9HxoHo5JrQNMkweLj_91bgk97ZebLdfx3_UQ"

	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return err
	}
	req.Header.Add("Authorization", bearer)

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		// Per the net/http documentation the response can be ignored when Do
		// returns an error; any non-nil body is already closed.
		return err
	}
	defer resp.Body.Close()

	// Drop the previous page before anything else so the caller can never
	// mistake stale bytes for the current response.
	buffer.Reset()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("requesting page %d: unexpected status %s", page, resp.Status)
	}

	_, err = io.Copy(buffer, resp.Body)
	return err
}

// lookForWork counts, for every ad in data.Content, how many of keyWords occur
// in the ad's lower-cased description. keyWords are compared as given, so the
// caller must pass them already lower-cased. The result maps each ad's Link to
// its count; ads with no match are present with a count of 0. If two ads share
// a Link, the count of the later one wins.
func lookForWork(data *JsonData, keyWords []string) map[string]int {
	dataMap := make(map[string]int, len(data.Content))
	for i := range data.Content {
		ad := &data.Content[i]
		// Lower-case the description once per ad, not once per keyword.
		description := strings.ToLower(ad.Description)

		hits := 0
		for _, keyWord := range keyWords {
			if strings.Contains(description, keyWord) {
				hits++
			}
		}
		dataMap[ad.Link] = hits
	}
	return dataMap
}

I’ve simplified your code. First of all, I’ve trimmed the structs down to only the fields that are actually used, to reduce the work json.Unmarshal has to do. I also optimized how the HTTP request reads resp.Body, reusing ‘buffer’ to reduce memory allocation.

I also adjusted how paging works. There is always at least one page to request, so the loop fetches page 1 first and only then checks whether there is more data to request. Once the last page has been processed, the loop exits and the program ends.

I can’t compare the performance myself because it depends heavily on network conditions, so please measure both versions yourself to make sure the program performs as you expect.

The outer loop can be written much simpler, eg.

for i =1; i < totalPages.TotalPages; i++ {
...
}
1 Like

This topic was automatically closed 90 days after the last reply. New replies are no longer allowed.