Sync Map is underused?

Moin! :sun_with_face: :waving_hand:

I did a bit of research for fun and came across a surprising result.
sync.Map is actually winning my benchmarks. :thinking:

I remember it not being so fast — did it get some improvements?

It basically wins all benchmarks, even when I asked the AI to design worst cases for it.
Am I doing something wrong?

I always use RWMutex and after seeing this I am questioning myself.

Can someone share a test where the sync map loses? :sweat_smile:

package main

import (
	"fmt"
	"runtime"
	"sync"
	"time"
)

// --- RWMutex ---

// RWData is a counter-plus-map store guarded by a sync.RWMutex:
// lookups take the shared read lock, mutations take the exclusive lock.
type RWData struct {
	mu      sync.RWMutex
	counter int
	values  map[string]int
}

// Write stores val under key and bumps the write counter.
func (s *RWData) Write(key string, val int) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.values[key] = val
	s.counter++
}

// Read reports the value stored under key, if any.
func (s *RWData) Read(key string) (int, bool) {
	s.mu.RLock()
	val, found := s.values[key]
	s.mu.RUnlock()
	return val, found
}

// Delete removes key from the map; absent keys are a no-op.
func (s *RWData) Delete(key string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	delete(s.values, key)
}

// --- plain Mutex ---

// MutexData guards the counter and map with a single exclusive
// sync.Mutex; readers and writers all serialize on the same lock.
type MutexData struct {
	mu      sync.Mutex
	counter int
	values  map[string]int
}

// Write records val under key and increments the write counter.
func (s *MutexData) Write(key string, val int) {
	s.mu.Lock()
	defer s.mu.Unlock()
	s.values[key] = val
	s.counter++
}

// Read reports the value stored under key, if any.
func (s *MutexData) Read(key string) (int, bool) {
	s.mu.Lock()
	val, found := s.values[key]
	s.mu.Unlock()
	return val, found
}

// Delete drops key from the map; absent keys are a no-op.
func (s *MutexData) Delete(key string) {
	s.mu.Lock()
	defer s.mu.Unlock()
	delete(s.values, key)
}

// --- sync.Map ---

// SyncMapData wraps sync.Map behind the same Write/Read surface as the
// mutex-based stores. The zero value is ready to use.
type SyncMapData struct {
	m sync.Map
}

// Write stores val under key.
func (s *SyncMapData) Write(key string, val int) {
	s.m.Store(key, val)
}

// Read reports the value stored under key, if any.
func (s *SyncMapData) Read(key string) (int, bool) {
	if v, ok := s.m.Load(key); ok {
		return v.(int), true
	}
	return 0, false
}

// --- Channel event-driven ---

// writeReq asks the owner goroutine to store val under key.
type writeReq struct {
	key string
	val int
}

// readReq asks the owner goroutine for key; the answer arrives on resp.
type readReq struct {
	key  string
	resp chan readResp
}

// readResp carries a lookup result back to the caller.
type readResp struct {
	val int
	ok  bool
}

// ChannelData serializes all map access through a single owner
// goroutine (started by NewChannelData) reached over channels.
type ChannelData struct {
	writeCh chan writeReq
	readCh  chan readReq
	done    chan struct{}
}

// NewChannelData starts the owner goroutine and returns a ready store.
// bufSize is the request-channel capacity (0 = fully synchronous).
func NewChannelData(bufSize int) *ChannelData {
	cd := &ChannelData{
		writeCh: make(chan writeReq, bufSize),
		readCh:  make(chan readReq, bufSize),
		done:    make(chan struct{}),
	}
	go cd.loop()
	return cd
}

// loop owns the map. It runs until done is closed, then drains any
// buffered requests before exiting.
func (cd *ChannelData) loop() {
	values := make(map[string]int)
	for {
		select {
		case w := <-cd.writeCh:
			values[w.key] = w.val
		case r := <-cd.readCh:
			v, ok := values[r.key]
			r.resp <- readResp{v, ok}
		case <-cd.done:
			// Shutdown: flush everything still queued. The original
			// drained only writes, so a Read racing with Stop would
			// block forever waiting on its resp channel; answer
			// pending reads as well.
			for {
				select {
				case w := <-cd.writeCh:
					values[w.key] = w.val
				case r := <-cd.readCh:
					v, ok := values[r.key]
					r.resp <- readResp{v, ok}
				default:
					return
				}
			}
		}
	}
}

// Write queues a store request (blocks when the buffer is full).
func (cd *ChannelData) Write(key string, val int) {
	cd.writeCh <- writeReq{key, val}
}

// Read queues a lookup and waits for the owner goroutine's answer.
func (cd *ChannelData) Read(key string) (int, bool) {
	resp := make(chan readResp, 1)
	cd.readCh <- readReq{key, resp}
	r := <-resp
	return r.val, r.ok
}

// Stop signals the owner goroutine to drain and exit.
func (cd *ChannelData) Stop() {
	close(cd.done)
}

// --- Benchmark harness ---

type BenchResult struct {
	Name    string
	Elapsed time.Duration
	OpsPerS float64
	MemUsed uint64
}

func memUsage() uint64 {
	var m runtime.MemStats
	runtime.GC()
	runtime.ReadMemStats(&m)
	return m.Alloc
}

func runBench(name string, numWriters, numReaders, opsPerWorker int,
	writeFn func(id, i int), readFn func(id, i int)) BenchResult {

	runtime.GC()
	memBefore := memUsage()

	var wg sync.WaitGroup
	start := time.Now()

	for w := range numWriters {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			for i := range opsPerWorker {
				writeFn(id, i)
			}
		}(w)
	}

	for r := range numReaders {
		wg.Add(1)
		go func(id int) {
			defer wg.Done()
			for i := range opsPerWorker {
				readFn(id, i)
			}
		}(r)
	}

	wg.Wait()
	elapsed := time.Since(start)
	memAfter := memUsage()

	totalOps := (numWriters + numReaders) * opsPerWorker
	memDelta := uint64(0)
	if memAfter > memBefore {
		memDelta = memAfter - memBefore
	}

	return BenchResult{
		Name:    name,
		Elapsed: elapsed,
		OpsPerS: float64(totalOps) / elapsed.Seconds(),
		MemUsed: memDelta,
	}
}

// Scenario describes one benchmark configuration: the worker mix, the
// per-worker operation count, and the shape of the key space.
type Scenario struct {
	Name    string // label printed in the report header
	Writers int    // number of writer goroutines
	Readers int    // number of reader goroutines
	OpsPerW int    // operations per worker goroutine
	NumKeys int    // unique keys in the map
	Churn   bool   // write+delete cycle (worst case for sync.Map)
}

// main runs every scenario against every implementation and prints a
// per-scenario comparison table (time, ops/sec, heap delta).
func main() {
	cores := runtime.NumCPU()
	fmt.Printf("Cores: %d\n\n", cores)

	scenarios := []Scenario{
		{"Read-heavy (1W:8R, 16 keys)", 2, cores * 2, 100_000, 16, false},
		{"Write-heavy (8W:1R, 16 keys)", cores * 2, 2, 100_000, 16, false},
		{"Balanced (1:1, 16 keys)", cores, cores, 100_000, 16, false},
		{"Balanced (1:1, 10k keys)", cores, cores, 100_000, 10_000, false},
		{"Balanced (1:1, 1M keys)", cores, cores, 100_000, 1_000_000, false},
		{"Read-heavy (1W:8R, 1M keys)", 2, cores * 2, 100_000, 1_000_000, false},
		{"Write-heavy (8W:1R, 1M keys)", cores * 2, 2, 100_000, 1_000_000, false},
		// sync.Map worst cases: constant churn (write+delete, never stable)
		{"CHURN all-write+delete (16 keys)", cores * 2, 0, 100_000, 16, true},
		{"CHURN all-write+delete (10k keys)", cores * 2, 0, 100_000, 10_000, true},
		{"CHURN all-write+delete (1M keys)", cores * 2, 0, 100_000, 1_000_000, true},
		{"CHURN write+delete + readers (1M keys)", cores, cores, 100_000, 1_000_000, true},
	}

	types := []string{"sync.RWMutex", "sync.Mutex", "sync.Map", "Channel"}

	for _, sc := range scenarios {
		// NOTE: churn write closures perform two map operations per
		// counted op, so Total undercounts raw map traffic there.
		totalOps := (sc.Writers + sc.Readers) * sc.OpsPerW
		fmt.Printf("=== %s ===\n", sc.Name)
		fmt.Printf("    Writers: %d, Readers: %d, Ops/worker: %d, Total: %d\n\n",
			sc.Writers, sc.Readers, sc.OpsPerW, totalOps)

		// The key generators depend only on the scenario, so build them
		// once here instead of rebuilding them per implementation (the
		// original also had a readKeyFn that was an exact duplicate of
		// keyFn; readers and writers target the same key sequence).
		keyFn := func(id, i int) string {
			return fmt.Sprintf("k%d", (id*1000+i)%sc.NumKeys)
		}
		// delete a different key than the one just written to maximize churn
		delKeyFn := func(id, i int) string {
			return fmt.Sprintf("k%d", (id*1000+i+sc.NumKeys/2)%sc.NumKeys)
		}

		results := make([]BenchResult, len(types))

		for t, typ := range types {
			switch typ {
			case "sync.RWMutex":
				d := &RWData{values: make(map[string]int)}
				wFn := func(id, i int) { d.Write(keyFn(id, i), i) }
				if sc.Churn {
					wFn = func(id, i int) {
						d.Write(keyFn(id, i), i)
						d.Delete(delKeyFn(id, i))
					}
				}
				results[t] = runBench(typ, sc.Writers, sc.Readers, sc.OpsPerW,
					wFn, func(id, i int) { d.Read(keyFn(id, i)) })
			case "sync.Mutex":
				d := &MutexData{values: make(map[string]int)}
				wFn := func(id, i int) { d.Write(keyFn(id, i), i) }
				if sc.Churn {
					wFn = func(id, i int) {
						d.Write(keyFn(id, i), i)
						d.Delete(delKeyFn(id, i))
					}
				}
				results[t] = runBench(typ, sc.Writers, sc.Readers, sc.OpsPerW,
					wFn, func(id, i int) { d.Read(keyFn(id, i)) })
			case "sync.Map":
				d := &SyncMapData{}
				wFn := func(id, i int) { d.Write(keyFn(id, i), i) }
				if sc.Churn {
					// SyncMapData has no Delete method; go through the
					// embedded sync.Map directly.
					wFn = func(id, i int) {
						d.m.Store(keyFn(id, i), i)
						d.m.Delete(delKeyFn(id, i))
					}
				}
				results[t] = runBench(typ, sc.Writers, sc.Readers, sc.OpsPerW,
					wFn, func(id, i int) { d.Read(keyFn(id, i)) })
			case "Channel":
				d := NewChannelData(256)
				// channel doesn't support delete, just double-write
				wFn := func(id, i int) { d.Write(keyFn(id, i), i) }
				if sc.Churn {
					wFn = func(id, i int) {
						d.Write(keyFn(id, i), i)
						d.Write(delKeyFn(id, i), -1)
					}
				}
				results[t] = runBench(typ, sc.Writers, sc.Readers, sc.OpsPerW,
					wFn, func(id, i int) { d.Read(keyFn(id, i)) })
				d.Stop()
			}
		}

		fmt.Printf("    %-16s %12s %14s %12s\n", "Type", "Time", "Ops/sec", "Mem delta")
		fmt.Println("    --------------------------------------------------------")
		for _, r := range results {
			fmt.Printf("    %-16s %12v %14.0f %10.2f MB\n",
				r.Name, r.Elapsed.Round(time.Millisecond), r.OpsPerS, float64(r.MemUsed)/(1024*1024))
		}
		fmt.Println()
	}
}

Output

Cores: 16

=== Read-heavy (1W:8R, 16 keys) ===
    Writers: 2, Readers: 32, Ops/worker: 100000, Total: 3400000

    Type                     Time        Ops/sec    Mem delta
    --------------------------------------------------------
    sync.RWMutex            368ms        9226798       0.08 MB
    sync.Mutex              322ms       10569387       0.03 MB
    sync.Map                 46ms       73917538       0.02 MB
    Channel                1.319s        2578048       0.05 MB

=== Write-heavy (8W:1R, 16 keys) ===
    Writers: 32, Readers: 2, Ops/worker: 100000, Total: 3400000

    Type                     Time        Ops/sec    Mem delta
    --------------------------------------------------------
    sync.RWMutex            493ms        6899619       0.02 MB
    sync.Mutex              478ms        7114484       0.03 MB
    sync.Map                387ms        8784015       0.02 MB
    Channel                 644ms        5278396       0.02 MB

=== Balanced (1:1, 16 keys) ===
    Writers: 16, Readers: 16, Ops/worker: 100000, Total: 3200000

    Type                     Time        Ops/sec    Mem delta
    --------------------------------------------------------
    sync.RWMutex            414ms        7720794       0.03 MB
    sync.Mutex              406ms        7889343       0.02 MB
    sync.Map                114ms       28190883       0.02 MB
    Channel                 943ms        3392164       0.02 MB

=== Balanced (1:1, 10k keys) ===
    Writers: 16, Readers: 16, Ops/worker: 100000, Total: 3200000

    Type                     Time        Ops/sec    Mem delta
    --------------------------------------------------------
    sync.RWMutex            507ms        6308529       0.03 MB
    sync.Mutex              500ms        6398072       0.02 MB
    sync.Map                 77ms       41323475       0.01 MB
    Channel                1.018s        3143832       0.57 MB

=== Balanced (1:1, 1M keys) ===
    Writers: 16, Readers: 16, Ops/worker: 100000, Total: 3200000

    Type                     Time        Ops/sec    Mem delta
    --------------------------------------------------------
    sync.RWMutex            520ms        6154994       0.03 MB
    sync.Mutex              548ms        5839895       0.01 MB
    sync.Map                 72ms       44333027       0.01 MB
    Channel                1.007s        3177374       6.82 MB

=== Read-heavy (1W:8R, 1M keys) ===
    Writers: 2, Readers: 32, Ops/worker: 100000, Total: 3400000

    Type                     Time        Ops/sec    Mem delta
    --------------------------------------------------------
    sync.RWMutex            374ms        9095386       0.00 MB
    sync.Mutex              454ms        7483909       0.02 MB
    sync.Map                 61ms       55592569       0.01 MB
    Channel                1.382s        2459893       4.89 MB

=== Write-heavy (8W:1R, 1M keys) ===
    Writers: 32, Readers: 2, Ops/worker: 100000, Total: 3400000

    Type                     Time        Ops/sec    Mem delta
    --------------------------------------------------------
    sync.RWMutex            662ms        5137202       0.02 MB
    sync.Mutex              634ms        5359924       0.02 MB
    sync.Map                 95ms       35720283       0.01 MB
    Channel                 751ms        4525117       8.68 MB

=== CHURN all-write+delete (16 keys) ===
    Writers: 32, Readers: 0, Ops/worker: 100000, Total: 3200000

    Type                     Time        Ops/sec    Mem delta
    --------------------------------------------------------
    sync.RWMutex           1.033s        3096739       0.00 MB
    sync.Mutex             1.002s        3194882       0.01 MB
    sync.Map                683ms        4687663       0.00 MB
    Channel                 1.11s        2882805       0.01 MB

=== CHURN all-write+delete (10k keys) ===
    Writers: 32, Readers: 0, Ops/worker: 100000, Total: 3200000

    Type                     Time        Ops/sec    Mem delta
    --------------------------------------------------------
    sync.RWMutex           1.326s        2412755       0.02 MB
    sync.Mutex             1.269s        2521417       0.01 MB
    sync.Map                202ms       15825116       0.00 MB
    Channel                1.233s        2594732       0.58 MB

=== CHURN all-write+delete (1M keys) ===
    Writers: 32, Readers: 0, Ops/worker: 100000, Total: 3200000

    Type                     Time        Ops/sec    Mem delta
    --------------------------------------------------------
    sync.RWMutex           1.184s        2701938       0.01 MB
    sync.Mutex             1.108s        2888610       0.01 MB
    sync.Map                144ms       22235046       0.01 MB
    Channel                1.437s        2226598      17.34 MB

=== CHURN write+delete + readers (1M keys) ===
    Writers: 16, Readers: 16, Ops/worker: 100000, Total: 3200000

    Type                     Time        Ops/sec    Mem delta
    --------------------------------------------------------
    sync.RWMutex            785ms        4075690       0.00 MB
    sync.Mutex              748ms        4278293       0.01 MB
    sync.Map                 95ms       33640567       0.01 MB
    Channel                1.388s        2305032      13.54 MB
1 Like

Hello — why not use the benchmark support of the testing package for this? Reads are always faster with sync.Map, since it has an atomically accessed read map in addition to the write map. The problems start to become visible under heavy writes, when under the hood sync.Map copies the data into the read map with atomics. It has its benefits: if you write less than you read, it can be a good option. But when you do a lot of writes/deletes of unique elements it falls behind, specifically due to memory consumption.

1 Like

Interesting. I think the docs might be a little misleading/outdated:

The Map type is specialized. Most code should use a plain Go map instead, with separate locking or coordination, for better type safety and to make it easier to maintain other invariants along with the map content.

The Map type is optimized for two common use cases: (1) when the entry for a given key is only ever written once but read many times, as in caches that only grow, or (2) when multiple goroutines read, write, and overwrite entries for disjoint sets of keys. In these two cases, use of a Map may significantly reduce lock contention compared to a Go map paired with a separate Mutex or RWMutex.

I dug into the source a tad and noticed it is using a new-ish internal/HashTrieMap. That struct has this comment:

// HashTrieMap is an implementation of a concurrent hash-trie. The implementation
// is designed around frequent loads, but offers decent performance for stores
// and deletes as well, especially if the map is larger. Its primary use-case is
// the unique package, but can be used elsewhere as well.

… and this was an experiment until mid-2025:

https://cs.opensource.google/go/go/+/e15a14c4ddcb7854ecb5fb2f6dc01e8933a11652

Which led me to this issue:

HashTrieMap was added for the unique package, but it turns out that it’s faster than Map in many cases, including its own microbenchmarks.

So - I think you are running into something the Go core team optimized. And it appears the godoc is lagging behind.

1 Like

I’ve run some tests and I’d say that matches what I’m seeing. Writes/deletes are also really fast, but at the same time memory consumption was huge for the map.

BenchmarkRWData_Write-10                         2096450               563.1 ns/op             0 B/op          0 allocs/op
BenchmarkMutexData_Write-10                      3004311               399.4 ns/op             0 B/op          0 allocs/op
BenchmarkSyncMapData_Write-10                    1533368               787.4 ns/op            64 B/op          2 allocs/op

BenchmarkRWData_Read-10                          2926944               406.5 ns/op             0 B/op          0 allocs/op
BenchmarkMutexData_Read-10                       3190815               376.3 ns/op             0 B/op          0 allocs/op
BenchmarkSyncMapData_Read-10                     3619648               331.4 ns/op             0 B/op          0 allocs/op

BenchmarkRWData_ParallelWrite-10                  740858              1900 ns/op             163 B/op          2 allocs/op
BenchmarkMutexData_ParallelWrite-10               642082              2086 ns/op             175 B/op          2 allocs/op
BenchmarkSyncMapData_ParallelWrite-10            1000000              1301 ns/op             225 B/op          5 allocs/op

BenchmarkMixed_WriteHeavy_16Keys/RWMutex-10               748530              1606 ns/op              72 B/op          1 allocs/op
BenchmarkMixed_WriteHeavy_16Keys/Mutex-10                 774872              1496 ns/op              72 B/op          1 allocs/op
BenchmarkMixed_WriteHeavy_16Keys/SyncMap-10               941096              1138 ns/op             136 B/op          3 allocs/op

BenchmarkWriteBurst/RWMutex-10                            733016              1874 ns/op             164 B/op          2 allocs/op
BenchmarkWriteBurst/Mutex-10                              622605              2062 ns/op             177 B/op          2 allocs/op
BenchmarkWriteBurst/SyncMap-10                           1000000              1130 ns/op             225 B/op          5 allocs/op