map 源码分析

by 夏泽民 Aug 16, 2021

map实现的两个关键数据结构

hmap 定义了map的结构
bmap 定义了hmap.buckets中每个bucket的结构

// map 数据结构

type hmap struct {

    count     int // 元素的个数, len() 的值

    flags     uint8

    B         uint8  // bucket个数为：2^B；可以保存的元素个数： 填充因子(默认6.5) * 2^B 

    noverflow uint16 // 溢出桶数量

    hash0     uint32 // 哈希因子

    buckets    unsafe.Pointer // Buckets数组，大小为 2^B 

    oldbuckets unsafe.Pointer // 前面的Buckets，在增长时非nil

    nevacuate  uintptr        // 迁移状态，进度

    extra *mapextra // optional fields

}

// bucket 数据结构

type bmap struct {

    tophash [bucketCnt]uint8 // bucketCnt 是常量=8，一个bucket最多存储8个key/value对

    // 后面紧跟着8个key

    // 再后面是8个value

    // 最后是overflow的指针

}

比如key的类型为string，value的类型uint8, 在考虑到字节对齐的时候，如果使用k/v的格式存储会浪费内存，使用8个key/8个value的格式会更紧凑。
map 创建

func makemap_small() *hmap 

func makemap(t *maptype, hint int, h *hmap) *hmap 

func makemap64(t *maptype, hint int64, h *hmap) *hmap // hint类型为int64， 实质还是调用的 makemap

当创建map时不指定hint大小，如下面所示的m1。那么调用makemap_small来进行创建
当指定了hint(代表初始化时可以保存的元素的个数)的大小的时候，若hint<=8, 使用makemap_small进行创建map，否则使用makemap创建map

  m1 := make(map[string]string)

  m2 := make(map[string]string, hint)

makemap_small 源码分析
主要是创建hmap结构并初始化hash因子就结束了，并没有初始化buckets

func makemap_small() *hmap {

    h := new(hmap) 

    h.hash0 = fastrand()

    return h

}

makemap源码分析- make(map[string]string, 10)
创建 make(map[string]string, 10) ，由于hint=10, 大于8，因此将使用makemap来实现。

//  hint=10，可以容纳hint个元素

func makemap(t *maptype, hint int, h *hmap) *hmap {

    if hint < 0 || hint > int(maxSliceCap(t.bucket.size)) {

        hint = 0

    } 

    // initialize Hmap

    if h == nil {

        h = new(hmap)

    }

    h.hash0 = fastrand() // hash因子

    // 确定B的大小，每个桶(不含溢出桶)可以有8个k/v对，hmap中含有 1<< B 个桶，具体见overLoadFactor分析

    B := uint8(0)

    for overLoadFactor(hint, B) {

        B++

    }

    h.B = B // 此时 B=1

    // h.B = 1 创建buckets

    if h.B != 0 {

        var nextOverflow *bmap

        h.buckets, nextOverflow = makeBucketArray(t, h.B, nil)// 分配内存

        if nextOverflow != nil {

            h.extra = new(mapextra)

            h.extra.nextOverflow = nextOverflow

        }

    }

    return h

}

const (

    bucketCntBits = 3

    bucketCnt     = 1 << bucketCntBits // =8

    loadFactorNum = 13

    loadFactorDen = 2

)    

// 如果 count > 8 && count > 13 * ( (1<<B) / 2 ), 返回true

// 1 << B bucket个数, 负载因子为： 13/2=6.5

func overLoadFactor(count int, B uint8) bool {

    return count > bucketCnt && uintptr(count) > loadFactorNum*(bucketShift(B)/loadFactorDen)

}

// b=1

func makeBucketArray(t *maptype, b uint8, dirtyalloc unsafe.Pointer) (buckets unsafe.Pointer, nextOverflow *bmap) {

    base := bucketShift(b) // base = 1 << 1 = 2

    nbuckets := base       // nbuckets = 2

    if b >= 4 {

        nbuckets += bucketShift(b - 4)

        sz := t.bucket.size * nbuckets

        up := roundupsize(sz)

        if up != sz {

            nbuckets = up / t.bucket.size

        }

    }

    if dirtyalloc == nil {

        // 申请内存，结构为一个数组，每个元素为 bucket， 个数为 1<<B = 2个，会申请连续内存大小为 bucket.size*nbuckets = 2*272 = 544个字节

        // 这里说明一下 bucket.size为什么等于272？ bmap的结构由四个部分组成，tophash，8个key，8个value，1一个指针。

        // tophash是一个数组，数组的大小为8，类型为uint8, uint8占一个字节，总计字节 8*1 = 8

        // key,value的数据类型都是string类型,string类型占16个字节，总计字节 8*16 + 8*16 = 256

        // 指针在64位cpu上占8个字节。因此总和为 8 + 256 + 8 = 272 个字节

        buckets = newarray(t.bucket, int(nbuckets)) 

    } else {

        buckets = dirtyalloc

        size := t.bucket.size * nbuckets

        if t.bucket.kind&kindNoPointers == 0 {

            memclrHasPointers(buckets, size)

        } else {

            memclrNoHeapPointers(buckets, size)

        }

    }

    if base != nbuckets {

        nextOverflow = (*bmap)(add(buckets, base*uintptr(t.bucketsize)))

        last := (*bmap)(add(buckets, (nbuckets-1)*uintptr(t.bucketsize)))

        last.setoverflow(t, (*bmap)(buckets))

    }

    return buckets, nextOverflow

}

https://www.jianshu.com/p/9fcf9f9a2028

makemap64 是对于传入的第二个参数为int64 的变量使用的。如果hint的值大于int最大值，就将hint赋值为0，否则和makemap 初始化没有差别。为什么不把大于2^31 - 1 的map 直接初始化呢？因为在hmap 中 count 的值就是int，也就是说map最大就是 2^31 - 1 的大小。

https://www.cnblogs.com/-lee/p/12777254.html

https://blog.csdn.net/xz_studying/article/details/109171786

https://blog.csdn.net/xzw12138/article/details/107288181

https://www.cnblogs.com/-lee/p/12807063.html

赋值的实现，golang 为了对不同类型k做了优化，下面时一些实现方法：

func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {}

func mapassign_fast32(t *maptype, h *hmap, key uint32) unsafe.Pointer {}

func mapassign_fast32ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {}

func mapassign_fast64(t *maptype, h *hmap, key uint64) unsafe.Pointer {}

func mapassign_fast64ptr(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer{}

func mapassign_faststr(t *maptype, h *hmap, s string) unsafe.Pointer {}

内容大同小异，我们主要学习mapassign 的实现。

mapassign 方法的实现是查找一个空的bucket，把key赋值到bucket上，然后把val的地址返回,然后直接通过汇编做内存拷贝。

https://blog.csdn.net/u010927340/article/details/110194541

Category golang