redis中字典有以下要点:
(1)它就是一个键值对,对于hash冲突的处理采用了头插法的链式存储来解决。
(2)对rehash,扩展就是取第一个大于等于used * 2的2 ^ n的数作为新的hash表大小;缩紧就是取第一个大于等于used的2 ^ n的数作为新的hash表大小。后面会介绍到dict结构体中是有dictht ht[2]这个成员变量的,为什么是2个呢?就是为了做rehash时交替使用的。那么何时扩展,何时缩紧呢?有个负载因子的概念(负载因子 = used / size,注意这里面的used也是包括了链式存储解决冲突时候的元素个数),没有执行BGSAVE或者BGREWRITEAOF时大于1就会扩展,小于0.1就会缩紧。搞的时候会用渐进式rehash的方法来搞,再此过程中对于字典的删、改、查会在2个ht上面进行,而增只会在新的ht上进行。
主要关注dict.c和dici.h。
首先先看一下结构体dictht:
/* This is our hash table structure. Every dictionary has two of this as we
* implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
//每个具体table[i]中的节点数据类型是dictEntry 结构表示, 每个 dictEntry 结构都保存着一个键值对:
dictEntry **table; // 哈希表大小
unsigned long size; // 哈希表大小掩码,用于计算索引值,总是等于 size - 1
unsigned long sizemask; // 该哈希表已有节点的数量
unsigned long used; } dictht;
再看结构体dictEntry:
typedef struct dictEntry {
// 键
void *key; // 值
union {
void *val;
uint64_t u64;
int64_t s64;
} v; //next 属性是指向另一个哈希表节点的指针, 这个指针可以将多个哈希值相同的键值对连接在一次, 以此来解决键冲突(collision)的问题。
struct dictEntry *next; } dictEntry;
对于k1和k0值相等的情况下,见下图:
再来看结构体dict:
typedef struct dict {//dictCreate创建和初始化
// 类型特定函数,实现多态
dictType *type; // 私有数据
void *privdata; // 哈希表
dictht ht[]; // rehash 索引
int rehashidx; /* rehashing not in progress if rehashidx == -1 */ // 目前正在运行的安全迭代器的数量
int iterators; /* number of iterators currently running */ } dict;
再看结构体dictType:
typedef struct dictType {
unsigned int (*hashFunction)(const void *key); void *(*keyDup)(void *privdata, const void *key); void *(*valDup)(void *privdata, const void *obj); int (*keyCompare)(void *privdata, const void *key1, const void *key2); void (*keyDestructor)(void *privdata, void *key); void (*valDestructor)(void *privdata, void *obj); } dictType;
里面就是各种函数指针。
dictCreate:创建字典,比较简单,不过多解释
/* Create a new hash table */
dict *dictCreate(dictType *type,
void *privDataPtr)
{
dict *d = zmalloc(sizeof(*d)); _dictInit(d,type,privDataPtr); return d;
}
_dictInit:初始化操作
/* Initialize the hash table */
int _dictInit(dict *d, dictType *type,
void *privDataPtr)
{
// 初始化两个哈希表的各项属性值
// 但暂时还不分配内存给哈希表数组
_dictReset(&d->ht[]);
_dictReset(&d->ht[]); // 设置类型特定函数
d->type = type; // 设置私有数据
d->privdata = privDataPtr; // 设置哈希表 rehash 状态
d->rehashidx = -; // 设置字典的安全迭代器数量
d->iterators = ; return DICT_OK;
}
_dictReset:赋初值
static void _dictReset(dictht *ht)
{
ht->table = NULL;
ht->size = ;
ht->sizemask = ;
ht->used = ;
}
dictAdd:添加k-v到字典中,调用关系比较乱,我们来看一下
dictAdd
-->dictAddRaw
-->_dictRehashStep
-->dictRehash
-->_dictKeyIndex
-->_dictExpandIfNeeded
-->dictExpand
-->_dictNextPower
-->dictSetKey
-->dictSetVal
有了调用关系就好办了,下面来分别分析以上函数都做了什么。
_dictRehashStep:分步rehash包裹函数
static void _dictRehashStep(dict *d) {
if (d->iterators == ) dictRehash(d,);
}
dictRehash:分步rehash
int dictRehash(dict *d, int n) {
// 只可以在 rehash 进行中时执行
if (!dictIsRehashing(d)) return ; // 进行 N 步迁移
while(n--) {
dictEntry *de, *nextde; /* Check if we already rehashed the whole table... */
// 如果 0 号哈希表为空,那么表示 rehash 执行完毕
if (d->ht[].used == ) {
zfree(d->ht[].table);
d->ht[] = d->ht[];
_dictReset(&d->ht[]);
d->rehashidx = -;
return ;
} /* Note that rehashidx can't overflow as we are sure there are more
* elements because ht[0].used != 0 */
assert(d->ht[].size > (unsigned)d->rehashidx); // 略过数组中为空的索引,找到下一个非空索引
while(d->ht[].table[d->rehashidx] == NULL) d->rehashidx++; // 指向该索引的链表表头节点
de = d->ht[].table[d->rehashidx];
/* Move all the keys in this bucket from the old to the new hash HT */
while(de) {
unsigned int h; nextde = de->next; /* Get the index in the new hash table */
h = dictHashKey(d, de->key) & d->ht[].sizemask; de->next = d->ht[].table[h];
d->ht[].table[h] = de; // 更新计数器
d->ht[].used--;
d->ht[].used++; // 继续处理下个节点
de = nextde;
}
// 将刚迁移完的哈希表索引的指针设为空
d->ht[].table[d->rehashidx] = NULL;
// 更新 rehash 索引
d->rehashidx++;
} return ;
}
dictExpand:扩充函数,是否需要rehash的标志rehashidx也是从这里面搞的,这样它就不为-1了。
/* Expand or create the hash table */
int dictExpand(dict *d, unsigned long size)
{
// 新哈希表
dictht n; /* the new hash table */ unsigned long realsize = _dictNextPower(size); /* the size is invalid if it is smaller than the number of
* elements already inside the hash table */
if (dictIsRehashing(d) || d->ht[].used > size)
return DICT_ERR; /* Allocate the new hash table and initialize all pointers to NULL */
n.size = realsize;
n.sizemask = realsize-;
n.table = zcalloc(realsize*sizeof(dictEntry*));
n.used = ; //下面是要区分2种情况的,需要注意了 /* Is this the first initialization? If so it's not really a rehashing
* we just set the first hash table so that it can accept keys. */
// 如果 0 号哈希表为空,那么这是一次初始化:
if (d->ht[].table == NULL) {
d->ht[] = n;
return DICT_OK;
} /* Prepare a second hash table for incremental rehashing */
// 如果 0 号哈希表非空,那么这是一次 rehash :
// 程序将新哈希表设置为 1 号哈希表,
// 并将字典的 rehash 标识打开,让程序可以开始对字典进行 rehash
d->ht[] = n;
d->rehashidx = ;
return DICT_OK; }
dictAddRaw:添加新的键到字典
/* Low level add. This function adds the entry but instead of setting
* a value returns the dictEntry structure to the user, that will make
* sure to fill the value field as he wishes.
*
* This function is also directly exposed to user API to be called
* mainly in order to store non-pointers inside the hash value, example:
*
* entry = dictAddRaw(dict,mykey);
* if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
*
* Return values:
*
* If key already exists NULL is returned.
* If key was added, the hash entry is returned to be manipulated by the caller.
*/
dictEntry *dictAddRaw(dict *d, void *key)
{
int index;
dictEntry *entry;
dictht *ht; if (dictIsRehashing(d)) _dictRehashStep(d); /* Get the index of the new element, or -1 if
* the element already exists. */
if ((index = _dictKeyIndex(d, key)) == -)
return NULL; /* Allocate the memory and store the new entry */
ht = dictIsRehashing(d) ? &d->ht[] : &d->ht[];
entry = zmalloc(sizeof(*entry));
entry->next = ht->table[index];
ht->table[index] = entry;
ht->used++; /* Set the hash entry fields. */
dictSetKey(d, entry, key); return entry;
}
dictAdd:添加新的键值对到字典
/* Add an element to the target hash table */
int dictAdd(dict *d, void *key, void *val)
{
dictEntry *entry = dictAddRaw(d,key); // 键已存在,添加失败
if (!entry) return DICT_ERR; // 键不存在,设置节点的值
dictSetVal(d, entry, val); return DICT_OK;
}
dictReplace:更新键值对,原来没有就添加,原来有就更新值
/* Add an element, discarding the old if the key already exists.
* Return 1 if the key was added from scratch, 0 if there was already an element with such key and dictReplace() just performed a value update operation.
*/
int dictReplace(dict *d, void *key, void *val)
{
dictEntry *entry, auxentry; /* Try to add the element. If the key
* does not exists dictAdd will suceed. */
if (dictAdd(d, key, val) == DICT_OK)
return ; /* It already exists, get the entry */
entry = dictFind(d, key); auxentry = *entry; dictSetVal(d, entry, val); dictFreeVal(d, &auxentry); return ;
}
dictGenericDelete:删除指定key对应的一个元素
/* Search and remove an element */
static int dictGenericDelete(dict *d, const void *key, int nofree)
{
unsigned int h, idx;
dictEntry *he, *prevHe;
int table; if (d->ht[].size == ) return DICT_ERR; /* d->ht[0].table is NULL */ if (dictIsRehashing(d)) _dictRehashStep(d); // 计算哈希值
h = dictHashKey(d, key); // 遍历哈希表
for (table = ; table <= ; table++) { // 计算索引值
idx = h & d->ht[table].sizemask;
// 指向该索引上的链表,就是链表的第一个元素
he = d->ht[table].table[idx];
prevHe = NULL; while(he) { if (dictCompareKeys(d, key, he->key)) { /* Unlink the element from the list */
// 就是删除一个就得了
if (prevHe)
prevHe->next = he->next;
else
d->ht[table].table[idx] = he->next; if (!nofree) {
dictFreeKey(d, he);
dictFreeVal(d, he);
} zfree(he); d->ht[table].used--; return DICT_OK;
} prevHe = he;
he = he->next;
} if (!dictIsRehashing(d)) break;
} return DICT_ERR; /* not found */
}
_dictClear:释放hash
/* Destroy an entire dictionary */
int _dictClear(dict *d, dictht *ht, void(callback)(void *)) {
unsigned long i; /* Free all the elements */
for (i = ; i < ht->size && ht->used > ; i++) {
dictEntry *he, *nextHe; if (callback && (i & ) == ) callback(d->privdata); // 跳过空索引
if ((he = ht->table[i]) == NULL) continue; // 遍历整个链表
while(he) {
nextHe = he->next;
dictFreeKey(d, he);
dictFreeVal(d, he);
zfree(he); ht->used--; he = nextHe;
}
} /* Free the table and the allocated cache structure */
zfree(ht->table); /* Re-initialize the table */
_dictReset(ht); return DICT_OK; /* never fails */
}
关于dictGetIterator、dictGetSafeIterator、dictNext、dictReleaseIterator具体应用暂时分析一下:
首先先看用到这几个函数的地方吧
void keysCommand(redisClient *c) {
dictIterator *di;
dictEntry *de; sds pattern = c->argv[]->ptr; int plen = sdslen(pattern), allkeys;
unsigned long numkeys = ;
void *replylen = addDeferredMultiBulkLength(c); /* 首先先搞一个迭代器出来 */
di = dictGetSafeIterator(c->db->dict);
allkeys = (pattern[] == '*' && pattern[] == '\0'); /* 开始遍历迭代器 */
while((de = dictNext(di)) != NULL) {
sds key = dictGetKey(de);
robj *keyobj; if (allkeys || stringmatchlen(pattern,plen,key,sdslen(key),)) { keyobj = createStringObject(key,sdslen(key)); if (expireIfNeeded(c->db,keyobj) == ) {
addReplyBulk(c,keyobj);
numkeys++;
} decrRefCount(keyobj);
}
} /* 释放迭代器 */
dictReleaseIterator(di); setDeferredMultiBulkLength(c,replylen,numkeys);
}
有了上面的应用场景,我们再来看上面几个函数的实现就好办了。
先看dictIterator结构体:
/* If safe is set to 1 this is a safe iterator, that means, you can call
* dictAdd, dictFind, and other functions against the dictionary even while
* iterating. Otherwise it is a non safe iterator, and only dictNext()
* should be called while iterating. */
typedef struct dictIterator { // 被迭代的字典
dict *d; // table :正在被迭代的哈希表号码,值可以是 0 或 1 。 dictht ht[2];中的哪一个
// index :迭代器当前所指向的哈希表索引位置。 对应具体的桶槽位号
// safe :标识这个迭代器是否安全
int table, index, safe; // entry :当前迭代到的节点的指针
// nextEntry :当前迭代节点的下一个节点
dictEntry *entry, *nextEntry; long long fingerprint; /* unsafe iterator fingerprint for misuse detection */
} dictIterator;
dictGetIterator:创建迭代器
dictIterator *dictGetIterator(dict *d)
{
dictIterator *iter = zmalloc(sizeof(*iter)); iter->d = d;
iter->table = ;
iter->index = -;
iter->safe = ;
iter->entry = NULL;
iter->nextEntry = NULL; return iter;
}
dictNext:迭代,遍历桶中的所有节点,第一次遍历该函数的时候直接获取具体桶首元素,否则使用nextEntry。这个函数比较难理解……
dictEntry *dictNext(dictIterator *iter)
{
while () { // 进入这个循环有两种可能:
// 1) 这是迭代器第一次运行
// 2) 当前索引链表中的节点已经迭代完(NULL 为链表的表尾)
if (iter->entry == NULL) { // 指向被迭代的哈希表
dictht *ht = &iter->d->ht[iter->table]; // 初次迭代时执行
if (iter->index == - && iter->table == ) {
// 如果是安全迭代器,那么更新安全迭代器计数器
if (iter->safe)
iter->d->iterators++;
// 如果是不安全迭代器,那么计算指纹
else
iter->fingerprint = dictFingerprint(iter->d);
}
// 更新索引
iter->index++; //默认值为-1,自加后刚好为0,即第一个桶 // 如果迭代器的当前索引大于当前被迭代的哈希表的大小
// 那么说明这个哈希表已经迭代完毕
if (iter->index >= (signed) ht->size) {
// 如果正在 rehash 的话,那么说明 1 号哈希表也正在使用中
// 那么继续对 1 号哈希表进行迭代
if (dictIsRehashing(iter->d) && iter->table == ) {
iter->table++;
iter->index = ;
ht = &iter->d->ht[];
// 如果没有 rehash ,那么说明迭代已经完成
} else {
break;
}
} // 如果进行到这里,说明这个哈希表并未迭代完
// 更新节点指针,指向下个索引链表的表头节点
iter->entry = ht->table[iter->index];
} else {
// 执行到这里,说明程序正在迭代某个链表
// 将节点指针指向链表的下个节点
iter->entry = iter->nextEntry;
} if (iter->entry) {
/* We need to save the 'next' here, the iterator user
* may delete the entry we are returning. */
iter->nextEntry = iter->entry->next;
return iter->entry;
}
} // 迭代完毕
return NULL;
}
dictReleaseIterator:释放迭代器,没有什么好说的了
void dictReleaseIterator(dictIterator *iter)
{ if (!(iter->index == - && iter->table == )) {
// 释放安全迭代器时,安全迭代器计数器减一
if (iter->safe)
iter->d->iterators--;
// 释放不安全迭代器时,验证指纹是否有变化
else
assert(iter->fingerprint == dictFingerprint(iter->d));
}
zfree(iter);
}
关于迭代器里面,安全和非安全迭代器还不是特别明白;另外dictScan暂时没有深入研究,后续再深入研究吧。