Redis系列(十):数据结构Set源码解析和SADD、SINTER、SDIFF、SUNION、SPOP命令

时间:2023-02-21 16:35:15

1.介绍

Hash 以 K->V 的形式存储,而 Set 只存储 K(没有对应的 V),因此更节省空间。

Redis中Set是String类型的无序集合;集合成员是唯一的。

这就意味着集合中不能出现重复的数据。可根据应用场景需要选用该数据类型。(比如:好友/关注/粉丝/感兴趣的人/黑白名单)

2.源码解析

Redis使用Dict和IntSet保存Set数据

// 1. intset data structure, used while the set is small and holds only integers
typedef struct intset {
// Width of each stored element; determined by the values actually stored
uint32_t encoding;
// Number of elements in the array
uint32_t length;
// Container for the elements themselves (accessed as int16_t/int32_t/int64_t
// depending on 'encoding')
int8_t contents[];
} intset;
// 2. dict data structures, i.e. the hash table implementation that also
// backs the hash type. Types are listed so that each one is defined before
// it is used.

/* A single hash table node. Nodes that fall into the same bucket are
 * chained through 'next'. The value is either a pointer or an inline
 * 64 bit integer / double. */
typedef struct dictEntry {
    void *key;
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    struct dictEntry *next;
} dictEntry;

/* Per-dictionary callbacks: hashing, key/value duplication, key comparison
 * and key/value destruction. 'privdata' is passed back to the callbacks. */
typedef struct dictType {
    unsigned int (*hashFunction)(const void *key);
    void *(*keyDup)(void *privdata, const void *key);
    void *(*valDup)(void *privdata, const void *obj);
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    void (*keyDestructor)(void *privdata, void *key);
    void (*valDestructor)(void *privdata, void *obj);
} dictType;

/* This is our hash table structure. Every dictionary has two of this as we
 * implement incremental rehashing, for the old to the new table. */
typedef struct dictht {
    dictEntry **table;
    unsigned long size;
    unsigned long sizemask;
    unsigned long used;
} dictht;

typedef struct dict {
    dictType *type;
    void *privdata;
    dictht ht[2]; /* ht[0] is the old table, ht[1] the new one while rehashing */
    long rehashidx; /* rehashing not in progress if rehashidx == -1 */
    unsigned long iterators; /* number of iterators currently running */
} dict;

/* If safe is set to 1 this is a safe iterator, that means, you can call
 * dictAdd, dictFind, and other functions against the dictionary even while
 * iterating. Otherwise it is a non safe iterator, and only dictNext()
 * should be called while iterating. */
typedef struct dictIterator {
    dict *d;
    long index;
    int table, safe;
    dictEntry *entry, *nextEntry;
    /* unsafe iterator fingerprint for misuse detection. */
    long long fingerprint;
} dictIterator;

3.SADD

加一个或多个指定的member元素到集合的 key中.指定的一个或者多个元素member 如果已经在集合key中存在则忽略.

如果集合key 不存在,则新建集合key,并添加member元素到集合key中.

如果key 的类型不是集合则返回错误.

时间复杂度:O(N)

127.0.0.1:6379> sadd myset "Hello"
(integer) 1
127.0.0.1:6379> sadd myset "Hello"
(integer) 0
127.0.0.1:6379> smembers myset
1) "Hello"
127.0.0.1:6379>
// Usage: SADD key member1 [member2 ...]
// t_set.c, add members to a set
/* SADD implementation: argv[1] is the key, argv[2..argc-1] are the members.
 * Replies with the number of members that were actually added. */
void saddCommand(client *c) {
    robj *set;
    int j, added = 0;

    // Look the set up in the current db first
    set = lookupKeyWrite(c->db,c->argv[1]);
    if (set == NULL) {
        // 1. Create a new set (its encoding is chosen from the first member)
        //    and register it in the current db
        set = setTypeCreate(c->argv[2]->ptr);
        dbAdd(c->db,c->argv[1],set);
    } else {
        if (set->type != OBJ_SET) {
            addReply(c,shared.wrongtypeerr);
            return;
        }
    }

    // Add the members one by one
    for (j = 2; j < c->argc; j++) {
        // 2. 'added' is only incremented when the member was really inserted
        if (setTypeAdd(set,c->argv[j]->ptr)) added++;
    }

    // Signal the modification and emit the keyspace event
    if (added) {
        signalModifiedKey(c->db,c->argv[1]);
        notifyKeyspaceEvent(NOTIFY_SET,"sadd",c->argv[1],c->db->id);
    }
    server.dirty += added;
    // Reply with the number of members added
    addReplyLongLong(c,added);
}
// 1. Creating a new set instance (encoding depends on the first member)
// t_set.c, set factory
/* Factory method to return a set that *can* hold "value". When the object has
 * an integer-encodable value, an intset will be returned. Otherwise a regular
 * hash table. */
robj *setTypeCreate(sds value) {
    // An integer-looking first member selects the intset encoding, anything
    // else the dict encoding. The article's reasoning: if the first member
    // is an integer the following ones usually are too, so the choice tends
    // to be stable (the hash type, by contrast, always starts as a ziplist).
    int integer_like = (isSdsRepresentableAsLongLong(value,NULL) == C_OK);

    return integer_like ? createIntsetObject() : createSetObject();
}
// 1.1. Creating an intset-encoded set
// object.c
/* Build a set object whose payload is a freshly created, empty intset and
 * tag it with the intset encoding. */
robj *createIntsetObject(void) {
    robj *obj = createObject(OBJ_SET,intsetNew());

    obj->encoding = OBJ_ENCODING_INTSET;
    return obj;
}
// intset.c, allocate an empty intset
/* Create an empty intset. It starts with the narrowest (16 bit) encoding
 * and zero elements. */
intset *intsetNew(void) {
    intset *is = zmalloc(sizeof(intset));
    is->encoding = intrev32ifbe(INTSET_ENC_INT16);
    is->length = 0;
    return is;
}
// 1.2. Creating a dict-encoded set
/* Build a set object backed by a new dict (created with setDictType) and
 * tag it with the hash table encoding. */
robj *createSetObject(void) {
    robj *obj = createObject(OBJ_SET,dictCreate(&setDictType,NULL));

    obj->encoding = OBJ_ENCODING_HT;
    return obj;
}
// dict.c
/* Create a new hash table: allocate the dict and initialize it with the
 * given type callbacks and private data pointer. */
dict *dictCreate(dictType *type, void *privDataPtr)
{
    dict *table = zmalloc(sizeof(dict));

    _dictInit(table, type, privDataPtr);
    return table;
}
/* Initialize the hash table: reset both internal tables, store the type and
 * private data, mark "not rehashing" (rehashidx == -1) and zero the running
 * iterator counter. Always returns DICT_OK. */
int _dictInit(dict *d, dictType *type,
        void *privDataPtr)
{
    _dictReset(&d->ht[0]);
    _dictReset(&d->ht[1]);
    d->type = type;
    d->privdata = privDataPtr;
    d->rehashidx = -1;
    d->iterators = 0;
    return DICT_OK;
}
// 2. Adding a member to the set
// t_set.c, add an element
/* Add the specified value into a set.
 *
 * If the value was already member of the set, nothing is done and 0 is
 * returned, otherwise the new element is added and 1 is returned. */
int setTypeAdd(robj *subject, sds value) {
    long long llval;
    // 2.1. The HT and INTSET encodings are handled separately
    if (subject->encoding == OBJ_ENCODING_HT) {
        dict *ht = subject->ptr;
        // Insert with 'value' as key. dictAddRaw returns NULL when the key
        // already exists (nothing to add); otherwise it performs a rehash
        // step if needed and allocates the new dictEntry.
        dictEntry *de = dictAddRaw(ht,value);
        if (de) {
            // Replace the key with a private copy, sdsdup(value); sets carry
            // no value, so it is NULL
            dictSetKey(ht,de,sdsdup(value));
            dictSetVal(ht,de,NULL);
            return 1;
        }
    }
    // 2.2. Adding to an intset-encoded set
    else if (subject->encoding == OBJ_ENCODING_INTSET) {
        // Try to parse value as an integer; the result is stored in llval
        if (isSdsRepresentableAsLongLong(value,&llval) == C_OK) {
            uint8_t success = 0;
            // Case 1: the value can live in the intset
            subject->ptr = intsetAdd(subject->ptr,llval,&success);
            if (success) {
                /* Convert to regular set when the intset contains
                 * too many entries. */
                // Default threshold per the article: 512 entries
                if (intsetLen(subject->ptr) > server.set_max_intset_entries)
                    // 2.3. Convert the intset encoding to the ht encoding
                    setTypeConvert(subject,OBJ_ENCODING_HT);
                return 1;
            }
        } else {
            // Case 2: the member is a plain string. Convert the container to
            // the ht encoding first, then add through the dict path.
            /* Failed to get integer from object, convert to regular set. */
            setTypeConvert(subject,OBJ_ENCODING_HT);

            /* The set *was* an intset and this value is not integer
             * encodable, so dictAdd should always work. */
            serverAssert(dictAdd(subject->ptr,sdsdup(value),NULL) == DICT_OK);
            return 1;
        }
    } else {
        serverPanic("Unknown set encoding");
    }
    return 0;
}
// 2.1. Adding a member to a dict (brief; covered in the hash article)
// dict.c, add a key to dictionary d
/* Low level add. This function adds the entry but instead of setting
 * a value returns the dictEntry structure to the user, that will make
 * sure to fill the value field as he wishes.
 *
 * This function is also directly exposed to the user API to be called
 * mainly in order to store non-pointers inside the hash value, example:
 *
 * entry = dictAddRaw(dict,mykey);
 * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
 *
 * Return values:
 *
 * If key already exists NULL is returned.
 * If key was added, the hash entry is returned to be manipulated by the caller.
 */
dictEntry *dictAddRaw(dict *d, void *key)
{
    int index;
    dictEntry *entry;
    dictht *ht;

    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* Get the index of the new element, or -1 if
     * the element already exists. */
    // i.e. the bucket (slot) the key belongs to; -1 means the key is
    // already present and nothing can be added
    if ((index = _dictKeyIndex(d, key)) == -1)
        return NULL;

    /* Allocate the memory and store the new entry.
     * Insert the element in top, with the assumption that in a database
     * system it is more likely that recently added entries are accessed
     * more frequently. */
    // While rehashing, new entries always go to the new table ht[1]
    ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
    entry = zmalloc(sizeof(*entry));
    entry->next = ht->table[index];
    ht->table[index] = entry;
    ht->used++;

    /* Set the hash entry fields. */
    dictSetKey(d, entry, key);
    return entry;
}
// 2.2. Adding an integer to an intset
// intset.c, add a value
/* Insert an integer in the intset.
 *
 * Returns the (possibly reallocated) intset. When 'success' is not NULL it
 * is set to 1 if the value was inserted and to 0 if it was already present. */
intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
    // Encoding (width) required to represent 'value'
    uint8_t valenc = _intsetValueEncoding(value);
    uint32_t pos;
    if (success) *success = 1;

    /* Upgrade encoding if necessary. If we need to upgrade, we know that
     * this value should be either appended (if > 0) or prepended (if < 0),
     * because it lies outside the range of existing values. */
    // A new intset starts as INTSET_ENC_INT16 (16 bit).
    // 2.2.1. The value does not fit the current width: widen, then add.
    // Such a value is necessarily the new maximum or the new minimum, which
    // also tells us the intset is kept sorted.
    if (valenc > intrev32ifbe(is->encoding)) {
        /* This always succeeds, so we don't need to curry *success. */
        return intsetUpgradeAndAdd(is,value);
    } else {
        /* Abort if the value is already present in the set.
         * This call will populate "pos" with the right position to insert
         * the value when it cannot be found. */
        // 2.2.2. Add with the current encoding. If the value is found it
        // cannot be added again; otherwise 'pos' is the insertion index.
        if (intsetSearch(is,value,&pos)) {
            if (success) *success = 0;
            return is;
        }

        is = intsetResize(is,intrev32ifbe(is->length)+1);
        // Unless inserting at the end, shift the tail one slot to the right
        // to open a gap at 'pos'
        if (pos < intrev32ifbe(is->length)) intsetMoveTail(is,pos,pos+1);
    }

    // Encoding unchanged: store the value at 'pos' and bump the length
    _intsetSet(is,pos,value);
    is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
    return is;
}
// Determine the width needed for a value.
// INTSET_ENC_INT16 < INTSET_ENC_INT32 < INTSET_ENC_INT64
// 2 < 4 < 8
/* Return the required encoding for the provided value. */
static uint8_t _intsetValueEncoding(int64_t v) {
    /* Try the narrowest range first and widen only when needed. */
    if (v >= INT16_MIN && v <= INT16_MAX) return INTSET_ENC_INT16;
    if (v >= INT32_MIN && v <= INT32_MAX) return INTSET_ENC_INT32;
    return INTSET_ENC_INT64;
}
// 2.2.1. Upgrade the element width, then add the value
// intset.c
/* Upgrades the intset to a larger encoding and inserts the given integer.
 * Returns the reallocated intset. */
static intset *intsetUpgradeAndAdd(intset *is, int64_t value) {
    uint8_t curenc = intrev32ifbe(is->encoding);
    uint8_t newenc = _intsetValueEncoding(value);
    int length = intrev32ifbe(is->length);
    int prepend = value < 0 ? 1 : 0;

    /* First set new encoding and resize */
    is->encoding = intrev32ifbe(newenc);
    // An upgrade always grows the array by one element
    is = intsetResize(is,intrev32ifbe(is->length)+1);

    /* Upgrade back-to-front so we don't overwrite values.
     * Note that the "prepend" variable is used to make sure we have an empty
     * space at either the beginning or the end of the intset. */
    // The width changed, so old and new slots no longer line up: every
    // element is read with the old encoding and rewritten with the new one.
    // Going from the last element to the first means a write never lands on
    // a value that has not been moved yet.
    while(length--)
        _intsetSet(is,length+prepend,_intsetGetEncoded(is,length,curenc));

    /* Set the value at the beginning or the end. */
    // A negative value goes to index 0, otherwise right after the last element
    if (prepend)
        _intsetSet(is,0,value);
    else
        _intsetSet(is,intrev32ifbe(is->length),value);
    is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
    return is;
}
/* Resize the intset so its contents can hold 'len' elements of the current
 * encoding. Returns the pointer handed back by zrealloc. */
static intset *intsetResize(intset *is, uint32_t len) {
    /* Payload size in bytes: element count times the encoding value. */
    uint32_t payload = len*intrev32ifbe(is->encoding);

    return zrealloc(is,sizeof(intset)+payload);
}
// intset.c, read the value stored at index pos
/* Return the value at pos, given an encoding. The element is copied out
 * with memcpy, passed through the memrev*ifbe helper of matching width and
 * returned widened to int64_t. */
static int64_t _intsetGetEncoded(intset *is, int pos, uint8_t enc) {
    if (enc == INTSET_ENC_INT64) {
        int64_t wide;
        memcpy(&wide,((int64_t*)is->contents)+pos,sizeof(wide));
        memrev64ifbe(&wide);
        return wide;
    }

    if (enc == INTSET_ENC_INT32) {
        int32_t medium;
        memcpy(&medium,((int32_t*)is->contents)+pos,sizeof(medium));
        memrev32ifbe(&medium);
        return medium;
    }

    /* Any other encoding is read as 16 bit. */
    int16_t narrow;
    memcpy(&narrow,((int16_t*)is->contents)+pos,sizeof(narrow));
    memrev16ifbe(&narrow);
    return narrow;
}
// intset.c, store a value at index pos; conceptually a plain array
// assignment, but the element type is only known at run time, so the write
// goes through a pointer cast chosen by the encoding
/* Set the value at pos, using the configured encoding. */
static void _intsetSet(intset *is, int pos, int64_t value) {
    uint32_t width = intrev32ifbe(is->encoding);

    if (width == INTSET_ENC_INT64) {
        int64_t *slot = ((int64_t*)is->contents)+pos;
        *slot = value;
        memrev64ifbe(slot);
        return;
    }

    if (width == INTSET_ENC_INT32) {
        int32_t *slot = ((int32_t*)is->contents)+pos;
        *slot = value;
        memrev32ifbe(slot);
        return;
    }

    /* Any other encoding is written as 16 bit. */
    int16_t *slot = ((int16_t*)is->contents)+pos;
    *slot = value;
    memrev16ifbe(slot);
}
// 2.2.2. With an unchanged encoding we must search for the slot of 'value'
// (to learn whether it already exists and, if not, where to insert it)
/* Search for the position of "value". Return 1 when the value was found and
 * sets "pos" to the position of the value within the intset. Return 0 when
 * the value is not present in the intset and sets "pos" to the position
 * where "value" can be inserted. "pos" may be NULL when the caller only
 * needs the found/not-found answer. */
static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
    int min = 0, max = intrev32ifbe(is->length)-1, mid = -1;
    int64_t cur = -1;

    /* The value can never be found when the set is empty */
    if (intrev32ifbe(is->length) == 0) {
        if (pos) *pos = 0;
        return 0;
    } else {
        /* Check for the case where we know we cannot find the value,
         * but do know the insert position. */
        // The intset is a sorted array, so a value beyond either end is
        // certainly absent
        if (value > _intsetGet(is,intrev32ifbe(is->length)-1)) {
            if (pos) *pos = intrev32ifbe(is->length);
            return 0;
        } else if (value < _intsetGet(is,0)) {
            if (pos) *pos = 0;
            return 0;
        }
    }

    // Binary search
    while(max >= min) {
        mid = ((unsigned int)min + (unsigned int)max) >> 1;
        cur = _intsetGet(is,mid);
        if (value > cur) {
            min = mid+1;
        } else if (value < cur) {
            max = mid-1;
        } else {
            // Found
            break;
        }
    }

    if (value == cur) {
        if (pos) *pos = mid;
        return 1;
    } else {
        // Not found: 'min' is the index where the value has to be inserted
        // (the index of the first element greater than it)
        if (pos) *pos = min;
        return 0;
    }
}
// intset tail move (a memory move)
/* Move the elements from index 'from' up to the end of the intset so that
 * they start at index 'to'. memmove is used, so overlapping source and
 * destination ranges are fine. */
static void intsetMoveTail(intset *is, uint32_t from, uint32_t to) {
    uint32_t bytes = intrev32ifbe(is->length)-from; /* element count for now */
    uint32_t width = intrev32ifbe(is->encoding);

    if (width == INTSET_ENC_INT64) {
        bytes *= sizeof(int64_t);
        memmove((int64_t*)is->contents+to,(int64_t*)is->contents+from,bytes);
        return;
    }

    if (width == INTSET_ENC_INT32) {
        bytes *= sizeof(int32_t);
        memmove((int32_t*)is->contents+to,(int32_t*)is->contents+from,bytes);
        return;
    }

    /* Any other encoding is moved as 16 bit elements. */
    bytes *= sizeof(int16_t);
    memmove((int16_t*)is->contents+to,(int16_t*)is->contents+from,bytes);
}
// 2.3. Converting the intset encoding to the ht encoding (on a string
// member, or when the intset grows past the threshold, default 512)
// t_set.c, encoding conversion
/* Convert the set to specified encoding. The resulting dict (when converting
 * to a hash table) is presized to hold the number of elements in the original
 * set. */
void setTypeConvert(robj *setobj, int enc) {
    setTypeIterator *si;
    // The caller must guarantee a set object that is intset encoded
    serverAssertWithInfo(NULL,setobj,setobj->type == OBJ_SET &&
                             setobj->encoding == OBJ_ENCODING_INTSET);

    if (enc == OBJ_ENCODING_HT) {
        int64_t intele;
        // Create a dict to receive the data
        dict *d = dictCreate(&setDictType,NULL);
        sds element;

        /* Presize the dict to avoid rehashing */
        dictExpand(d,intsetLen(setobj->ptr));

        /* To add the elements we extract integers and create redis objects */
        // The set iterator drives the conversion; -1 marks the end
        si = setTypeInitIterator(setobj);
        while (setTypeNext(si,&element,&intele) != -1) {
            // The source is an intset, so only 'intele' carries data; it is
            // turned into an sds string that becomes the dict key
            element = sdsfromlonglong(intele);
            // Set members are unique, so the insertion must succeed
            serverAssert(dictAdd(d,element,NULL) == DICT_OK);
        }
        // Release the iterator
        setTypeReleaseIterator(si);

        setobj->encoding = OBJ_ENCODING_HT;
        zfree(setobj->ptr);
        setobj->ptr = d;
    } else {
        serverPanic("Unsupported set conversion");
    }
}
// t_set.c, obtain an iterator over a set
/* Allocate and initialize an iterator for 'subject'. The iterator records
 * the set and its encoding at creation time. */
setTypeIterator *setTypeInitIterator(robj *subject) {
    setTypeIterator *si = zmalloc(sizeof(setTypeIterator));
    // Fields common to both encodings
    si->subject = subject;
    si->encoding = subject->encoding;
    // A hash table is walked through a dict iterator
    if (si->encoding == OBJ_ENCODING_HT) {
        si->di = dictGetIterator(subject->ptr);
    }
    // An intset only needs a running index, starting at 0
    else if (si->encoding == OBJ_ENCODING_INTSET) {
        si->ii = 0;
    } else {
        serverPanic("Unknown set encoding");
    }
    return si;
}
// dict.c, dict iterator initialization
/* Allocate a non safe iterator positioned before the first bucket of
 * table 0 (index -1, no current or next entry). */
dictIterator *dictGetIterator(dict *d)
{
    dictIterator *iter = zmalloc(sizeof(*iter));

    iter->d = d;
    iter->table = 0;
    iter->index = -1;
    iter->safe = 0;
    iter->entry = NULL;
    iter->nextEntry = NULL;
    return iter;
}
// t_set.c
/* Move to the next entry in the set. Returns the object at the current
 * position.
 *
 * Since set elements can be internally be stored as SDS strings or
 * simple arrays of integers, setTypeNext returns the encoding of the
 * set object you are iterating, and will populate the appropriate pointer
 * (sdsele) or (llele) accordingly.
 *
 * Note that both the sdsele and llele pointers should be passed and cannot
 * be NULL since the function will try to defensively populate the non
 * used field with values which are easy to trap if misused.
 *
 * When there are no longer elements -1 is returned. */
int setTypeNext(setTypeIterator *si, sds *sdsele, int64_t *llele) {
    // Hash table: hand back the entry key
    if (si->encoding == OBJ_ENCODING_HT) {
        dictEntry *de = dictNext(si->di);
        if (de == NULL) return -1;
        *sdsele = dictGetKey(de);
        *llele = -123456789; /* Not needed. Defensive. */
    }
    // Intset: read the element at the running index, then advance it
    else if (si->encoding == OBJ_ENCODING_INTSET) {
        if (!intsetGet(si->subject->ptr,si->ii++,llele))
            return -1;
        *sdsele = NULL; /* Not needed. Defensive. */
    } else {
        serverPanic("Wrong set encoding in setTypeNext");
    }
    return si->encoding;
}
// case 1: for an intset, iterating is just advancing an index
// intset.c
/* Sets the value to the value at the given position. When this position is
 * out of range the function returns 0, when in range it returns 1. */
uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
    if (pos < intrev32ifbe(is->length)) {
        *value = _intsetGet(is,pos);
        return 1;
    }
    return 0;
}
/* Return the value at pos, using the configured encoding (the one stored
 * in the intset header). */
static int64_t _intsetGet(intset *is, int pos) {
    uint32_t current = intrev32ifbe(is->encoding);

    return _intsetGetEncoded(is,pos,current);
}
/* Return the value at pos, given an encoding. (The article lists this
 * function a second time here.) The element is copied out with memcpy,
 * passed through the memrev*ifbe helper of matching width and returned
 * widened to int64_t. */
static int64_t _intsetGetEncoded(intset *is, int pos, uint8_t enc) {
    if (enc == INTSET_ENC_INT64) {
        int64_t wide;
        memcpy(&wide,((int64_t*)is->contents)+pos,sizeof(wide));
        memrev64ifbe(&wide);
        return wide;
    }

    if (enc == INTSET_ENC_INT32) {
        int32_t medium;
        memcpy(&medium,((int32_t*)is->contents)+pos,sizeof(medium));
        memrev32ifbe(&medium);
        return medium;
    }

    /* Any other encoding is read as 16 bit. */
    int16_t narrow;
    memcpy(&narrow,((int16_t*)is->contents)+pos,sizeof(narrow));
    memrev16ifbe(&narrow);
    return narrow;
}
// (side note) case 2: iterating a dict
// dict.c
/* Advance the iterator and return the next entry, or NULL when every
 * bucket (of both tables while rehashing) has been visited. */
dictEntry *dictNext(dictIterator *iter)
{
    // Keep scanning until an entry is found or the tables are exhausted
    while (1) {
        // iter->entry is NULL in two situations: 1. on the first call;
        // 2. when the previous bucket's collision chain is exhausted
        if (iter->entry == NULL) {
            dictht *ht = &iter->d->ht[iter->table];
            // First call: register a safe iterator, or take a fingerprint
            // for an unsafe one
            if (iter->index == -1 && iter->table == 0) {
                if (iter->safe)
                    iter->d->iterators++;
                else
                    iter->fingerprint = dictFingerprint(iter->d);
            }
            // Buckets are visited by index, so empty buckets cost one loop
            // iteration each before the next non-empty one is reached
            iter->index++;
            if (iter->index >= (long) ht->size) {
                // Table 0 is done: continue with table 1 if rehashing,
                // otherwise the iteration is over
                if (dictIsRehashing(iter->d) && iter->table == 0) {
                    iter->table++;
                    iter->index = 0;
                    ht = &iter->d->ht[1];
                } else {
                    break;
                }
            }
            iter->entry = ht->table[iter->index];
        } else {
            // Continue along the chain; nextEntry may be NULL, in which
            // case the next loop iteration moves on to the next bucket
            iter->entry = iter->nextEntry;
        }
        // An empty slot means: go on with the next index
        if (iter->entry) {
            /* We need to save the 'next' here, the iterator user
             * may delete the entry we are returning. */
            iter->nextEntry = iter->entry->next;
            return iter->entry;
        }
    }
    return NULL;
}

4.SISMEMBER

返回成员 member 是否是存储的集合 key的成员.

如果member元素是集合key的成员,则返回1

如果member元素不是key的成员,或者集合key不存在,则返回0

时间复杂度:O(1)

127.0.0.1:6379> sismember myset "Hello"
(integer) 1
127.0.0.1:6379> sismember myset "World"
(integer) 0
127.0.0.1:6379>
// Usage: SISMEMBER key member
// t_set.c
/* SISMEMBER implementation: argv[1] is the key, argv[2] the member.
 * Replies 1 when the member is in the set, 0 otherwise (a missing key also
 * yields 0). */
void sismemberCommand(client *c) {
    robj *set;

    if ((set = lookupKeyReadOrReply(c,c->argv[1],shared.czero)) == NULL ||
        checkType(c,set,OBJ_SET)) return;

    // The real work happens in setTypeIsMember
    if (setTypeIsMember(set,c->argv[2]->ptr))
        // Reply 1
        addReply(c,shared.cone);
    else
        // Reply 0
        addReply(c,shared.czero);
}
// t_set.c
/* Return non-zero when 'value' is a member of the set, 0 otherwise. */
int setTypeIsMember(robj *subject, sds value) {
    long long llval;
    if (subject->encoding == OBJ_ENCODING_HT) {
        // Hash table lookup: hash the key, then walk the bucket chain
        return dictFind((dict*)subject->ptr,value) != NULL;
    } else if (subject->encoding == OBJ_ENCODING_INTSET) {
        // An intset can only contain the value if it parses as an integer
        if (isSdsRepresentableAsLongLong(value,&llval) == C_OK) {
            // The intset is sorted, so this is a binary search
            return intsetFind((intset*)subject->ptr,llval);
        }
    } else {
        serverPanic("Unknown set encoding");
    }
    return 0;
}
/* Determine whether a value belongs to this set */
uint8_t intsetFind(intset *is, int64_t value) {
    /* A value that needs a wider encoding than the set uses cannot be a
     * member, so the search is skipped. */
    if (_intsetValueEncoding(value) > intrev32ifbe(is->encoding)) return 0;

    /* Otherwise the answer comes from intsetSearch (position not needed). */
    return intsetSearch(is,value,NULL) ? 1 : 0;
}

5.SINTER、SDIFF、SUNION

sinter:

返回指定所有的集合的成员的交集,例如(共同好友)

sdiff:

返回一个集合与给定集合的差集的元素

sunion

返回给定的多个集合的并集中的所有成员

127.0.0.1:6379> sadd myset1 1 2 3 4 5
(integer) 5
127.0.0.1:6379> sadd myset2 3 4 5 6 7
(integer) 5
127.0.0.1:6379> sinter myset1 myset2
1) "3"
2) "4"
3) "5"
127.0.0.1:6379> sdiff myset1 myset2
1) "1"
2) "2"
127.0.0.1:6379> sunion myset1 myset2
1) "1"
2) "2"
3) "3"
4) "4"
5) "5"
6) "6"
7) "7"
127.0.0.1:6379>

sinter源码解析

// Usage: SINTER key1 [key2 ...]
// t_set.c, SINTER implementation
/* SINTER: intersect the sets named by argv[1..argc-1] and reply with the
 * members. */
void sinterCommand(client *c) {
    // The last argument is the destination key used by the storing variant;
    // NULL means "reply with the result". Both variants share one helper.
    sinterGenericCommand(c,c->argv+1,c->argc-1,NULL);
}
// t_set.c, intersection of the sets stored at n keys
/* Compute the intersection of the 'setnum' sets named by 'setkeys'.
 *
 * With dstkey == NULL the members are sent to the client as a multi bulk
 * reply. Otherwise the result replaces the value of 'dstkey' (the key is
 * deleted when the intersection is empty) and the reply is the cardinality
 * of the result. */
void sinterGenericCommand(client *c, robj **setkeys,
                          unsigned long setnum, robj *dstkey) {
    robj **sets = zmalloc(sizeof(robj*)*setnum);
    setTypeIterator *si;
    robj *dstset = NULL;
    sds elesds;
    int64_t intobj;
    void *replylen = NULL;
    unsigned long j, cardinality = 0;
    int encoding;

    for (j = 0; j < setnum; j++) {
        // Look up the set stored at each key
        robj *setobj = dstkey ?
            lookupKeyWrite(c->db,setkeys[j]) :
            lookupKeyRead(c->db,setkeys[j]);
        // A single missing set makes the intersection empty: stop here
        if (!setobj) {
            zfree(sets);
            if (dstkey) {
                // Empty result: the destination key is simply deleted
                if (dbDelete(c->db,dstkey)) {
                    signalModifiedKey(c->db,dstkey);
                    server.dirty++;
                }
                addReply(c,shared.czero);
            } else {
                addReply(c,shared.emptymultibulk);
            }
            return;
        }
        if (checkType(c,setobj,OBJ_SET)) {
            zfree(sets);
            return;
        }
        sets[j] = setobj;
    }
    /* Sort sets from the smallest to largest, this will improve our
     * algorithm's performance */
    // qsort by cardinality so that the smallest set comes first
    qsort(sets,setnum,sizeof(robj*),qsortCompareSetsByCardinality);

    /* The first thing we should output is the total number of elements...
     * since this is a multi-bulk write, but at this stage we don't know
     * the intersection set size, so we use a trick, append an empty object
     * to the output list and save the pointer to later modify it with the
     * right length */
    if (!dstkey) {
        replylen = addDeferredMultiBulkLength(c);
    } else {
        /* If we have a target key where to store the resulting set
         * create this key with an empty set inside */
        dstset = createIntsetObject();
    }

    /* Iterate all the elements of the first (smallest) set, and test
     * the element against all the other sets, if at least one set does
     * not include the element it is discarded */
    // The intersection is computed by plain iteration: walk the smallest
    // set and probe every other set; the first miss rejects the element
    si = setTypeInitIterator(sets[0]);
    while((encoding = setTypeNext(si,&elesds,&intobj)) != -1) {
        for (j = 1; j < setnum; j++) {
            if (sets[j] == sets[0]) continue;
            // Membership test, split by the encoding of the iterated set
            if (encoding == OBJ_ENCODING_INTSET) {
                /* intset with intset is simple... and fast */
                // Both sets are intsets: binary search directly
                if (sets[j]->encoding == OBJ_ENCODING_INTSET &&
                    !intsetFind((intset*)sets[j]->ptr,intobj))
                {
                    break;
                /* in order to compare an integer with an object we
                 * have to use the generic function, creating an object
                 * for this */
                } else if (sets[j]->encoding == OBJ_ENCODING_HT) {
                    // Different encodings may still hold the same member:
                    // convert the integer to a string and reuse
                    // setTypeIsMember
                    elesds = sdsfromlonglong(intobj);
                    if (!setTypeIsMember(sets[j],elesds)) {
                        sdsfree(elesds);
                        break;
                    }
                    sdsfree(elesds);
                }
            } else if (encoding == OBJ_ENCODING_HT) {
                if (!setTypeIsMember(sets[j],elesds)) {
                    break;
                }
            }
        }

        /* Only take action when all sets contain the member */
        // The inner loop ran to completion, so every set has the element
        if (j == setnum) {
            // Not storing: send the element straight to the client
            if (!dstkey) {
                if (encoding == OBJ_ENCODING_HT)
                    addReplyBulkCBuffer(c,elesds,sdslen(elesds));
                else
                    addReplyBulkLongLong(c,intobj);
                cardinality++;
            }
            // Storing: collect the element in dstset
            else {
                if (encoding == OBJ_ENCODING_INTSET) {
                    elesds = sdsfromlonglong(intobj);
                    setTypeAdd(dstset,elesds);
                    sdsfree(elesds);
                } else {
                    setTypeAdd(dstset,elesds);
                }
            }
        }
    }
    setTypeReleaseIterator(si);

    if (dstkey) {
        /* Store the resulting set into the target, if the intersection
         * is not an empty set. */
        // Whatever dstkey held before is deleted first
        int deleted = dbDelete(c->db,dstkey);
        if (setTypeSize(dstset) > 0) {
            dbAdd(c->db,dstkey,dstset);
            addReplyLongLong(c,setTypeSize(dstset));
            notifyKeyspaceEvent(NOTIFY_SET,"sinterstore",
                dstkey,c->db->id);
        } else {
            decrRefCount(dstset);
            addReply(c,shared.czero);
            if (deleted)
                notifyKeyspaceEvent(NOTIFY_GENERIC,"del",
                    dstkey,c->db->id);
        }
        signalModifiedKey(c->db,dstkey);
        server.dirty++;
    } else {
        setDeferredMultiBulkLength(c,replylen,cardinality);
    }
    zfree(sets);
}
// compare 方法
int qsortCompareSetsByCardinality(const void *s1, const void *s2) {
return setTypeSize(*(robj**)s1)-setTypeSize(*(robj**)s2);
}
// 快排样例 sort.lua
-- extracted from Programming Pearls, page 110
-- Sorts x[l..u] in place; f(a,b) must return true when a sorts before b.
function qsort(x,l,u,f)
  if l<u then
    local m=math.random(u-(l-1))+l-1 -- choose a random pivot in range l..u
    x[l],x[m]=x[m],x[l] -- swap pivot to first position
    local t=x[l] -- pivot value
    m=l
    local i=l+1
    while i<=u do
      -- invariant: x[l+1..m] < t <= x[m+1..i-1]
      if f(x[i],t) then
        m=m+1
        x[m],x[i]=x[i],x[m] -- swap x[i] and x[m]
      end
      i=i+1
    end
    x[l],x[m]=x[m],x[l] -- swap pivot to a valid place
    -- x[l+1..m-1] < x[m] <= x[m+1..u]
    qsort(x,l,m-1,f)
    qsort(x,m+1,u,f)
  end
end

sdiff和sunion源码解析

/* SUNION key [key ...]: reply with the union of the sets at argv[1..]. */
void sunionCommand(client *c) {
    sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,SET_OP_UNION);
}

/* SUNIONSTORE destination key [key ...]: argv[1] is the destination key,
 * the source keys start at argv[2]. */
void sunionstoreCommand(client *c) {
    sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],SET_OP_UNION);
}

/* SDIFF key [key ...]: reply with the members of the first set that are in
 * none of the following sets. */
void sdiffCommand(client *c) {
    sunionDiffGenericCommand(c,c->argv+1,c->argc-1,NULL,SET_OP_DIFF);
}

/* SDIFFSTORE destination key [key ...]: argv[1] is the destination key,
 * the source keys start at argv[2]. */
void sdiffstoreCommand(client *c) {
    sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],SET_OP_DIFF);
}
// Usage: SDIFFSTORE destination key1 [key2 ...]
// t_set.c (annotated repeat of the definition listed just above)
void sdiffstoreCommand(client *c) {
    // SDIFF and SUNION share one implementation: both collect elements
    // from several sets into a temporary set (the article compares this to
    // a SQL full join).
    // c->argv[1] is the destination key; the source keys start at argv[2]
    sunionDiffGenericCommand(c,c->argv+2,c->argc-2,c->argv[1],SET_OP_DIFF);
}
// t_set.c, difference and union
/* Compute the union (op == SET_OP_UNION) or the difference
 * (op == SET_OP_DIFF) of the 'setnum' sets named by 'setkeys'.
 *
 * With dstkey == NULL the members are sent to the client. Otherwise the
 * result replaces the value of 'dstkey' (the key is deleted when the result
 * is empty) and the reply is the cardinality of the result. */
void sunionDiffGenericCommand(client *c, robj **setkeys, int setnum,
                              robj *dstkey, int op) {
    robj **sets = zmalloc(sizeof(robj*)*setnum);
    setTypeIterator *si;
    robj *dstset = NULL;
    sds ele;
    int j, cardinality = 0;
    int diff_algo = 1;

    // Same pattern as SINTER: resolve every key first. Unlike SINTER a
    // missing key is allowed (treated as an empty set); a wrong type is not.
    for (j = 0; j < setnum; j++) {
        robj *setobj = dstkey ?
            lookupKeyWrite(c->db,setkeys[j]) :
            lookupKeyRead(c->db,setkeys[j]);
        if (!setobj) {
            sets[j] = NULL;
            continue;
        }
        if (checkType(c,setobj,OBJ_SET)) {
            zfree(sets);
            return;
        }
        sets[j] = setobj;
    }

    /* Select what DIFF algorithm to use.
     *
     * Algorithm 1 is O(N*M) where N is the size of the element first set
     * and M the total number of sets.
     *
     * Algorithm 2 is O(N) where N is the total number of elements in all
     * the sets.
     *
     * We compute what is the best bet with the current input here. */
    // Algorithm selection only applies to the difference
    if (op == SET_OP_DIFF && sets[0]) {
        long long algo_one_work = 0, algo_two_work = 0;

        for (j = 0; j < setnum; j++) {
            if (sets[j] == NULL) continue;

            algo_one_work += setTypeSize(sets[0]);
            algo_two_work += setTypeSize(sets[j]);
        }

        /* Algorithm 1 has better constant times and performs less operations
         * if there are elements in common. Give it some advantage. */
        algo_one_work /= 2;
        diff_algo = (algo_one_work <= algo_two_work) ? 1 : 2;

        if (diff_algo == 1 && setnum > 1) {
            /* With algorithm 1 it is better to order the sets to subtract
             * by decreasing size, so that we are more likely to find
             * duplicated elements ASAP. */
            qsort(sets+1,setnum-1,sizeof(robj*),
                qsortCompareSetsByRevCardinality);
        }
    }

    /* We need a temp set object to store our union. If the dstkey
     * is not NULL (that is, we are inside an SUNIONSTORE operation) then
     * this set object will be the resulting object to set into the target key*/
    dstset = createIntsetObject();

    if (op == SET_OP_UNION) {
        /* Union is trivial, just add every element of every set to the
         * temporary set. */
        for (j = 0; j < setnum; j++) {
            if (!sets[j]) continue; /* non existing keys are like empty sets */

            // Add one by one; ordering is irrelevant for a union
            si = setTypeInitIterator(sets[j]);
            while((ele = setTypeNextObject(si)) != NULL) {
                if (setTypeAdd(dstset,ele)) cardinality++;
                sdsfree(ele);
            }
            setTypeReleaseIterator(si);
        }
    }
    // Algorithm 1: iterate the first set and probe the others
    else if (op == SET_OP_DIFF && sets[0] && diff_algo == 1) {
        /* DIFF Algorithm 1:
         *
         * We perform the diff by iterating all the elements of the first set,
         * and only adding it to the target set if the element does not exist
         * into all the other sets.
         *
         * This way we perform at max N*M operations, where N is the size of
         * the first set, and M the number of sets. */
        si = setTypeInitIterator(sets[0]);
        while((ele = setTypeNextObject(si)) != NULL) {
            for (j = 1; j < setnum; j++) {
                if (!sets[j]) continue; /* no key is an empty set. */
                if (sets[j] == sets[0]) break; /* same set! */
                // Found in another set: not part of the difference
                if (setTypeIsMember(sets[j],ele)) break;
            }
            // The loop ran to completion: no other (existing) set holds
            // the element, so it belongs to the result
            if (j == setnum) {
                /* There is no other set with this element. Add it. */
                setTypeAdd(dstset,ele);
                cardinality++;
            }
            sdsfree(ele);
        }
        setTypeReleaseIterator(si);
    }
    // Algorithm 2: start from a copy of the first set and remove the
    // members of every following set; what remains is the difference
    else if (op == SET_OP_DIFF && sets[0] && diff_algo == 2) {
        /* DIFF Algorithm 2:
         *
         * Add all the elements of the first set to the auxiliary set.
         * Then remove all the elements of all the next sets from it.
         *
         * This is O(N) where N is the sum of all the elements in every
         * set. */
        for (j = 0; j < setnum; j++) {
            if (!sets[j]) continue; /* non existing keys are like empty sets */

            si = setTypeInitIterator(sets[j]);
            while((ele = setTypeNextObject(si)) != NULL) {
                if (j == 0) {
                    if (setTypeAdd(dstset,ele)) cardinality++;
                } else {
                    if (setTypeRemove(dstset,ele)) cardinality--;
                }
                sdsfree(ele);
            }
            setTypeReleaseIterator(si);

            /* Exit if result set is empty as any additional removal
             * of elements will have no effect. */
            if (cardinality == 0) break;
        }
    }

    /* Output the content of the resulting set, if not in STORE mode */
    if (!dstkey) {
        addReplyMultiBulkLen(c,cardinality);
        si = setTypeInitIterator(dstset);
        // Send the resulting members
        while((ele = setTypeNextObject(si)) != NULL) {
            addReplyBulkCBuffer(c,ele,sdslen(ele));
            sdsfree(ele);
        }
        setTypeReleaseIterator(si);
        decrRefCount(dstset);
    } else {
        /* If we have a target key where to store the resulting set
         * create this key with the result set inside */
        int deleted = dbDelete(c->db,dstkey);
        if (setTypeSize(dstset) > 0) {
            // Store the result and reply with its cardinality
            dbAdd(c->db,dstkey,dstset);
            addReplyLongLong(c,setTypeSize(dstset));
            notifyKeyspaceEvent(NOTIFY_SET,
                op == SET_OP_UNION ? "sunionstore" : "sdiffstore",
                dstkey,c->db->id);
        } else {
            decrRefCount(dstset);
            addReply(c,shared.czero);
            if (deleted)
                notifyKeyspaceEvent(NOTIFY_GENERIC,"del",
                    dstkey,c->db->id);
        }
        signalModifiedKey(c->db,dstkey);
        server.dirty++;
    }
    zfree(sets);
}
/* This is used by SDIFF and in this case we can receive NULL that should
 * be handled as empty sets. Orders sets by descending cardinality; a NULL
 * slot counts as size 0. */
int qsortCompareSetsByRevCardinality(const void *s1, const void *s2) {
    robj *o1 = *(robj**)s1, *o2 = *(robj**)s2;

    return (o2 ? setTypeSize(o2) : 0) - (o1 ? setTypeSize(o1) : 0);
}

6.SPOP

从存储在key的集合中移除并返回一个或多个随机元素。

此操作与SRANDMEMBER类似,它从一个集合中返回一个或多个随机元素,但不删除元素。

时间复杂度:O(1)

127.0.0.1:6379> spop myset1
"3"
127.0.0.1:6379> spop myset1
"5"
127.0.0.1:6379> spop myset1
"1"
127.0.0.1:6379> smembers myset1
1) "2"
2) "4"
127.0.0.1:6379>

源码解析

// Usage: SPOP key [count]
// t_set.c
/* SPOP implementation. With a count argument (argc == 3) the work is
 * delegated to spopWithCountCommand; more arguments are a syntax error.
 * Otherwise one random member is removed from the set and returned. */
void spopCommand(client *c) {
    robj *set, *ele, *aux;
    sds sdsele;
    int64_t llele;
    int encoding;

    if (c->argc == 3) {
        // Pop the requested number of elements (not covered here)
        spopWithCountCommand(c);
        return;
    } else if (c->argc > 3) {
        addReply(c,shared.syntaxerr);
        return;
    }

    /* Make sure a key with the name inputted exists, and that it's type is
     * indeed a set */
    if ((set = lookupKeyWriteOrReply(c,c->argv[1],shared.nullbulk)) == NULL ||
        checkType(c,set,OBJ_SET)) return;

    /* Get a random element from the set */
    // 1. Pick a random element; this is what defines SPOP
    encoding = setTypeRandomElement(set,&sdsele,&llele);

    /* Remove the element from the set */
    // 2. Delete it, keeping a string object copy for the reply
    if (encoding == OBJ_ENCODING_INTSET) {
        ele = createStringObjectFromLongLong(llele);
        set->ptr = intsetRemove(set->ptr,llele,NULL);
    } else {
        ele = createStringObject(sdsele,sdslen(sdsele));
        setTypeRemove(set,ele->ptr);
    }

    notifyKeyspaceEvent(NOTIFY_SET,"spop",c->argv[1],c->db->id);

    /* Replicate/AOF this command as an SREM operation */
    aux = createStringObject("SREM",4);
    rewriteClientCommandVector(c,3,aux,c->argv[1],ele);
    decrRefCount(aux);

    /* Add the element to the reply */
    addReplyBulk(c,ele);
    decrRefCount(ele);

    /* Delete the set if it's empty */
    if (setTypeSize(set) == 0) {
        dbDelete(c->db,c->argv[1]);
        notifyKeyspaceEvent(NOTIFY_GENERIC,"del",c->argv[1],c->db->id);
    }

    /* Set has been modified */
    signalModifiedKey(c->db,c->argv[1]);
    server.dirty++;
}
// Nothing special above; the interesting part is how the random pick works
// t_set.c, pick a random element and store it in sdsele or llele
/* Return random element from a non empty set.
 * The returned element can be a int64_t value if the set is encoded
 * as an "intset" blob of integers, or an SDS string if the set
 * is a regular set.
 *
 * The caller provides both pointers to be populated with the right
 * object. The return value of the function is the object->encoding
 * field of the object and is used by the caller to check if the
 * int64_t pointer or the redis object pointer was populated.
 *
 * Note that both the sdsele and llele pointers should be passed and cannot
 * be NULL since the function will try to defensively populate the non
 * used field with values which are easy to trap if misused. */
int setTypeRandomElement(robj *setobj, sds *sdsele, int64_t *llele) {
    if (setobj->encoding == OBJ_ENCODING_HT) {
        // 1.1. Random pick from a dict
        dictEntry *de = dictGetRandomKey(setobj->ptr);
        *sdsele = dictGetKey(de);
        *llele = -123456789; /* Not needed. Defensive. */
    } else if (setobj->encoding == OBJ_ENCODING_INTSET) {
        // 1.2. Random pick from an intset
        *llele = intsetRandom(setobj->ptr);
        *sdsele = NULL; /* Not needed. Defensive. */
    } else {
        serverPanic("Unknown set encoding");
    }
    return setobj->encoding;
}
// 1.1. Random pick for the dict (hash table) encoding
/* Return a random entry from the hash table. Useful to
 * implement randomized algorithms.
 *
 * Returns NULL when the dictionary holds no entries. Otherwise a random
 * non empty bucket is located first, then a random entry is chosen from
 * that bucket's collision chain. */
dictEntry *dictGetRandomKey(dict *d)
{
    dictEntry *he, *orighe;
    /* Wide enough for any bucket index: the table sizes are unsigned long. */
    unsigned long h;
    int listlen, listele;

    if (dictSize(d) == 0) return NULL;
    if (dictIsRehashing(d)) _dictRehashStep(d);

    /* Keep drawing random bucket indexes until a non empty bucket is hit. */
    if (dictIsRehashing(d)) {
        do {
            /* We are sure there are no elements in indexes from 0
             * to rehashidx-1, so the index is drawn from the range
             * [rehashidx, ht[0].size + ht[1].size). */
            h = d->rehashidx + (random() % (d->ht[0].size +
                                            d->ht[1].size -
                                            d->rehashidx));
            /* Indexes past the end of the first table address the second
             * table. */
            he = (h >= d->ht[0].size) ? d->ht[1].table[h - d->ht[0].size] :
                                        d->ht[0].table[h];
        } while(he == NULL);
    } else {
        do {
            h = random() & d->ht[0].sizemask;
            he = d->ht[0].table[h];
        } while(he == NULL);
    }

    /* Now we found a non empty bucket, but it is a linked
     * list and we need to get a random element from the list.
     * The only sane way to do so is counting the elements and
     * select a random index. */
    listlen = 0;
    orighe = he;
    /* Count the entries chained in this bucket (hash collisions). */
    while(he) {
        he = he->next;
        listlen++;
    }
    /* Second random draw: choose one entry of the chain. */
    listele = random() % listlen;
    he = orighe;
    while(listele--) he = he->next;
    return he;
}
// 1.2. Random pick for the intset encoding
// intset.c
/* Return a random member of the intset. */
int64_t intsetRandom(intset *is) {
    /* A single random draw is enough here: the position is reduced modulo
     * the stored element count and handed straight to _intsetGet(). */
    uint32_t count = intrev32ifbe(is->length);
    uint32_t slot = rand() % count;
    return _intsetGet(is,slot);
}

Redis系列(十):数据结构Set源码解析和SADD、SINTER、SDIFF、SUNION、SPOP命令的更多相关文章

  1. Alink漫谈(十九) :源码解析 之 分位点离散化Quantile

    Alink漫谈(十九) :源码解析 之 分位点离散化Quantile 目录 Alink漫谈(十九) :源码解析 之 分位点离散化Quantile 0x00 摘要 0x01 背景概念 1.1 离散化 1 ...

  2. Alink漫谈(十八) :源码解析 之 多列字符串编码MultiStringIndexer

    Alink漫谈(十八) :源码解析 之 多列字符串编码MultiStringIndexer 目录 Alink漫谈(十八) :源码解析 之 多列字符串编码MultiStringIndexer 0x00 ...

  3. Alink漫谈(二十) :卡方检验源码解析

    Alink漫谈(二十) :卡方检验源码解析 目录 Alink漫谈(二十) :卡方检验源码解析 0x00 摘要 0x01 背景概念 1.1 假设检验 1.2 H0和H1是什么? 1.3 P值 (P-va ...

  4. Redis系列(九):数据结构Hash源码解析和HSET、HGET命令

    2.源码解析 1.相关命令如下: {"hset",hsetCommand,,"wmF",,NULL,,,,,}, {"hsetnx",hse ...

  5. Java 集合系列Stack详细介绍(源码解析)和使用示例

    Stack简介 Stack是栈.它的特性是:先进后出(FILO, First In Last Out). java工具包中的Stack是继承于Vector(矢量队列)的,由于Vector是通过数组实现 ...

  6. 小学徒成长系列—StringBuilder & StringBuffer关键源码解析

    在前面的博文<小学徒成长系列—String关键源码解析>和<小学徒进阶系列—JVM对String的处理>中,我们讲到了关于String的常用方法以及JVM对字符串常量Strin ...

  7. ThreadPoolExecutor系列<三、ThreadPoolExecutor 源码解析>

    本文系作者原创,转载请注明出处:http://www.cnblogs.com/further-further-further/p/7681826.html 在源码解析前,需要先理清线程池控制的运行状态 ...

  8. Raft协议实战之Redis Sentinel的选举Leader源码解析

    这可能是我看过的写的最详细的关于redis 选举的文章了, 原文链接 Raft协议是用来解决分布式系统一致性问题的协议,在很长一段时间,Paxos被认为是解决分布式系统一致性的代名词.但是Paxos难 ...

  9. 【OpenStack】OpenStack系列13之Nova源码解析与API扩展

    学习思路 议程:代码结构-主干流程-分层架构-业务模型-数据库模型-消息模型 分布式架构:Api:横向扩展    rpc:纵向扩展 分层架构:Controller接口层.View/Manager逻辑层 ...

随机推荐

  1. JS懒加载

    4.如何使用js懒加载图片       a.懒加载图片是基于jquery.js的,所以: <script src="jquery.js" type="text/ja ...

  2. angular实现select的ng-options4

    ng实现简单的select <div ng-controller="ngSelect"> <select ng-model="vm.selectVal& ...

  3. over-float清除浮动

    <!DOCTYPE html> <html> <head> <meta charset="utf-8"> <title> ...

  4. Python输入一个数字打印等腰三角形

    要求 用户输入一个数字,按照数字打印出等腰三角形 思路 1,用户输入的数字为n代表一共有多少行 2,使用一个循环带两个for循环,第一层循环是循环行数,第二层两个平行for循环一个打印空格一个打印*号 ...

  5. IE8引用jQuery报$或者jQuery未定义

    最近公司做的项目要求兼容到IE8,结果在页面调试的时候出了个bug,在IE8上面一直报错$未定义,或者jQuery未定义,导致页面上面写的jQuery全部失效,在Chrome浏览器没有任何问题.很是头 ...

  6. codeforces285B

    Find Marble CodeForces - 285B Petya and Vasya are playing a game. Petya's got n non-transparent glas ...

  7. 以 BZOJ 2002 为例学习有根树LCT(Link-Cut Tree)

    以BZOJ 2002 弹飞绵羊为例学习有根树LCT(Link-Cut Tree) 注:本文非常简单,只涉及有根树LCT,对于无根树,LCT还有几个本文没有提到的操作,以后慢慢更新 =v= 知识储备 [ ...

  8. Linux常用命令总结--基础命令

    系统信息 1.arch 显示机器的处理器架构(1) 2.uname -m 显示机器的处理器架构(2) 3.lsb_release -a 查看操作系统版本 4.top 查看进程 5.free -m 查看 ...

  9. 洛谷P1117 优秀的拆分【Hash】【字符串】【二分】【好难不会】

    题目描述 如果一个字符串可以被拆分为AABBAABB的形式,其中 A和 B是任意非空字符串,则我们称该字符串的这种拆分是优秀的. 例如,对于字符串aabaabaaaabaabaa,如果令 A=aabA ...

  10. OpenCV持久化(二)

    如何利用OpenCV持久化自己的数据结构?我们来看看OpenCV中的一个例子. MyData.hpp定义自己的数据结构MyData如下: #ifndef MYDATA_HPP #define MYDA ...