[redis设计与实现][6]基本数据结构——压缩列表

压缩列表(ziplist)是列表键和哈希键的底层实现之一。
压缩列表结构:

属性 类型 长度 用途
zlbytes uint32_t 4字节 记录整个压缩列表占用的内存字节数:在对压缩列表进行内存重分配,或者计算zlend的位置时使用
zltail uint32_t 4字节 记录压缩列表尾节点记录压缩列表的起始地址便宜:用于快速定位尾节点
zllen uint16_t 2字节 记录压缩列表包含的节点数量:当这个值小于UINT64_MAX(65535)时,这个属性的值就是压缩列表包含的节点数量;当这个值等于UINT64_MAX时,节点的真实数量需要遍历整个压缩列表才能计算得出
entryX 列表节点 不定 压缩列表包含的各个节点,节点长度由节点保存的内容决定
zlend uint8_t 1字节 特殊值0xFF,用于标记压缩列表的末端

压缩列表节点结构:

  • previous_entry_length:以字节为单位,记录压缩列表中前一个节点的长度。previous_entry_length属性的长度可以是1字节或者5字节:
    • 如果前一节点的长度小于254字节,那么previous_entry_length属性的长度为1字节
    • 日过前一节点的长度大于等于254字节,那么previous_entry_length属性的长度为5字节:其中第一字解会被设置为0xFE(254),而之后的四个字节用于保存前一节点的长度
  • encoding:节点的encoding属性记录了节点content属性薄脆数据的类型以及长度:
    • 一字节、两字节或者五字节长,值的最高位为00、01或者10的是字节数组编码:这种编码便是节点的content属性保存着字节数组,数组的长度由编码除去最高两位之后的其他位记录;
    • 一字节长,值的最高位以11开头的是整数编码:这种编码表示节点的content属性保存着整数值,整数值的类型和长度由编码去除最高两位之后的其他位记录;
  • content

字节数组编码:

编码 编码长度 content属性保存的值
00bbbbbb 1字节 长度小于等于63字节的字节数组
01bbbbbb xxxxxxxx 2字节 长度小于等于16383字节的字节数组
10______ aaaaaaaa bbbbbbbb cccccccc dddddddd 5字节 长度小于等于4294967295的字节数组

整数编码:

编码 编码长度 content属性保存的值
11000000 1字节 int16_t类型的整数
11010000 1字节 int32_t类型的整数
11100000 1字节 int64_t类型的整数
11110000 1字节 24位有符号整数
11111110 1字节 8位有符号整数
1111xxxx 1字节 无contennt属性,xxxx保存了0到12之前的值

连锁更新:
多个连续、长度结余250到253字节之间的节点,如果前面有插入或删除节点,导致previous_entry_length属性需要扩张成5字节,可能导致这些连续的节点都需要更新。最坏情况可能会对整个压缩列表重新执行N次空间重新分配。

unsigned char ziplistNew(void);
[cce lang="c"]
unsigned char *ziplistNew(void) {
//多一个表示结尾的字节
unsigned int bytes = ZIPLIST_HEADER_SIZE+1;
unsigned char *zl = zmalloc(bytes);
//#define ZIPLIST_BYTES(zl) (
((uint32_t)(zl)))
ZIPLIST_BYTES(zl) = intrev32ifbe(bytes);
//#define ZIPLIST_TAIL_OFFSET(zl) (
((uint32_t)((zl)+sizeof(uint32_t))))
ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(ZIPLIST_HEADER_SIZE);
//#define ZIPLIST_LENGTH(zl) (
((uint16_t)((zl)+sizeof(uint32_t)2)))
ZIPLIST_LENGTH(zl) = 0;
//#define ZIP_END 255
zl[bytes-1] = ZIP_END;
return zl;
}
[/cce]

unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where);
[cce lang="c"]

unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where) {
unsigned char *p;
//#define ZIPLIST_ENTRY_HEAD(zl) ((zl)+ZIPLIST_HEADER_SIZE)
//#define ZIPLIST_ENTRY_END(zl) ((zl)+intrev32ifbe(ZIPLIST_BYTES(zl))-1)
p = (where == ZIPLIST_HEAD) ? ZIPLIST_ENTRY_HEAD(zl) : ZIPLIST_ENTRY_END(zl);
return __ziplistInsert(zl,p,s,slen);
}
static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), reqlen;
unsigned int prevlensize, prevlen = 0;
size_t offset;
int nextdiff = 0;
unsigned char encoding = 0;
long long value = 123456789; /* initialized to avoid warning. Using a value
that is easy to see if for some reason
we use it uninitialized. */
zlentry tail;

/* Find out prevlen for the entry that is inserted. */
if (p[0] != ZIP_END) {

//获取当前节点的previous_entry_length
ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
} else {
unsigned char *ptail = ZIPLIST_ENTRY_TAIL(zl);
if (ptail[0] != ZIP_END) {
prevlen = zipRawEntryLength(ptail);
}
}

/* See if the entry can be encoded */

//尝试是否可以编码为整数,同时返回整数编码
if (zipTryEncoding(s,slen,&value,&encoding)) {
/* 'encoding' is set to the appropriate integer encoding /
//是整数编码,返回content字节数
reqlen = zipIntSize(encoding);
} else {
/
'encoding' is untouched, however zipEncodeLength will use the
* string length to figure out how to encode it. /
//字符数组编码,长度直接是数组长度
reqlen = slen;
}
/
We need space for both the length of the previous entry and
* the length of the payload. */
//加上前置节点长度编码占用字节数
reqlen += zipPrevEncodeLength(NULL,prevlen);
//加上编码占用字节数
reqlen += zipEncodeLength(NULL,encoding,slen);

/* When the insert position is not equal to the tail, we need to
 * make sure that the next entry can hold this entry's length in
 * its prevlen field. */

//判断插入位置的节点,实际前置节点部分字节差
//如果有差距后面需要从1字节变成5字节
nextdiff = (p[0] != ZIP_END) ? zipPrevLenByteDiff(p,reqlen) : 0;

/* Store offset because a realloc may change the address of zl. */
offset = p-zl;

//重新分配压缩列表内存,重新计算当前插入节点位置
zl = ziplistResize(zl,curlen+reqlen+nextdiff);
p = zl+offset;

/* Apply memory move when necessary and update tail offset. */

//从头插入,后面的节点内存位置向后移动
if (p[0] != ZIP_END) {
/* Subtract one because of the ZIP_END bytes */
//向后移动,目标位置当前节点以后,起始位置当前位置减去前置节点便宜,长度为所有原节点长度减去尾部标示符
memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff);

    /* Encode this entry's raw length in the next entry. */

//设置后面节点的前置节点长度
zipPrevEncodeLength(p+reqlen,reqlen);

    /* Update offset for tail */

//更新尾部节点偏移量
ZIPLIST_TAIL_OFFSET(zl) =
intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+reqlen);

    /* When the tail contains more than one entry, we need to take
     * "nextdiff" in account as well. Otherwise, a change in the
     * size of prevlen doesn't have an effect on the *tail* offset. */
    tail = zipEntry(p+reqlen);

//尾部标示符不对,说明后一个节点的前置节点便宜有扩大(nextdiff > 0),以新的为准
if (p[reqlen+tail.headersize+tail.len] != ZIP_END) {
ZIPLIST_TAIL_OFFSET(zl) =
intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
}
} else {
//尾部插入,直接更新尾部标记
/* This element will be the new tail. */
ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(p-zl);
}

/* When nextdiff != 0, the raw length of the next entry has changed, so
 * we need to cascade the update throughout the ziplist */
if (nextdiff != 0) {
    offset = p-zl;

//传说中的连锁更新
zl = __ziplistCascadeUpdate(zl,p+reqlen);
p = zl+offset;
}

/* Write the entry */

//写入当前节点
//前置节点偏移
p += zipPrevEncodeLength(p,prevlen);
//编码
p += zipEncodeLength(p,encoding,slen);
//#define ZIP_IS_STR(enc) (((enc) & ZIP_STR_MASK) < ZIP_STR_MASK)
if (ZIP_IS_STR(encoding)) {
//是字符串编码,直接复制过去
memcpy(p,s,slen);
} else {
//是整数编码,写入整数
zipSaveInteger(p,value,encoding);
}
//增加压缩列表节点数
ZIPLIST_INCR_LENGTH(zl,1);
return zl;
}

static int zipTryEncoding(unsigned char *entry, unsigned int entrylen, long long *v, unsigned char *encoding) {
long long value;

if (entrylen &gt;= 32 || entrylen == 0) return 0;

//尝试将字符串转换成整数
if (string2ll((char)entry,entrylen,&value)) {
/
Great, the string can be encoded. Check what's the smallest
* of our encoding types that can hold this value. */
if (value >= 0 && value <= 12) {
*encoding = ZIP_INT_IMM_MIN+value;
} else if (value >= INT8_MIN && value <= INT8_MAX) {
*encoding = ZIP_INT_8B;
} else if (value >= INT16_MIN && value <= INT16_MAX) {
*encoding = ZIP_INT_16B;
} else if (value >= INT24_MIN && value <= INT24_MAX) {
*encoding = ZIP_INT_24B;
} else if (value >= INT32_MIN && value <= INT32_MAX) {
*encoding = ZIP_INT_32B;
} else {
*encoding = ZIP_INT_64B;
}
*v = value;
return 1;
}
return 0;
}
//联锁(级联)更新
static unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p) {
size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), rawlen, rawlensize;
size_t offset, noffset, extra;
unsigned char *np;
zlentry cur, next;

while (p[0] != ZIP_END) {
    cur = zipEntry(p);
    rawlen = cur.headersize + cur.len;
    rawlensize = zipPrevEncodeLength(NULL,rawlen);

    /* Abort if there is no next entry. */

//下一个节点是列表尾,退出
if (p[rawlen] == ZIP_END) break;
next = zipEntry(p+rawlen);

    /* Abort when &quot;prevlen&quot; has not changed. */

//下一个节点前置长度节点没有变化,不需要级联更新
if (next.prevrawlen == rawlen) break;

//下一个节点需要扩张前置节点偏移
if (next.prevrawlensize < rawlensize) {
/* The "prevlen" field of "next" needs more bytes to hold
* the raw length of "cur". */
offset = p-zl;
extra = rawlensize-next.prevrawlensize;
//压缩列表重新分配空间,原空间加上扩充空间
zl = ziplistResize(zl,curlen+extra);
p = zl+offset;

        /* Current pointer and offset for next element. */
        np = p+rawlen;
        noffset = np-zl;

        /* Update tail offset when next element is not the tail element. */

//更新尾节点偏移
if ((zl+intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))) != np) {
ZIPLIST_TAIL_OFFSET(zl) =
intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+extra);
}

        /* Move the tail to the back. */

//向后移动内存
memmove(np+rawlensize,
np+next.prevrawlensize,
curlen-noffset-next.prevrawlensize-1);
//重新设置上一节点长度
zipPrevEncodeLength(np,rawlen);

        /* Advance the cursor */

//向后遍历是否需要继续调整
p += rawlen;
curlen += extra;
} else {
//下一个节点是要缩短的,强制不缩短,防止过多的内存重新分配
if (next.prevrawlensize > rawlensize) {
/* This would result in shrinking, which we want to avoid.
* So, set "rawlen" in the available bytes. */
zipPrevEncodeLengthForceLarge(p+rawlen,rawlen);
} else {
zipPrevEncodeLength(p+rawlen,rawlen);
}

        /* Stop here, as the raw length of &quot;next&quot; has not changed. */
        break;
    }
}
return zl;

}
//写入整数
static void zipSaveInteger(unsigned char p, int64_t value, unsigned char encoding) {
int16_t i16;
int32_t i32;
int64_t i64;
if (encoding == ZIP_INT_8B) {
((int8_t
)p)[0] = (int8_t)value;
} else if (encoding == ZIP_INT_16B) {
i16 = value;
memcpy(p,&i16,sizeof(i16));
memrev16ifbe(p);
} else if (encoding == ZIP_INT_24B) {
i32 = value<<8;
memrev32ifbe(&i32);
memcpy(p,((uint8_t)&i32)+1,sizeof(i32)-sizeof(uint8_t));
} else if (encoding == ZIP_INT_32B) {
i32 = value;
memcpy(p,&i32,sizeof(i32));
memrev32ifbe(p);
} else if (encoding == ZIP_INT_64B) {
i64 = value;
memcpy(p,&i64,sizeof(i64));
memrev64ifbe(p);
} else if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX) {
/
Nothing to do, the value is stored in the encoding itself. */
} else {
assert(NULL);
}
}
[/cce]

发表回复

您的电子邮箱地址不会被公开。 必填项已用 * 标注

此站点使用Akismet来减少垃圾评论。了解我们如何处理您的评论数据