算法核心思想:分治法,第一次統(tǒng)計(jì)每2位的1的個(gè)數(shù),第二次統(tǒng)計(jì)每4位1的個(gè)數(shù),第三次統(tǒng)計(jì)每8位1的個(gè)數(shù),依次相加即可得到結(jié)果。
F = 0x55555555 = 01010101010101010101010101010101
T = 0x33333333 = 00110011001100110011001100110011
O = 0x0f0f0f0f = 00001111000011110000111100001111
現(xiàn)在將數(shù)字分成16組,每組2位,例如:9 = 1001 = 00,00,..... 00,10,01
第一步是統(tǒng)計(jì)每2位中1的個(gè)數(shù)。由于每2位數(shù)字最多可包含2個(gè)1,所以需要分別統(tǒng)計(jì)兩次。如下:
i = (i & 0x55555555) + ((i >> 1) & 0x55555555);
i&F”僅保留奇數(shù)位置的1,(i >> 1)&F僅保留偶數(shù)位置的1。二者相加得到每2位具有1的個(gè)數(shù)。
同樣,再統(tǒng)計(jì)每4位上的1的格式,將0011抽象成一個(gè)大的1即可得到相似的代碼。
i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
i = (i & 0x0F0F0F0F) + (i >> 4)) & 0x0F0F0F0F);
而0x01010101 = (1 << 24) + (1 << 16) + (1 << 8) + 1,所以i * 0x01010101 = (i << 24) + (i << 16) + (i << 8) + i
所以i * 0x01010101最高位就是原始輸入的1出現(xiàn)次數(shù)的最終結(jié)果。移位則將是最高位的值移到最低位。
res = i * (0x01010101) > >24
redis源碼如下:
size_t redisPopcount(void *s, long count) {
size_t bits = 0;
unsigned char *p = s;
uint32_t *p4;
// 通過查表來計(jì)算,對于 1 字節(jié)所能表示的值來說
// 這些值的二進(jìn)制表示所帶有的 1 的數(shù)量
// 比如整數(shù) 3 的二進(jìn)制表示 0011 ,帶有兩個(gè) 1
// 正好是查表 bitsinbyte[3] == 2
static const unsigned char bitsinbyte[256] = {0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,1,2,2,3,2,3,3,4,2,3,3,4,3,4,4,5,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,2,3,3,4,3,4,4,5,3,4,4,5,4,5,5,6,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,3,4,4,5,4,5,5,6,4,5,5,6,5,6,6,7,4,5,5,6,5,6,6,7,5,6,6,7,6,7,7,8};
/* Count initial bytes not aligned to 32 bit. */
while((unsigned long)p & 3 && count) {
bits += bitsinbyte[*p++];
count--;
}
/* Count bits 16 bytes at a time */
// 每次統(tǒng)計(jì) 16 字節(jié)
// 關(guān)于這里所使用的優(yōu)化算法,可以參考:
// http://yesteapea.wordpress.com/2013/03/03/counting-the-number-of-set-bits-in-an-integer/
p4 = (uint32_t*)p;
while(count>=16) {
uint32_t aux1, aux2, aux3, aux4;
aux1 = *p4++;
aux2 = *p4++;
aux3 = *p4++;
aux4 = *p4++;
count -= 16;
aux1 = aux1 - ((aux1 >> 1) & 0x55555555);
aux1 = (aux1 & 0x33333333) + ((aux1 >> 2) & 0x33333333);
aux2 = aux2 - ((aux2 >> 1) & 0x55555555);
aux2 = (aux2 & 0x33333333) + ((aux2 >> 2) & 0x33333333);
aux3 = aux3 - ((aux3 >> 1) & 0x55555555);
aux3 = (aux3 & 0x33333333) + ((aux3 >> 2) & 0x33333333);
aux4 = aux4 - ((aux4 >> 1) & 0x55555555);
aux4 = (aux4 & 0x33333333) + ((aux4 >> 2) & 0x33333333);
bits += ((((aux1 + (aux1 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24) +
((((aux2 + (aux2 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24) +
((((aux3 + (aux3 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24) +
((((aux4 + (aux4 >> 4)) & 0x0F0F0F0F) * 0x01010101) >> 24);
}
/* Count the remaining bytes. */
// 不足 16 字節(jié)的,剩下的每個(gè)字節(jié)通過查表來完成
p = (unsigned char*)p4;
while(count--) bits += bitsinbyte[*p++];
return bits;
}