有時候用到位運算。需要快速找到一個整數的二進制第一個1或0在哪個位(下標)?例如:十進制數100的二進制是1100100,那麼它的第一個1在下標 為2的位置(bsf, bit scan forward)或6的位置(bsr, bit scan in reverse order),由於只用於存儲一個狀態,至於用bsf還是bsr則無所謂。
解決這個問題的第一個想法就是用內聯匯編的做法,使用特別的CPU指令去找,但匯編的可移植性比較差,不同的CPU型號使用的指令可能不一樣,執行速度也不一樣。
假設找一個64位無符號整數二進制的第一個1,用bsf, AT& T匯編(gcc匯編)可以這樣做:
1 // bit scan forward for 64 bit integral number
2 /* ============================================ */
3 inline int bsf_asm (uint64_t w)
4 {
5 int x1, x2;
6 asm ("bsf %0,%0\n" "jnz 1f\n" "bsf %1,%0\n" "jz 1f\n" "addl $32,%0\n"
7 "1:": "=&q" (x1), "=&q" (x2):"1" ((int) (w >> 32)),
8 "0" ((int) w));
9 return x1;
10 }
如果用C來實現的話,那就有點麻煩了,在此不講復雜的數學原理,僅僅給出代碼。
1 // bit scan forward for 64 bit integral number
2 /* ============================================ */
3 inline int bsf_folded (uint64_t bb)
4 {
5 static const int lsb_64_table[64] =
6 {
7 63, 30, 3, 32, 59, 14, 11, 33,
8 60, 24, 50, 9, 55, 19, 21, 34,
9 61, 29, 2, 53, 51, 23, 41, 18,
10 56, 28, 1, 43, 46, 27, 0, 35,
11 62, 31, 58, 4, 5, 49, 54, 6,
12 15, 52, 12, 40, 7, 42, 45, 16,
13 25, 57, 48, 13, 10, 39, 8, 44,
14 20, 47, 38, 22, 17, 37, 36, 26
15 };
16 unsigned int folded;
17 bb ^= bb - 1;
18 folded = (int) bb ^ (bb >> 32);
19 return lsb_64_table[folded * 0x78291ACF >> 26];
20 }
如果想從後往前搜索一個整數的二進制第一個1的下標,用匯編可以這樣做。
1 // bit scan in reverse order for 64 bit integral number
2 /* ============================================ */
3 inline int bsr_asm (uint64_t w)
4 {
5 int x1, x2;
6 asm ("bsr %1,%0\n" "jnz 1f\n" "bsr %0,%0\n" "subl $32,%0\n"
7 "1: addl $32,%0\n": "=&q" (x1), "=&q" (x2):"1" ((int) (w >> 32)),
8 "0" ((int) w));
9 return x1;
10 }
如果用C來實現的話,也比較簡單,用divide and conquer 的原理就不會太慢。
1 // a logn (n == 32)algorithm for bit scan in reverse order
2 /* ============================================ */
3 inline int bsr32(uint32_t bb)
4 {
5 static const char msb_256_table[256] =
6 {
7 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3,
8 4, 4, 4, 4, 4, 4, 4, 4,4, 4, 4, 4,4, 4, 4, 4,
9 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
10 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
11 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
12 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
13 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
14 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
15 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
16 };
17 int result = 0;
18
19 if (bb > 0xFFFF)
20 {
21 bb >>= 16;
22 result += 16;
23 }
24 if (bb > 0xFF)
25 {
26 bb >>= 8;
27 result += 8;
28 }
29
30 return (result + msb_256_table[bb]);
31 }
32
33 /* ============================================ */
34 inline int bsr_in_C(uint64_t bb)
35 {
36 const uint32_t hb = bb >> 32;
37 return hb ? 32 + bsr32((uint32_t)hb) : bsr32((uint32_t)bb);
38
39 }
40
下面這個似乎也可以,失敗時返回-1023,至於速度快慢就要看編譯器的嗜好了。
1 // bit scan in reverse order for 64 bit integral number
2 /* ============================================ */
3 inline int bsr_double (uint64_t bb)
4 {
5 union
6 {
7 double d;
8 struct
9 {
10 unsigned int mantissal : 32;
11 unsigned int mantissah : 20;
12 unsigned int exponent : 11;
13 unsigned int sign : 1;
14 };
15 } ud;
16 ud.d = (double)(bb & ~(bb >> 32));
17 return ud.exponent - 1023;
18 }
19
以上uint64_t和uint32_t是新C++標准可以支持的整型,分別相當於舊的unsigned long long和unsigned long類型。以上代碼不是我的原創,而是來自國外某位朋友,我稍微加工了一下貼到這裡,版權屬於原作者,如果我沒有記錯的話應該是GNU。