Skip to content

或可使用builtin函数优化popcount的过程 #1

@Kim-J-Smith

Description

@Kim-J-Smith

或可使用builtin函数优化popcount的过程


  • 原代码 70 - 85 行有popcount的宏,但没有利用builtin函数,原始代码如下:
#if 1
// See http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
static inline bit_array_val_t _windows_popcount(bit_array_val_t w)
{
    w = w - ((w >> 1) & (bit_array_val_t) ~(bit_array_val_t)0 / 3);
    w = (w & (bit_array_val_t) ~(bit_array_val_t)0 / 15 * 3) +
        ((w >> 2) & (bit_array_val_t) ~(bit_array_val_t)0 / 15 * 3);
    w = (w + (w >> 4)) & (bit_array_val_t) ~(bit_array_val_t)0 / 255 * 15;
    return (bit_array_val_t)(w * ((bit_array_val_t) ~(bit_array_val_t)0 / 255)) >>
           (sizeof(bit_array_val_t) - 1) * 8;
}

#define POPCOUNT(x) _windows_popcount(x)
#else
#define POPCOUNT(x) (unsigned)__builtin_popcountll(x)
#endif
  • 或可以利用上编译器的builtin函数进行更彻底的优化,如下:
/**
 * @brief Define a macro to count how many "1"s is in a binary integer number.
 * 
 * @name POPCOUNT
 * 
 * @param[in] x - The integer number. (Type: bit_array_val_t)
 * 
 * @return The number of 1s contained in the binary form corresponding to this integer.
 */
#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
 /* Use GCC and GCC version is greater than 3.4 */
 #define POPCOUNT(x) (uint32_t)__builtin_popcountll((unsigned long long)(x))
#elif defined(__clang__) && (__clang_major__ > 5)
 /* Use Clang and Clang version is greater than 5 */
 #define POPCOUNT(x) (uint32_t)__builtin_popcountll((unsigned long long)(x))
#elif defined(_MSC_VER) && (_MSC_VER >= 1500)
 /* Use MSVC and MSVC is greater than 2008 */
 #define POPCOUNT(x) (uint32_t)__popcnt64((unsigned __int64)(x))
#else
 /* The compiler didn't provide the builtin function */
 /* Counting bits set, in parallel */

// See http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
static inline bit_array_val_t _windows_popcount(bit_array_val_t w)
{
    w = w - ((w >> 1) & (bit_array_val_t) ~(bit_array_val_t)0 / 3);
    w = (w & (bit_array_val_t) ~(bit_array_val_t)0 / 15 * 3) + ((w >> 2) & (bit_array_val_t) ~(bit_array_val_t)0 / 15 * 3);
    w = (w + (w >> 4)) & (bit_array_val_t) ~(bit_array_val_t)0 / 255 * 15;
    return (bit_array_val_t)(w * ((bit_array_val_t) ~(bit_array_val_t)0 / 255)) >> (sizeof(bit_array_val_t) - 1) * 8;
}

 #define POPCOUNT(x) _windows_popcount(x)
#endif /* POPCOUNT */

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions