mirror of
https://git.busybox.net/busybox
synced 2026-01-19 15:29:37 +00:00
libbb/bitops.c: add inlining comment
Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
parent
c305c81c94
commit
11d4c08d75
1 changed file with 20 additions and 0 deletions
|
|
@@ -58,6 +58,26 @@ void FAST_FUNC xorbuf16_aligned_long(void *dst, const void *src)
|
|||
# endif
|
||||
#endif
|
||||
}
|
||||
// The above can be inlined in libbb.h, in a way where compiler
|
||||
// is even free to use better addressing modes than (%reg), and
|
||||
// to keep the result in a register
|
||||
// (to not store it to memory after each XOR):
|
||||
//#if defined(__SSE__)
|
||||
//#include <xmmintrin.h>
|
||||
//^^^ or just: typedef float __m128_u __attribute__((__vector_size__(16),__may_alias__,__aligned__(1)));
|
||||
//static ALWAYS_INLINE void xorbuf16_aligned_long(void *dst, const void *src)
|
||||
//{
|
||||
// __m128_u xmm0, xmm1;
|
||||
// asm volatile(
|
||||
//"\n xorps %1,%0"
|
||||
// : "=x" (xmm0), "=x" (xmm1)
|
||||
// : "0" (*(__m128_u*)dst), "1" (*(__m128_u*)src)
|
||||
// );
|
||||
// *(__m128_u*)dst = xmm0; // this store may be optimized out!
|
||||
//}
|
||||
//#endif
|
||||
// but I don't trust gcc optimizer enough to not generate some monstrosity.
|
||||
// See GMULT() function in TLS code as an example.
|
||||
|
||||
void FAST_FUNC xorbuf64_3_aligned64(void *dst, const void *src1, const void *src2)
|
||||
{
|
||||
|
|
|
|||
Loading…
Reference in a new issue