针对 string.h 中基于汇编实现的 strcmp 和普通 C 循环实现的 strcmp,从循环调用次数和字符串匹配长度两个维度做了一次性能对比测试。
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
using namespace std;
/*
 * Compare two NUL-terminated strings using the x86 string instructions
 * lodsb/scasb (works on both i386 and x86-64).
 *
 * cs, ct: the strings to compare; both must be valid NUL-terminated strings.
 *
 * Returns 0 when the strings are equal, -1 when the first differing byte of
 * cs is smaller than the one in ct, and 1 otherwise.  Bytes are compared as
 * unsigned values because the result is derived from the carry flag.
 */
static inline int strcmp_asm(const char * cs,const char * ct)
{
    int res;
    __asm__ __volatile__(
        "1:\tlodsb\n\t"                 /* AL = *cs++ */
        "scasb\n\t"                     /* flags = AL - *ct++ */
        "jne 2f\n\t"                    /* bytes differ -> compute sign */
        "testb %%al,%%al\n\t"           /* equal bytes: was it the terminator? */
        "jne 1b\n\t"
        "xorl %%eax,%%eax\n\t"          /* both strings ended: result 0 */
        "jmp 3f\n"
        "2:\tsbbl %%eax,%%eax\n\t"      /* EAX = -CF: -1 if AL was below *ct, else 0 */
        "orb $1,%%al\n"                 /* -1 stays -1, 0 becomes 1 */
        "3:"
        /* lodsb reads through (E/R)SI and scasb through (E/R)DI, so the
         * pointers must live in exactly those registers; both instructions
         * advance them, hence the read-write "+" constraints. */
        : "=a" (res), "+S" (cs), "+D" (ct)
        :
        /* The asm reads the string bytes from memory and changes the flags. */
        : "memory", "cc");
    return res;
}
/*
 * Plain byte-by-byte string comparison used as the baseline of the benchmark.
 *
 * cs, ct: the strings to compare; both must be valid NUL-terminated strings.
 *
 * Returns 0 when the strings are equal, -1 when the first differing byte of
 * cs is smaller than the one in ct, and 1 otherwise.
 */
static inline int strcmp_normal(const char * cs,const char * ct)
{
    /* Compare as unsigned bytes so that the ordering of bytes >= 0x80 agrees
     * with strcmp_asm and with the standard strcmp. */
    const unsigned char *a = (const unsigned char *)cs;
    const unsigned char *b = (const unsigned char *)ct;

    while (*a != '\0' && *b != '\0')
    {
        if (*a != *b)
            return *a < *b ? -1 : 1;
        /* Advance both cursors; without this the loop never terminates. */
        a++;
        b++;
    }
    /* At least one string ended: they are equal only if both ended here. */
    if (*a != *b)
        return *a < *b ? -1 : 1;
    return 0;
}
// Test-data helper: only its signature and first two statements survive in
// this listing. It empties `s` and then starts a loop counting `i` up from 0.
// NOTE(review): `null` is not a C++ identifier; presumably `NULL` was
// lowercased when the post was extracted -- confirm against the original.
static inline int generator_data(string &s,int isamelen=1,const char *sepc=null)
{
s.clear();
// NOTE(review): the text between `i<` and `>1)` on the next line is missing,
// most likely stripped as if it were an HTML tag. The rest of generator_data,
// the whole definition of strcmp_perfom, and the beginning of main (including
// the declarations of `loop` and `isamelen`) are therefore not visible here.
// What follows is the tail of main; the surviving `1)` is presumably the end
// of an `if(argc>1)` guard -- confirm against the original.
for(int i=0;i1)
{
// First command-line argument, when present, overrides `loop`.
loop = atoll(argv[1]);
}
if(argc>2)
{
// Second command-line argument, when present, overrides `isamelen`.
isamelen = atoll(argv[2]);
}
// NOTE(review): `%d` expects int; the declarations of `loop` and `isamelen`
// are not visible here, so verify the format matches their types.
printf("input loop:%d,isamelen:%d\n",loop,isamelen);
// Run the benchmark with the chosen parameters (strcmp_perfom is defined in
// the missing part of the listing).
strcmp_perfom(loop,isamelen);
return 0;
}
测试结果如下:在匹配串足够长的情况下(长度 1000 及以上),strcmp_asm 比 strcmp_normal 快约 3~4 倍。
input loop:5000,isamelen:100000
loop:5000,strcmp_asm,time:468 ms,strcmp_normal,time:1747 ms
input loop:50000,isamelen:10000
loop:50000,strcmp_asm,time:501 ms,strcmp_normal,time:1749 ms
input loop:500000,isamelen:1000
loop:500000,strcmp_asm,time:494 ms,strcmp_normal,time:1787 ms
input loop:5000000,isamelen:100
loop:5000000,strcmp_asm,time:693 ms,strcmp_normal,time:1922 ms
input loop:50000000,isamelen:10
loop:50000000,strcmp_asm,time:1337 ms,strcmp_normal,time:2424 ms
在匹配串极短的场景下(1、2、4 字节),性能也有 1.5~2 倍左右的差距。
input loop:50000000,isamelen:4
loop:50000000,strcmp_asm,time:849 ms,strcmp_normal,time:1425 ms
input loop:50000000,isamelen:2
loop:50000000,strcmp_asm,time:753 ms,strcmp_normal,time:1130 ms
input loop:50000000,isamelen:1
loop:50000000,strcmp_asm,time:670 ms,strcmp_normal,time:1040 ms