BBYR Achieve
返回信息流
这是一条镜像帖。来源:北邮人论坛 / soft-design / #15790同步于 2007/3/17
该镜像源已超过 30 天没有更新,可能在源站已被删除。
SoftDesign机器人发帖

[求助]哪位高人帮我分析一下以下程序的效率

Jarod
2007/3/17镜像同步52 回复
search1 与 search2 都实现同样的功能,请问谁的效率高? int search1(const char * substr, const char * str) { size_t substr_len = strlen(substr); size_t str_len = strlen(str); if (substr_len > str_len || substr_len==0) return -1; int i,j; for (i = 0; i< str_len; i++) { for (j=0; j<substr_len; i++,j++) if (substr[j] != str[i]) break; i -= j; if (j>= substr_len) return i; } return -1; } int search2(const char * substr, const char * str) { size_t substr_len = strlen(substr); size_t str_len = strlen(str); if (substr_len > str_len || substr_len==0) return -1; const char * str_high_bound, * substr_pos, * substr_high_bound, *str_pos; str_high_bound = str + str_len; substr_high_bound = substr+ substr_len; for(str_pos = str; str_pos < str_high_bound; str_pos ++) { for(substr_pos = substr; substr_pos<substr_high_bound; str_pos++, substr_pos++) { if (*str_pos != *substr_pos) break; } str_pos -= (substr_pos - substr); if (substr_pos >= substr_high_bound) return (str_pos - str); } return -1; } 其实我已经试过了,竟然是search1快。请高人帮分析一下原因。
订阅后,新回复会通过你的通知中心匿名送达。
9 条回复
coolfantasy机器人#1 · 2007/3/18
字符串搜索? 没仔细看 不过有时候是实际效率跟理论最佳效率不符 【 在 Jarod (美牙~~) 的大作中提到: 】 : 标 题: [求助]哪位高人帮我分析一下以下程序的效率 : 发信站: 北邮人论坛 (Sun Mar 18 00:04:38 2007), 站内 : : search1 与 search2 都实现同样的功能,请问谁的效率高? : : int search1(const char * substr, const char * str) { : size_t substr_len = strlen(substr); : size_t str_len = strlen(str); : if (substr_len > str_len || substr_len==0) return -1; : int i,j; : for (i = 0; i< str_len; i++) { : for (j=0; j<substr_len; i++,j++) : if (substr[j] != str[i]) break; : i -= j; : if (j>= substr_len) return i; : } : return -1; : } : int search2(const char * substr, const char * str) { : size_t substr_len = strlen(substr); : size_t str_len = strlen(str); : if (substr_len > str_len || substr_len==0) return -1; : const char * str_high_bound, * substr_pos, * substr_high_bound, *str_pos; : str_high_bound = str + str_len; : substr_high_bound = substr+ substr_len; : for(str_pos = str; str_pos < str_high_bound; str_pos ++) { : for(substr_pos = substr; substr_pos<substr_high_bound; str_pos++, substr_pos++) { : if (*str_pos != *substr_pos) break; : } : str_pos -= (substr_pos - substr); : if (substr_pos >= substr_high_bound) return (str_pos - str); : } : return -1; : } : : : : 其实我已经试过了,竟然是search1快。请高人帮分析一下原因。 : -- : looking for a job... : : ※ 来源:·北邮人论坛 http://forum.byr.edu.cn·[FROM: 59.64.208.*]
StarFallLuna机器人#2 · 2007/3/18
【 在 Jarod (美牙~~) 的大作中提到: 】 : search1 与 search2 都实现同样的功能,请问谁的效率高? : int search1(const char * substr, const char * str) { : size_t substr_len = strlen(substr); : size_t str_len = strlen(str); : if (substr_len > str_len || substr_len==0) return -1; : int i,j; : for (i = 0; i< str_len; i++) { : for (j=0; j<substr_len; i++,j++) ~~~~~~~~I think there may be some problem here try sub="abc" str="aabc" : if (substr[j] != str[i]) break; : i -= j; : if (j>= substr_len) return i; : } : return -1; : } : int search2(const char * substr, const char * str) { : size_t substr_len = strlen(substr); : size_t str_len = strlen(str); : if (substr_len > str_len || substr_len==0) return -1; I have no idea about the input situation.So I can't figure out how much this statement helps : const char * str_high_bound, * substr_pos, * substr_high_bound, *str_pos; : str_high_bound = str + str_len; : substr_high_bound = substr+ substr_len; : for(str_pos = str; str_pos < str_high_bound; str_pos ++) { : for(substr_pos = substr; substr_pos<substr_high_bound; str_pos++, substr_pos++) { : if (*str_pos != *substr_pos) break; : } : str_pos -= (substr_pos - substr); : if (substr_pos >= substr_high_bound) return (str_pos - str); : } : return -1; : } : 其实我已经试过了,竟然是search1快。请高人帮分析一下原因。 It is easier for compiler to do some optimization if you write in the first style. It is very difficult for compiler to optimize code like the second one. Generally speaking, the compiler can manage it better than you for this situation. That's just my personal opinion, wish it help
RemoteFish机器人#3 · 2007/3/18
把这两个函数分放到两个文件中, 用 gcc -S 可以得到 C 译成的 ASM 代码, 比如 a.s b.s, 用 vimdiff 可以比较它们的不同. 这里在不加任何优化参数的情况下, 得到的结果是代码二比代码一多了一些代码. 直观地来看, 两个代码虽然一个基于数组, 一个指针, 但是后者并没有针对指针及字符串的特点做有益的优化, 相反, 由于指针自增时一次要增 4, 数组索引 (i, j) 自增时只增 1, 因此可能用 inc 运算来代替并提高速度, 因此代码二反而浪费了一些操作 另外, 楼主不妨把所有的 x++ 改成 ++x. 关于这二者的区别就在于是否需要一个临时变量. 还有, 利用编译器的优化选项往往可以减少代码设计中的欠缺之处. P.S. 不妨传一些测试数据上来
RemoteFish机器人#4 · 2007/3/18
【 在 StarFallLuna 的大作中提到: 】 : try sub="abc" str="aabc" 这个, 楼主后来是有把位置重置的操作的
Jarod机器人#5 · 2007/3/18
vc2005的debug版,而且禁止优化。以下是反汇编,两者非常相似。 PS: search1 实际是 search_slow. search2 -> search. int search1(const char * substr, const char * str) { 004115A0 push ebp 004115A1 mov ebp,esp 004115A3 sub esp,0F0h 004115A9 push ebx 004115AA push esi 004115AB push edi 004115AC lea edi,[ebp-0F0h] 004115B2 mov ecx,3Ch 004115B7 mov eax,0CCCCCCCCh 004115BC rep stos dword ptr es:[edi] size_t substr_len = strlen(substr); 004115BE mov eax,dword ptr [substr] 004115C1 push eax 004115C2 call @ILT+275(_strlen) (411118h) 004115C7 add esp,4 004115CA mov dword ptr [substr_len],eax size_t str_len = strlen(str); 004115CD mov eax,dword ptr [str] 004115D0 push eax 004115D1 call @ILT+275(_strlen) (411118h) 004115D6 add esp,4 004115D9 mov dword ptr [str_len],eax if (substr_len > str_len || substr_len==0) return -1; 004115DC mov eax,dword ptr [substr_len] 004115DF cmp eax,dword ptr [str_len] 004115E2 ja search_slow+4Ah (4115EAh) 004115E4 cmp dword ptr [substr_len],0 004115E8 jne search_slow+4Fh (4115EFh) 004115EA or eax,0FFFFFFFFh 004115ED jmp search_slow+0C1h (411661h) int i,j; for (i = 0; i< str_len; i++) { 004115EF mov dword ptr [i],0 004115F6 jmp search_slow+61h (411601h) 004115F8 mov eax,dword ptr [i] 004115FB add eax,1 004115FE mov dword ptr [i],eax 00411601 mov eax,dword ptr [i] 00411604 cmp eax,dword ptr [str_len] 00411607 jae search_slow+0BEh (41165Eh) for (j=0; j<substr_len; i++,j++) 00411609 mov dword ptr [j],0 00411610 jmp search_slow+84h (411624h) 00411612 mov eax,dword ptr [i] 00411615 add eax,1 00411618 mov dword ptr [i],eax 0041161B mov ecx,dword ptr [j] 0041161E add ecx,1 00411621 mov dword ptr [j],ecx 00411624 mov eax,dword ptr [j] 00411627 cmp eax,dword ptr [substr_len] 0041162A jae search_slow+0A6h (411646h) if (substr[j] != str[i]) break; 0041162C mov eax,dword ptr [substr] 0041162F add eax,dword ptr [j] 00411632 movsx ecx,byte ptr [eax] 00411635 mov edx,dword ptr [str] 00411638 add edx,dword ptr [i] 0041163B movsx eax,byte ptr [edx] 0041163E cmp ecx,eax 00411640 je search_slow+0A4h (411644h) 00411642 jmp search_slow+0A6h (411646h) i -= j; 00411644 jmp search_slow+72h (411612h) 00411646 mov eax,dword ptr [i] 00411649 sub eax,dword ptr [j] 0041164C mov dword ptr [i],eax if (j>= substr_len) return i; 0041164F mov eax,dword ptr [j] 00411652 cmp eax,dword ptr [substr_len] 00411655 jb search_slow+0BCh (41165Ch) 00411657 mov eax,dword ptr [i] 0041165A jmp search_slow+0C1h (411661h) } 0041165C jmp search_slow+58h (4115F8h) return -1; 0041165E or eax,0FFFFFFFFh } int search2(const char * substr, const char * str) { 004116B0 push ebp 004116B1 mov ebp,esp 004116B3 sub esp,108h 004116B9 push ebx 004116BA push esi 004116BB push edi 004116BC lea edi,[ebp-108h] 004116C2 mov ecx,42h 004116C7 mov eax,0CCCCCCCCh 004116CC rep stos dword ptr es:[edi] size_t substr_len = strlen(substr); 004116CE mov eax,dword ptr [substr] 004116D1 push eax 004116D2 call @ILT+275(_strlen) (411118h) 004116D7 add esp,4 004116DA mov dword ptr [substr_len],eax size_t str_len = strlen(str); 004116DD mov eax,dword ptr [str] 004116E0 push eax 004116E1 call @ILT+275(_strlen) (411118h) 004116E6 add esp,4 004116E9 mov dword ptr [str_len],eax if (substr_len > str_len || substr_len==0) return -1; 004116EC mov eax,dword ptr [substr_len] 004116EF cmp eax,dword ptr [str_len] 004116F2 ja search+4Ah (4116FAh) 004116F4 cmp dword ptr [substr_len],0 004116F8 jne search+52h (411702h) 004116FA or eax,0FFFFFFFFh 004116FD jmp search+0D6h (411786h) const char * str_high_bound, * substr_pos, * substr_high_bound, *str_pos; str_high_bound = str + str_len; 00411702 mov eax,dword ptr [str] 00411705 add eax,dword ptr [str_len] 00411708 mov dword ptr [str_high_bound],eax substr_high_bound = substr+ substr_len; 0041170B mov eax,dword ptr [substr] 0041170E add eax,dword ptr [substr_len] 00411711 mov dword ptr [substr_high_bound],eax for(str_pos = str; str_pos < str_high_bound; str_pos ++) { 00411714 mov eax,dword ptr [str] 00411717 mov dword ptr [str_pos],eax 0041171A jmp search+75h (411725h) 0041171C mov eax,dword ptr [str_pos] 0041171F add eax,1 00411722 mov dword ptr [str_pos],eax 00411725 mov eax,dword ptr [str_pos] 00411728 cmp eax,dword ptr [str_high_bound] 0041172B jae search+0D3h (411783h) for(substr_pos = substr; substr_pos<substr_high_bound; str_pos++, substr_pos++) { 0041172D mov eax,dword ptr [substr] 00411730 mov dword ptr [substr_pos],eax 00411733 jmp search+97h (411747h) 00411735 mov eax,dword ptr [str_pos] 00411738 add eax,1 0041173B mov dword ptr [str_pos],eax 0041173E mov ecx,dword ptr [substr_pos] 00411741 add ecx,1 00411744 mov dword ptr [substr_pos],ecx 00411747 mov eax,dword ptr [substr_pos] 0041174A cmp eax,dword ptr [substr_high_bound] 0041174D jae search+0B3h (411763h) if (*str_pos != *substr_pos) break; 0041174F mov eax,dword ptr [str_pos] 00411752 movsx ecx,byte ptr [eax] 00411755 mov edx,dword ptr [substr_pos] 00411758 movsx eax,byte ptr [edx] 0041175B cmp ecx,eax 0041175D je search+0B1h (411761h) 0041175F jmp search+0B3h (411763h) } 00411761 jmp search+85h (411735h) str_pos -= (substr_pos - substr); 00411763 mov eax,dword ptr [substr_pos] 00411766 sub eax,dword ptr [substr] 00411769 mov ecx,dword ptr [str_pos] 0041176C sub ecx,eax 0041176E mov dword ptr [str_pos],ecx if (substr_pos >= substr_high_bound) return (str_pos - str); 00411771 mov eax,dword ptr [substr_pos] 00411774 cmp eax,dword ptr [substr_high_bound] 00411777 jb search+0D1h (411781h) 00411779 mov eax,dword ptr [str_pos] 0041177C sub eax,dword ptr [str] 0041177F jmp search+0D6h (411786h) } 00411781 jmp search+6Ch (41171Ch) return -1; 00411783 or eax,0FFFFFFFFh } 【 在 StarFallLuna 的大作中提到: 】 : ~~~~~~~~I think there may be some problem here : try sub="abc" str="aabc" : I have no idea about the input situation.So I can't figure out how much this statement helps : ...................
Jarod机器人#6 · 2007/3/18
如果想测试: 主串:ababababcccbababccbcbabaaacbcbcacbaacbabcababcbcababcbcbcabacbcbabcbabcbcaaacbabcbcaabacbabccacabacbcbcbcbcbca 子串:cbcbcbcbcbcb 不过,这个数据比较简单了。得出的结果是前者更快。
RemoteFish机器人#7 · 2007/3/18
我把楼主把汇编得到的代码做了一个 diff (a.s 是 search1, b.s 是 search2, 并去掉了地址), 可以看一下区别 diff -Naur {a,b}.s --- a.s 2007-03-18 10:20:02.257887740 +0800 +++ b.s 2007-03-18 10:20:11.731447540 +0800 @@ -1,12 +1,12 @@ -int search1(const char * substr, const char * str) { +int search2(const char * substr, const char * str) { push ebp mov ebp,esp -sub esp,0F0h +sub esp,108h push ebx push esi push edi -lea edi,[ebp-0F0h] -mov ecx,3Ch +lea edi,[ebp-108h] +mov ecx,42h mov eax,0CCCCCCCCh rep stos dword ptr es:[edi] size_t substr_len = strlen(substr); @@ -24,56 +24,68 @@ if (substr_len > str_len || substr_len==0) return -1; mov eax,dword ptr [substr_len] cmp eax,dword ptr [str_len] -ja search_slow+4Ah (4115EAh) +ja search+4Ah (4116FAh) cmp dword ptr [substr_len],0 -jne search_slow+4Fh (4115EFh) +jne search+52h (411702h) or eax,0FFFFFFFFh -jmp search_slow+0C1h (411661h) - int i,j; - for (i = 0; i< str_len; i++) { -mov dword ptr [i],0 -jmp search_slow+61h (411601h) -mov eax,dword ptr [i] +jmp search+0D6h (411786h) + const char * str_high_bound, * substr_pos, * substr_high_bound, *str_pos; + str_high_bound = str + str_len; +mov eax,dword ptr [str] +add eax,dword ptr [str_len] +mov dword ptr [str_high_bound],eax + substr_high_bound = substr+ substr_len; +mov eax,dword ptr [substr] +add eax,dword ptr [substr_len] +mov dword ptr [substr_high_bound],eax + for(str_pos = str; str_pos < str_high_bound; str_pos ++) { +mov eax,dword ptr [str] +mov dword ptr [str_pos],eax +jmp search+75h (411725h) +mov eax,dword ptr [str_pos] add eax,1 -mov dword ptr [i],eax -mov eax,dword ptr [i] -cmp eax,dword ptr [str_len] -jae search_slow+0BEh (41165Eh) - for (j=0; j<substr_len; i++,j++) -mov dword ptr [j],0 -jmp search_slow+84h (411624h) -mov eax,dword ptr [i] +mov dword ptr [str_pos],eax +mov eax,dword ptr [str_pos] +cmp eax,dword ptr [str_high_bound] +jae search+0D3h (411783h) + for(substr_pos = substr; substr_pos<substr_high_bound; str_pos++, substr_pos++) { +mov eax,dword ptr [substr] +mov dword ptr [substr_pos],eax +jmp search+97h (411747h) +mov eax,dword ptr [str_pos] add eax,1 -mov dword ptr [i],eax -mov ecx,dword ptr [j] +mov dword ptr [str_pos],eax +mov ecx,dword ptr [substr_pos] add ecx,1 -mov dword ptr [j],ecx -mov eax,dword ptr [j] -cmp eax,dword ptr [substr_len] -jae search_slow+0A6h (411646h) - if (substr[j] != str[i]) break; -mov eax,dword ptr [substr] -add eax,dword ptr [j] +mov dword ptr [substr_pos],ecx +mov eax,dword ptr [substr_pos] +cmp eax,dword ptr [substr_high_bound] +jae search+0B3h (411763h) + if (*str_pos != *substr_pos) break; +mov eax,dword ptr [str_pos] movsx ecx,byte ptr [eax] -mov edx,dword ptr [str] -add edx,dword ptr [i] +mov edx,dword ptr [substr_pos] movsx eax,byte ptr [edx] cmp ecx,eax -je search_slow+0A4h (411644h) -jmp search_slow+0A6h (411646h) - i -= j; -jmp search_slow+72h (411612h) -mov eax,dword ptr [i] -sub eax,dword ptr [j] -mov dword ptr [i],eax - if (j>= substr_len) return i; -mov eax,dword ptr [j] -cmp eax,dword ptr [substr_len] -jb search_slow+0BCh (41165Ch) -mov eax,dword ptr [i] -jmp search_slow+0C1h (411661h) +je search+0B1h (411761h) +jmp search+0B3h (411763h) + } +jmp search+85h (411735h) + str_pos -= (substr_pos - substr); +mov eax,dword ptr [substr_pos] +sub eax,dword ptr [substr] +mov ecx,dword ptr [str_pos] +sub ecx,eax +mov dword ptr [str_pos],ecx + if (substr_pos >= substr_high_bound) return (str_pos - str); +mov eax,dword ptr [substr_pos] +cmp eax,dword ptr [substr_high_bound] +jb search+0D1h (411781h) +mov eax,dword ptr [str_pos] +sub eax,dword ptr [str] +jmp search+0D6h (411786h) } -jmp search_slow+58h (4115F8h) +jmp search+6Ch (41171Ch) return -1; or eax,0FFFFFFFFh -} +}
Jarod机器人#8 · 2007/3/18
没有看出来影响效率的关键区别
RemoteFish机器人#9 · 2007/3/18
rf@debian:~/shm$ grep '^\+' s.diff | wc -l 59 rf@debian:~/shm$ grep '^-' s.diff | wc -l 47 就是说, search2 明显比 search1 多使用了 12 条汇编指令. 虽然代码数量并不与速度成正比, 但我们可以看到程序结构是一样的, 那么我们差不多可以肯定这多出来的代码是起到了反作用的. 楼主不妨想一想, 这多出来的代码究竟做了些什么.