Aktív témák
-
P.H.
senior tag
Eheti 1000-mátrix-hetente rovat
Core2 (2.5 GHz): 60 sec alatt 510000 mátrix
{ Fragment entry: zero-fills the __0STARROW fields and sets up the counters for the @@ARGUMENT loop. }
{ NOTE(review): the routine prologue is not part of this listing. EBP is presumably a negative, }
{ 4-scaled element count, because every loop here counts an index upwards until it reaches zero - }
{ confirm against the prologue. The braced tags in front of each instruction are the original }
{ code-offset and scheduling annotations and are kept unchanged. }
{@04-} { x1 } movsx ebx,byte ptr es:[edx]
{ EBX = sign-extended byte at [EDX]; it is the first value compared in @@ARGUMENT }
{1-} xor eax,eax
{2-} mov esi,ebp
{0} and esi,-8
{ ESI = EBP with the low three bits cleared; the loop below steps it by 8 until it reaches zero }
@init:
{ each pass stores EAX (zero) into two __0STARROW fields: index ESI and index ESI+4 }
{@0F} mov [edi+esi*08h+(00h*08h)+__0STARROW],eax
{1} mov [edi+esi*08h+(04h*08h)+__0STARROW],eax
{2*} add esi,08h
{0*} jnz @init { clears ESI register }
{ step EDX past the byte already loaded into EBX }
{ } add edx,01h
{ ECX = EBP is the loop index of @@ARGUMENT }
{ -} mov ecx,ebp
{ @@ARGUMENT: consumes one input byte per pass (EBX holds the current one, the next is preloaded) }
{ and fills the __FIXEDROW field at index ECX, stepping ECX by 4 until it reaches zero. }
@@ARGUMENT: { K10:2.6 Core2:2.9 - 3.3 uop/clk - 1640*2+6550 }
{ ESI is zero here (left at zero by the @init loop), so SF reflects the sign of the current byte. }
{ These flags are consumed by the cmovs below; the lea/movsx/mov in between do not write flags. }
{@20} cmp ebx,esi { 4 AGU + 9 EX uops on Kaveri }
{ EAX = EBP + 4*EBX }
{1} lea eax,[ebp+ebx*04h+00h] { 3 clk 8 ALU ops on Core 2 }
{ preload the next input byte }
{ NOTE(review): no size specifier on this memory operand, unlike the byte ptr load at the fragment }
{ entry; presumably byte-sized because EDX advances by 1 - confirm the assembler default. }
{2} movsx ebx,[edx]
{0} lea edx,[edx+01h]
{ the store uses EAX before the cmovs correction below }
{1} mov [edi+eax*08h+__0STARROW],ebp { __0COUNTER <- EBP }
{ replace EAX by ESI (zero) when the compared byte was negative }
{2} cmovs eax,esi
{0} mov [edi+ecx*08h+__FIXEDROW],eax
{1*} add ecx,04h
{2*} jnz @@ARGUMENT { clears ECX register }
{ ESP is moved by EBP; presumably this reserves stack space, given a negative EBP - TODO confirm }
{ } add esp,ebp
{ EAX = EDI; the row loop reads through EAX and moves it with sub eax,ebp }
{ -} mov eax,edi
{ saved copy of EBP: reloaded from [esp+00h] after @findrowmin and popped on the exit paths }
{ -} push ebp
{ EDX = EBP - 4 is the starting index of @@REDUCE_ROWS, which adds 4 before using it }
{@40-} lea edx,[ebp-04h]
{ @@REDUCE_ROWS: one pass per index EDX (stepping by 4 up to zero). Each pass stores the modifier }
{ computed by the previous pass, then either skips the row (negative __FIXEDROW value) or scans it }
{ in @findrowmin for its unsigned minimum. }
@@REDUCE_ROWS:
{ ECX is the value produced by the previous pass: zero for a skipped row, otherwise the negated minimum }
{@43} mov [edi+edx*08h+__ROWMODIFIER],ecx
{ ESI = __FIXEDROW field of the next index (EDX+4) }
{1} mov esi,[edi+edx*08h+(04h*08h)+__FIXEDROW]
{2*} add edx,04h
{ index reached zero: all rows done }
{0*} jz @@REDUCE_COLUMNS
{@50} mov [edi+edx*08h+__0STAR],esi
{2-} xor ecx,ecx
{ EAX moves by -EBP per pass; presumably the pointer to the current matrix row - TODO confirm }
{0} sub eax,ebp
{ negative __FIXEDROW value: skip the scan and loop with ECX = 0 }
{1**} test esi,esi { JS/JNS can only fuse with TEST }
{2**} js @@REDUCE_ROWS
{ Two running unsigned minima are kept: EBX, seeded with EBP, and ECX, seeded with the element at EBP. }
{ -} mov ebx,ebp { EBX < 0 for even minimum }
{ } mov ecx,[eax+ebp]
{@61} or ecx,[edi+ebp*08h+__0STARROW]
{ adds bit 2 of EBP back onto EBP: the start index is bumped by 4 when that bit is set, }
{ and the loop below then steps it by 8 }
{ } and ebp,04h
{ } add ebp,ebx
{@69} @findrowmin: { K10:2.8 Core2:2.2 - 2.6 uop/clk - 1100*2+5000 }
{ each element is ORed with the __0STARROW field of its index before it is compared }
{0} mov esi,[eax+ebp+00h] { 4 AGU + 8 EX uops on Kaveri }
{1} or esi,[edi+ebp*08h+(00h*08h)+__0STARROW] { 3 clk 10 ALU ops on Core 2 }
{2} add ebp,08h
{ EBX = unsigned min(EBX, first element of the pair) }
{@72} cmp esi,ebx
{1} cmovb ebx,esi
{ second element of the pair; EBP has already been advanced, hence the -04h displacements }
{2} mov esi,[eax+ebp-04h]
{0} or esi,[edi+ebp*08h-(04h*08h)+__0STARROW]
{ ECX = unsigned min(ECX, second element of the pair) }
{1} cmp esi,ecx
{@81} cmovb ecx,esi
{0**} test ebp,ebp
{1**} jnz @findrowmin
{ restore EBP from the copy pushed before the row loop }
{ } mov ebp,[esp+00h]
{ ECX = unsigned minimum of the two running minima }
{ } cmp ebx,ecx
{ } cmovb ecx,ebx
{ the negated minimum is stored as __ROWMODIFIER at the top of the next pass }
{@90} neg ecx
{ loop while the negated minimum is zero or negative (signed); a strictly positive result }
{ falls through into @@ABNORMAL_EXIT }
{ } jle @@REDUCE_ROWS
{ @@ABNORMAL_EXIT: error path. Unwinds the stack changes made before the row loop, writes }
{ 0FFFFFFFFh into TRESULT.OPTIMUM and leaves through an indirect jump read from the stack. }
{ Reached by fall-through from the row loop and by js from the column loop. }
@@ABNORMAL_EXIT:
{ drop the EBP copy pushed before the row loop }
{@94} pop eax
{ reverse of the earlier add esp,ebp }
{1} sub esp,ebp
{2} mov edx,0FFFFFFFFh
{ ESI = pointer read from the __MARKS stack slot; used below as the base of a TRESULT record }
{0} mov esi,[esp+__MARKS]
{@A0} mov [esi+TRESULT.OPTIMUM],edx
{ EBX = TRESULT.NEXTIVALUE; presumably consumed by the jump target, which is not in this listing }
{2} mov ebx,[esi+TRESULT.NEXTIVALUE]
{0} jmp dword ptr [esp+_INVALIDRESULT]
{ Not reachable by fall-through (it follows an unconditional jmp); the x6 tag presumably marks }
{ a 6-byte filler used to align the next label - TODO confirm }
{ } { x6 } test ebp,0FFFFFFFFh
{ @initcol: hand-over from the reduction phase to @@1ST_STEP. Flips the sign of EBP and of the }
{ dword on top of the stack, and records ECX as __INITCOL. }
{@90} @initcol:
{ negate the dword on top of the stack in place (on the visible paths this is the pushed EBP copy) }
{0} neg dword ptr [esp+00h]
{ ESI = EBP before negation; @@1ST_STEP uses ESI as its loop index }
{1-} mov esi,ebp
{2} neg ebp
{0} mov [edi+__INITCOL],ecx
{ EBX = all ones: neutral start value for the running AND in @@1ST_STEP }
{1} or ebx,-1
{2} jmp @@1ST_STEP { long jump instruction }
{ Column pass. Index EDX steps downwards by 4. A column whose __0STARROW field tests negative }
{ against EBP gets a zero __COLMODIFIER; any other column is scanned for its unsigned minimum }
{ (@findcolmin) and then searched for a row that attains it (@seekcol0 / @test0row). }
{ The three labels ahead of @@REDUCE_COLUMNS are the loop tail: they are entered by jumps from below. }
{@A2} @free0col:
{ ECX = EDX - 4 }
{ } lea ecx,[edx-04h]
{@A5} @setcolmod:
{ ESI is zero when arriving from js @setcolmod, otherwise the column minimum found by @findcolmin }
{ } mov [edi+edx*08h+__COLMODIFIER],esi
@@REDUCE_COLUMNS:
{ The lea and mov above do not write flags, so ZF here is whatever the jumping instruction left. }
{ NOTE(review): on entry via jz @@REDUCE_COLUMNS from the row loop ZF is set, so this jz is taken }
{ immediately on that path as written - confirm that this is intended. }
{1**} jz @initcol
{0} sub edx,04h
{@B0-} xor esi,esi
{ a negative AND of the __0STARROW field with EBP: store a zero modifier and go to the next column }
{1**} test [edi+edx*08h+__0STARROW],ebp
{2**} js @setcolmod
{ EBX = EDI + EDX - EBP: address of the first element read below; it moves by -EBP per row }
{ } lea ebx,[edi+edx]
{ ECX = row index, counted from EBP up to zero; EAX = running unsigned minimum, seeded with EBP }
{ -} mov ecx,ebp
{ -} mov eax,ebp
{ } sub ebx,ebp
{@C0} @findcolmin: { K10:3.0 Core2:_._ - _._ uop/clk - ____*2+____ }
{ ESI = element + __ROWMODIFIER of the row, ORed with the __FIXEDROW field of the row }
{0} mov esi,[ebx] { 3 AGU + 8 EX uops on Kaveri }
{1} add esi,[edi+ecx*08h+__ROWMODIFIER] { 3 clk 9 ALU ops on Core 2 }
{2} or esi,[edi+ecx*08h+__FIXEDROW]
{ combined value is zero: go straight to the row test }
{0} jz @test0row
{1} sub ebx,ebp
{ EAX = unsigned min(EAX, ESI) }
{2} cmp esi,eax
{@D0} cmovb eax,esi
{1*} add ecx,04h
{2*} jnz @findcolmin
{ restart the row walk: ECX = EBP - 4 (incremented before use), ESI = minimum, EBX = EDI + EDX }
{ } lea ecx,[ebp-04h]
{ -} mov esi,eax
{ } lea ebx,[edi+edx]
{ a negative minimum is treated as an error }
{@E0**} test eax,eax { JS/JNS can only fuse with TEST }
{ **} js @@ABNORMAL_EXIT
{@E4} @seekcol0:
{ EAX = __ROWMODIFIER of the next row (index ECX+4) }
{0} mov eax,[edi+ecx*08h+(04h*08h)+__ROWMODIFIER]
{1*} add ecx,04h
{ row index reached zero without a usable row }
{2*} jz @free0col
{0} sub ebx,ebp
{ EAX = element + row modifier; keep seeking until it equals the column minimum in ESI }
{1} add eax,[ebx]
{@F1**} cmp eax,esi { maximum data value = 00FFFFFFh -> marked elements stay negative }
{0**} jnz @seekcol0
@test0row:
{ a negative AND of the __0STAR field of the row with EBP: keep seeking }
{ **} test [edi+ecx*08h+__0STAR],ebp
{ **} js @seekcol0
{ link column EDX and row ECX to each other }
{ } mov [edi+edx*08h+__0STARROW],ecx
{@FE} mov [edi+ecx*08h+__0STAR],edx
{ SF is still clear from the test above (the two mov do not write flags), so this jump is always taken }
{@02} jns @free0col { forced conditional jump for Sandy Bridge }
{ ----------------------------------------------------------------------------------------------- }
{ The two lines below follow an always-taken jump and no visible label targets them; the x12 and x5 }
{ tags presumably give their size in bytes, i.e. filler used for alignment - TODO confirm. }
{@04} { x12 } mov eax,00000000h; mov edx,00000000h; xor ebp,ebp
{@10} { x5 } mov ecx,00000000h
{ @@5TH_STEP: applies the value in EDX to the modifiers. @DEC5_free_col walks the columns from }
{ __INITCOL up to index zero; @inc5row then walks a list of saved row indices kept on the stack. }
{ Entered from @chk2col with ECX = 0. }
@@5TH_STEP: { K10:2.6 Core2:2.4 - 2.8 uop/clk - 2000*2+5100 }
{@15} mov eax,[edi+__INITCOL] { lea eax,[ebp+04h]; neg eax }
{ ESI = number of saved entries, read from the __SIZE stack slot }
{1} mov esi,[esp+__SIZE]
{ EBX = sign-extended low word of __MINCOLROW }
{2} movsx ebx,word ptr [edi+__MINCOLROW]
{@20} @DEC5_free_col: { 3 AGU + 6 EX uops on Kaveri }
{ ECX was prepared by the previous pass: EDX or zero }
{0} add [edi+eax*08h+__COLMODIFIER],ecx { 2 clk 5 ALU ops on Core 2 }
{ ECX for the next column (index EAX+4): EDX when its __COLMARK field is negative, otherwise zero }
{1} mov ecx,[edi+eax*08h+(04h*08h)+__COLMARK]
{2} sar ecx,1Fh
{0} and ecx,edx
{1*} add eax,04h
{@30*} jnz @DEC5_free_col { clears EAX register [NOT USED] }
{ EAX = stack dword at __SIZE + 4*ESI }
{ } mov eax,[esp+__SIZE+esi*04h]
{ ECX = sign-extended high word of __MINCOLROW }
{ } movsx ecx,word ptr [edi+__MINCOLROW+02h]
{ enter the row loop at its load, skipping the first add }
{ } jmp @INC5_marked_row
{ follows an unconditional jmp; the x4 tag presumably marks 4 bytes of filler - TODO confirm }
{ x4 } xor ebp,ebp; xor esi,esi
{@40} @inc5row:
{ add EDX to the __ROWMODIFIER of row EAX, then take the index preloaded into EBP }
{0} add [edi+eax*08h+__ROWMODIFIER],edx { 4 AGU + 4 EX uops on Kaveri }
{1-} mov eax,ebp
@INC5_marked_row:
{ EBP serves as a scratch register here: it receives the stack dword at index ESI }
{2} mov ebp,[esp+esi*04h]
{0*} sub esi,01h
{ signed test: loops while ESI is still zero or above after the decrement }
{1*} jge @inc5row { sets ESI to 0FFFFFFFFh }
{ @@3RD_STEP: reached by fall-through from the @inc5row loop. Marks row EBX, appends it to the }
{ saved-row list on the stack and re-marks column ESI, then @chk2col looks for the next column }
{ whose __COLMARK test is negative. }
@@3RD_STEP:
{ ESI is 0FFFFFFFFh on fall-through (see the exit comment of the loop above), so this AND loads }
{ the __0STAR field of row EBX into ESI and sets ZF when that field is zero }
{@4E*} and esi,[edi+ebx*08h+__0STAR]
{@52*} jz @4TH_STEP { long jump instruction }
{@58} @re3start:
{ } mov [edi+ebx*08h+__0COLON___ROWMARK],ecx { set row mark }
{ restart the column scan from __INITCOL; the ES override carries the x1 tag and presumably only }
{ lengthens the encoding by one byte - TODO confirm }
{ } { x1 } mov ecx,es:[edi+__INITCOL] { lea ecx,es:[ebp-04h] }
{@60-} mov edx,ebx
{@62} @mark3row:
{ append EBX to the list on the stack at position EAX, then publish the new count in __SIZE }
{ } mov [esp+__OFFS+eax*04h],ebx
{ -} xor ebx,ebx
{ } mov [edi+esi*08h+__COLMARK],esi { unmark column with negative }
{ } inc eax
{ } mov [esp+__SIZE],eax
{@71} @chk2col:
{0*} add ecx,04h
{ column index reached zero: no column left to process }
{1*} jz @@5TH_STEP { clears ECX register }
{ keep scanning while the AND of the __COLMARK field with its own index is not negative; }
{ a negative result falls through into @@2ND_STEP }
{2**} test [edi+ecx*08h+__COLMARK],ecx { STORE FORWARDED from @mark3row }
{0**} jns @chk2col
{ @@2ND_STEP: scans column ECX over the rows, starting at row index EBX - EBP. For each row it forms }
{ rowmodifier - colmodifier + element, ORs in the row mark, and either stops at a zero (@zero) or }
{ tracks the unsigned minimum in EDX together with its row index in CX. }
@@2ND_STEP:
{ the column modifier is kept on top of the stack for the whole scan and popped at @zero }
{12} push dword ptr [edi+ecx*08h+__COLMODIFIER]
{ EAX = EDI + ECX: base address for the column walk; it moves by EBP per row }
{@80} lea eax,[ecx+edi]
{ } sub ebx,ebp
{ move the column index into the upper half of ECX; the low word CX is now zero }
{ } sal ecx,10h
{ } mov esi,[edi+ebx*08h+__ROWMODIFIER]
{@8C} @ZERO2col: { K10:3.0 Core2:2.5 - 2.9 uop/clk - 1500*2+5600 } { 4 AGU + 11 EX uops on Kaveri }
{0} sub esi,[esp+00h] { 4 clk 13 ALU ops on Core 2 }
{@8F} add esi,[eax+ebp]
{ lea does not write flags, so the jo below still tests the add above }
{C2D} lea eax,[eax+ebp] { Core 2, Kaveri }
{2} jo @over2flow { overflow: (-x)+(-y)=(+z) or (+x)+(+y)=(-z) }
{0} or esi,[edi+ebx*08h+__0COLON___ROWMARK]
{1} jz @zero
{ alternative placement of the pointer step, disabled by the line comment }
{K10}// lea eax,[eax+ebp] { K10, Sandy Bridge, Ivy Bridge }
{ new unsigned minimum: EDX takes the value and CX takes the low word of the row index EBX }
{0} cmp esi,edx
{@9F} cmovb edx,esi
{@A2} cmovb cx,bx
@over2flow:
{ preload the modifier of the next row (index EBX+4) }
{0} mov esi,[edi+ebx*08h+(04h*08h)+__ROWMODIFIER]
{1*} add ebx,04h
{2*} jnz @ZERO2col { clears EBX register }
{@AF} @zero:
{ reached with EBX = 0 when the scan ran to the end, or with EBX <> 0 and ESI = 0 when a zero was found }
{0} pop eax { add esp,04h } { forces ESP handling to AGU/memory pipe on Kaveri/Core }
{@B0-} mov eax,ecx
{ ECX becomes the column index again; CF receives bit 15 of the old ECX, i.e. the sign bit of its low word }
{2} sar ecx,10h
{ CF clear: keep the stored __MINCOLROW value; CF set: store the packed value just built }
{0} cmovnc eax,[edi+__MINCOLROW]
{1} mov [edi+__MINCOLROW],eax
{ EBX = 0 means the column held no zero: go on to the next column }
{2**} test ebx,ebx
{0**} jz @chk2col
{@C0*} add esi,[edi+ebx*08h+__0STAR] { zero found -> ESI=0 }
{2*} jz @4TH_STEP
{ compare the low word of the __MINCOLROW value with the current row; the mov in between does not write flags }
{0} cmp ax,bx
{1} { x1 } mov eax,ss:[esp+__SIZE]
{2} jz @re3start
{ signed compare: ECX = ESI when ESI is below ECX; the row mark stored is the ECX value from before that }
{@D0} cmp esi,ecx
{1} mov [edi+ebx*08h+__0COLON___ROWMARK],ecx { set row mark }
{2} cmovl ecx,esi
{0*} sub ecx,04h { never clears ECX register }
{1*} jnz @mark3row { forced conditional jump for Sandy Bridge }
{ the two lines below follow the always-taken jump above; the x2 and x4 tags presumably mark filler bytes - TODO confirm }
{ x2 } xor esi,esi
{@E0} { x4 } lea eax,[ebp+ebp+00h]
{ Fourth step: follows a chain of row/column links and re-points __0STAR and __0STARROW along it. }
{ Two distinct labels are used: @@4TH_STEP is the loop head (it first copies EDX to EBX), and }
{ @4TH_STEP is the entry used by the jumps from the third and second step, with EBX and ECX already set. }
@@4TH_STEP: { 5 AGU + 3 EX uops on Kaveri }
{@E4-} mov ebx,edx { 2 clk 2 ALU ops on Core 2 }
@4TH_STEP:
{ EDX = previous __0STARROW link of column ECX, read before it is overwritten below }
{@E6} mov edx,[edi+ecx*08h+__0STARROW]
{ link row EBX and column ECX to each other }
{2} mov [edi+ebx*08h+__0STAR],ecx
{0} mov [edi+ecx*08h+__0STARROW],ebx
{ next column of the chain: the row mark of the row that was linked before }
{@F0} mov ecx,[edi+edx*08h+__0COLON___ROWMARK]
{ the chain ends when the previous link was zero }
{2**} cmp edx,00h
{0**} jnz @@4TH_STEP { clears EDX register }
{ ESI is zero on both visible entries (each is a jz on a result left in ESI) and EDX is zero after }
{ the loop, so ESI and EDX both become -EBP here and ECX becomes that value minus 4 }
{ } sub esi,ebp
{ } sub edx,ebp
{ } lea ecx,[esi-04h] { mov ecx,[edi+__INITCOL] }
{ @@1ST_STEP: one pass per index ESI (stepping by 4 up to zero). Rebuilds __COLMARK as the bitwise }
{ complement of __0STARROW, copies __FIXEDROW into the row-mark field, and records an index in ECX. }
@@1ST_STEP: { K10:2.8 Core2:2.9 - 3.2 uop/clk - 1500*2+6100 }
{@00} mov eax,[edi+esi*08h+__0STARROW] { 4 AGU + 7 EX uops on Kaveri }
{ running AND of all __0STARROW fields seen so far; its sign flag is consumed by the cmovs below }
{ (the not and the two mov in between do not write flags) }
{1} and ebx,eax { 3 clk 6 ALU ops on Core 2 }
{2} not eax
{0} mov [edi+esi*08h+__COLMARK],eax
{1} mov eax,[edi+esi*08h+__FIXEDROW]
{ ECX = current index while the running AND is still negative }
{2} cmovs ecx,esi
{0} mov [edi+esi*08h+__0COLON___ROWMARK],eax
{1*} add esi,04h
{2*} jnz @@1ST_STEP { clears ESI register }
{ reset the saved-row count in the __SIZE stack slot to zero (ESI is zero here) }
{ } mov [esp+__SIZE],esi
{ -} xor ebx,ebx
{ ECX + 4 = 0 ends the iteration; otherwise continue with the second step }
{@21*} add ecx,04h { long jump instruction }
{ *} jnz @@2ND_STEP { ===>>> EBX: 00h EDX:negative ECX:initcol (>= EBP) }
{ Finished: ESI = pointer read from the stack slot at EBP + 4 + __MARKS, EBX = EDI. }
{ ECX is zero here and serves as the sum accumulator of @@results. }
{ } mov esi,[esp+ebp+04h+__MARKS]
{ -} mov ebx,edi { work matrix unmodified } { [esp+__SAVE] }
{ @@results: one pass per index EDX (stepping by 4 up to zero). Writes one byte per pass to the }
{ buffer at ESI and accumulates the selected matrix elements in ECX, then unwinds the stack and }
{ stores the sum in TRESULT.OPTIMUM. The instruction after this block jumps to @onchain, which is }
{ not defined in this listing. }
@@results:
{ EAX = __0STAR field at index EDX }
{@30} mov eax,[edi+edx*08h+__0STAR] { 3 AGU + 8 EX uops on Kaveri }
{ EBX moves by EBP per pass; ECX accumulates the dword at EBX + EAX }
{1} add ebx,ebp
{2} add ecx,[ebx+eax]
{ the byte written is (EAX + EBP) shifted right by 2 (logical shift) }
{0} add eax,ebp
{1} shr eax,02h
{2} mov [esi],al
{@40} add esi,01h
{1*} add edx,04h
{2*} jnz @@results { clears EDX register ( DL=0 as head, DH=0 as length ) }
{ drop the dword pushed before the row loop }
{0} pop eax
{ counterpart of the earlier add esp,ebp: EBP has had its sign flipped in @initcol since then }
{1} add esp,ebp
{2} neg ebp
{ EAX = all ones }
{0} or eax,-1
{ EBX = EDI + 4*EBP }
{@50} lea ebx,[edi+ebp*04h]
{ arithmetic shift: EBP becomes EBP / 4 and keeps its sign }
{1} sar ebp,02h
{ store the accumulated sum; ESI + EBP is presumably the start of the TRESULT record - TODO confirm }
{2} mov [esi+ebp+TRESULT.OPTIMUM],ecx
{0} add esi,ebp
{1-} xor ecx,ecx
{2} jmp @onchain
[ Szerkesztve ]
Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙
Aktív témák
- Fejhallgató erősítő és DAC topik
- Politika
- Motoros topic
- Microsoft Excel topic
- exHWSW - Értünk mindenhez IS
- Xiaomi 13T és 13T Pro - nincs tétlenkedés
- Intel Core i5 / i7 / i9 "Alder Lake-Raptor Lake/Refresh" (LGA1700)
- Otthoni hálózat és internet megosztás
- Samsung Galaxy S24 Ultra - ha működik, ne változtass!
- Marvel Snap
- További aktív témák...
Állásajánlatok
Cég: Ozeki Kft.
Város: Debrecen