Aktív témák
-
P.H.
senior tag
Skeleton of the code, tuned for Zen (Zen 1) and Zen+ (i.e., with placeholder code omitted).
It runs at a sustained 3.4 IPC (out of a maximum of 4.0, limited by the 4 available ALUs) for 50x50 matrices, which means 85% utilization.
Since more than 90% of the instructions need an ALU, the key consideration is how the instructions are distributed among the ALUs. This is true for all other microarchitectures as well.
{ Setup: fetch the first input byte, save ebp in [edi+__A], and zero the __B slots. }
{ NOTE(review): the loops count an index upward until it reaches zero, so ebp is }
{ presumably a negative multiple of 4 on entry and edi the base of the work area - }
{ TODO confirm against the omitted procedure header. }
{ ebx = sign-extended byte at [eax+E.FIELD0]; it is consumed by the first pass of @@a. }
{ } movsx ebx,byte ptr [eax+E.FIELD0+00h]
{ } mov esi,ebp
{ edx = 0: the value stored by @init, and still zero when @@a uses it in cmova. }
{ } xor edx,edx
{ Round esi down to a multiple of 8 because @init advances by 8 and stops at exactly 0. }
{ The rounding can make the loop clear one slot below index ebp. }
{ } and esi,-8
{ Keep a copy of ebp in memory; later code reloads it from [edi+__A]. }
{ } mov [edi+__A],ebp
{ ecx = running index for the @@a loop that follows. }
{ } mov ecx,ebp
@init:
{ Two dword stores per pass: the slots for index esi and index esi+4, 8 bytes per index unit. }
{ } mov [edi+esi*08h+(00h*08h)+__B],edx
{ } mov [edi+esi*08h+(04h*08h)+__B],edx
{ esi += 8, written as a subtraction of -8; ZF ends the loop when esi reaches 0. }
{ } sub esi,-8
{ } jnz @init
{ @@a: one pass per input byte. ebx holds the current sign-extended byte, ecx the index }
{ (stepped by 4 until it reaches 0), eax the input pointer, edx zero from the setup code. }
@@a:
{ Flags of this compare are consumed by the cmova below; the lea/movsx/mov in between }
{ do not modify flags. }
{ } cmp ebx,ebp
{ esi = ebp + 4*ebx. }
{ } lea esi,[ebp+ebx*04h]
{ Prefetch the next input byte for the following pass. }
{ } movsx ebx,byte ptr [eax+E.FIELD0+01h]
{ Advance the input pointer by one byte without touching flags. }
{ } lea eax,[eax+01h]
{ Store ebp into the __B slot selected by esi. }
{ } mov [edi+esi*08h+__B],ebp
{ If the byte was above ebp (unsigned compare), replace esi with edx (zero). }
{ } cmova esi,edx
{ Record esi in the __C slot of the current index ecx. }
{ } mov [edi+ecx*08h+__C],esi
{ } add ecx,04h
{ } jnz @@a
{ Loop done: eax = __C entry for index ebp, ebx = ebp, edx = edi. }
{ } mov eax,[edi+ebp*08h+__C]
{ } mov ebx,ebp
{ } mov edx,edi
{ NOTE(review): no label named @b exists in the visible source; the target may be }
{ @next0b or may have been lost when the skeleton was trimmed - TODO confirm. }
{ } jmp @b
{ @@ROWS / @next0b / @findr: per-index pass that stores the __C value into __D and R, }
{ then (when needed) scans for an unsigned minimum and stores its negation in the __F slot. }
{ NOTE(review): this looks like a per-row minimum reduction - TODO confirm. }
@@ROWS:
{ Load the __C entry of the next index (ebx+4), then advance ebx; ebx == 0 ends the pass. }
{ } mov eax,[edi+ebx*08h+(04h*08h)+__C]
{ } add ebx,04h
{ } jz @@COLS
@next0b:
{ Copy eax into both the __D and the R slot of index ebx. }
{ } mov [edi+ebx*08h+__D],eax
{ edx -= ebp between the two stores; edx is used as a base address in @findr. }
{ } sub edx,ebp
{ } mov [edi+ebx*08h+R],eax
{ eax += ebp; a carry out of this addition skips the scan and moves to the next index. }
{ } add eax,ebp
{ } jc @@ROWS
{ Prepare the scan: ecx = (ebp+4) rounded down to a multiple of 8. }
{ } lea ecx,[ebp+04h]
{ esi = first candidate: dword at [edx+ebp] OR-ed with the __B entry of index ebp. }
{ } mov esi,[edx+ebp]
{ } or esi,[edi+ebp*08h+__B]
{ } and ecx,-8
{ ebp is reused as scratch from here; it is reloaded from [edi+__A] after the loop. }
{ } mov ebp,[edi+ecx*08h+__B]
@findr:
{ Two candidates per pass, each a data dword OR-ed with a __B entry. }
{ eax and esi keep two independent unsigned running minima. }
{ } or ebp,[edx+ecx+00h]
{ } cmp ebp,eax
{ } cmovb eax,ebp
{ } mov ebp,[edx+ecx+04h]
{ } or ebp,[edi+ecx*08h+(04h*08h)+__B]
{ } cmp ebp,esi
{ } cmovb esi,ebp
{ Preload the __B entry for the next pass (index ecx+8) before ecx is advanced. }
{ } mov ebp,[edi+ecx*08h+(08h*08h)+__B]
{ } add ecx,08h
{ } jnz @findr
{ Merge the two running minima into esi; ebp is restored in between (mov keeps flags). }
{ } cmp eax,esi
{ } mov ebp,[edi+__A]
{ } cmovb esi,eax
{ Store the negated minimum in the __F slot of index ebx. }
{ } neg esi
{ } mov [edi+__L+ebx*08h+__F],esi
{ The branch uses the flags of neg: taken when the minimum before negation was >= 0 }
{ as a signed value; otherwise execution falls through to @@EXIT. }
{ NOTE(review): only @@ROWS is defined in the visible source, not @ROWS - TODO confirm }
{ whether this is a typo or a label removed from the skeleton. }
{ } jle @ROWS
{ @@EXIT: early-out path. Writes 7FFFFFFFh into field E.O of the record whose pointer }
{ is read from [esp+_X], then leaves via @outside. }
{ NOTE(review): presumably a failure / no-solution marker - TODO confirm. }
{ NOTE(review): @outside is not defined in the visible source. }
@@EXIT:
{ } mov esi,[esp+_X]
{ No explicit operand size is given; the assembler presumably derives it from E.O. }
{ } mov [esi+E.O],7FFFFFFFh
{ } jmp @outside
{ @free0col / @@COLS / @mark / @nextc / @findc: second phase, walking index ebx downward }
{ in steps of 4 and, for selected indices, scanning for an unsigned minimum. }
{ The "**" markers tag a compare/test instruction followed directly by its conditional }
{ jump - presumably pairs the author wants kept adjacent for branch fusion; TODO confirm. }
@free0col:
{ Add ebp to the counter at [edi+__0], remember esi in [edi+__Y], set ecx to all ones. }
{ } add [edi+__0],ebp
{ } mov [edi+__Y],esi
{ } mov ecx,0FFFFFFFFh
@@COLS:
{ Store ebx into the __B slot one index (4) below ebp. }
{ } mov [edi+ebp*08h-(04h*08h)+__B],ebx
@mark:
{ Record esi in the __CC slot of index ebx, then start a new scan: }
{ esi = __F entry of index ebp, edx = ebp. }
{ } mov [edi+__L+ebx*08h+__CC],esi
{ } mov esi,[edi+__L+ebp*08h+__F]
{ } mov edx,ebp
@nextc:
{ eax = ebx-4 is the next index; write ecx to the __M slot of the current index first. }
{ } lea eax,[ebx-04h]
{ } mov [edi+__L+ebx*08h+__M],ecx
{ Fetch the __B entry of the next index, then step ebx down. }
{ } mov ecx,[edi+ebx*08h-(04h*08h)+__B]
{ } mov ebx,eax
{ A borrow here (new ebx below ebp, unsigned) means the walk is finished. }
{ } sub eax,ebp
{ } jc @@init0
{ ecx = NOT ecx; keep walking while the complemented value is non-negative. }
{ } xor ecx,-1
{ } jns @nextc
@findc:
{ Candidate = esi + dword at [eax+edi], OR-ed with the __D entry of index edx. }
{ } add esi,[eax+edi]
{ } or esi,[edi+edx*08h+__D]
{ lea advances edx without touching flags, so jz still tests the result of the or. }
{ } lea edx,[edx+04h]
{ } jz @testr
{ } sub eax,ebp
{ ecx = unsigned minimum of ecx and the candidate. }
{ } cmp esi,ecx
{ } cmovb ecx,esi
{ Preload the __F entry for the next index. }
{ } mov esi,[edi+__L+edx*08h+__F]
{ Loop until edx reaches zero. }
{ **} cmp edx,00h
{ **} jnz @findc
{ Scan finished without a zero candidate: reset eax and edx, keep the minimum in esi. }
{ } mov eax,ebx
{ } mov edx,ebp
{ } mov esi,ecx
{ A minimum with bit 31 set leads to @@EXIT; otherwise fall through into @seek0. }
{ **} cmp ecx,00h
{ **} js @@EXIT
{ @seek0: search upward from edx for the index whose value (__F entry plus the dword at }
{ [eax+edi]) equals esi, the value placed there by "mov esi,ecx" just before this label. }
{ The "**" markers tag a compare/test instruction followed directly by its conditional jump. }
@seek0:
{ } mov ecx,[edi+__L+edx*08h+__F]
{ } sub eax,ebp
{ Advance edx; once the signed result is greater than zero, go to @free0col. }
{ } add edx,04h
{ } jg @free0col
{ } add ecx,[eax+edi]
{ **} cmp ecx,esi
{ **} jnz @seek0
{ @testr: reached from the match above or from the jz in @findc; edx is already one }
{ step past the matching index, hence the -4 adjustments below. }
@testr:
{ ecx = the matching index (edx-4). }
{ } lea ecx,[edx-04h]
{ SF is set only when both the __C entry and ebx have bit 31 set. }
{ **} test [edi+edx*08h-(04h*08h)+__C],ebx
{ NOTE(review): @seek0col is not defined in the visible source (only @seek0 and }
{ @free0col are) - TODO confirm the intended target. }
{ **} js @seek0col
{ Link the pair in both directions: __B[ebx] = matching index, __C[matching index] = ebx. }
{ } mov [edi+ebx*08h+__B],ecx
{ } mov [edi+edx*08h-(04h*08h)+__C],ebx
{ ecx = 0 is the value @nextc will write into the __M slot. }
{ } xor ecx,ecx
{ } jmp @mark
{ @@init0: reached from @nextc when the downward walk is finished. Saves ecx in }
{ [edi+__I] and either enters the scan loop or leaves the routine. }
@@init0:
{ } mov [edi+__I],ecx
{ } mov eax,ecx
{ Shift ecx left by 16: ZF is set when the low 16 bits of ecx were all zero. }
{ } sal ecx,10h
{ Non-zero low word: continue at @scan with the value now in the upper half of ecx. }
{ } jnz @scan
{ Zero low word: load eax = edi and esi = [esp+_X], then leave. }
{ } mov eax,edi
{ } mov esi,[esp+_X]
{ NOTE(review): spelled "outside" here but "@outside" and "@@outside" elsewhere; none of }
{ them is defined in the visible source - TODO confirm they are meant to be distinct. }
{ } jmp outside
{ @@1ST_STEP / @1ST_STEP: subtract a masked copy of edx from the __CC and __F entries }
{ of every index from ebx up to zero, then unpack eax into two 16-bit halves. }
{ NOTE(review): looks like the dual-variable update of an assignment solver - TODO confirm. }
{ The "**" markers tag a compare/test instruction followed directly by its conditional jump. }
@@1ST_STEP:
{ esi = __C entry selected by the sign-extended low word of eax. }
{ } movsx esi,ax
{ } mov esi,[edi+esi*08h+__C]
{ edx is negated once, before the loop. }
{ } neg edx
@1ST_STEP:
{ A single byte at offset __S inside the __M entry is sign-extended and AND-ed with edx. }
{ NOTE(review): the byte presumably acts as a 0 / -1 mask - TODO confirm. }
{ } movsx ecx,byte ptr [edi+__L+ebx*08h+__S+__M]
{ } and ecx,edx
{ } sub [edi+__L+ebx*08h+__CC],ecx
{ Same pattern with a byte of the R entry, applied to the __F slot. }
{ } movsx ecx,byte ptr [edi+ebx*08h+__S+R]
{ } and ecx,edx
{ } sub [edi+__L+ebx*08h+__F],ecx
{ } add ebx,04h
{ } jnz @1ST_STEP
{ Unpack eax: ebx = signed low word, eax = signed high word. }
{ } mov ecx,[edi+__I]
{ } movsx ebx,ax
{ } sar eax,10h
{ A zero __C entry (esi, loaded at the top) goes to @@2ND_STEP. }
{ **} test esi,esi
{ **} jz @@2ND_STEP
{ Otherwise: decrement [edi+__A], set R[ebx] = eax and __M[esi] = ebx. }
{ } add dword ptr [edi+__A],-1
{ } mov [edi+ebx*08h+R],eax
{ } mov [edi+__L+esi*08h+__M],ebx
{ [edi+__I] = unsigned minimum of its old value and esi. }
{ } cmp esi,ecx
{ } cmovb ecx,esi
{ } mov ebx,ebp
{ } mov [edi+__I],ecx
{ } jmp @@6TH_STEP
{ @pass: reached from the scan loop when ebx equals -4. Unpacks ecx, conditionally }
{ updates [edi+__W], and then looks for the next index to process in @nx. }
{ The "**" markers tag a compare/test instruction followed directly by its conditional jump. }
@pass:
{ } mov eax,ecx
{ Arithmetic shift right by 16: ecx = signed high word, CF = bit 15 of the old ecx. }
{ } sar ecx,10h
{ With bit 15 clear, eax is replaced by the old [edi+__W], so the store below rewrites }
{ the same value; with bit 15 set, [edi+__W] receives the packed ecx. }
{ } cmovnc eax,[edi+__W]
{ } mov [edi+__W],eax
{ ebx = ebp, encoded as lea with an explicit zero displacement. }
{ NOTE(review): the longer encoding is presumably deliberate padding - TODO confirm. }
{ } lea ebx,[ebp+00h]
{ } mov [edi+__L+ecx*08h+__M],esi
@nx:
{ Read the __M entry of the next index (ecx+4), then advance ecx. }
{ } mov esi,[edi+__L+ecx*08h+__M +(04h*08h)]
{ } add ecx,04h
{ Reaching index 0 means no further index is left: go to @@1ST_STEP. }
{ } jz @@1ST_STEP
{ Skip entries that are below or equal (unsigned) to the value at [edi+__A]; }
{ the first entry above it falls through into @@6TH_STEP. }
{ **} cmp esi,[edi+__A]
{ **} jbe @nx
{ @@6TH_STEP / @ffd / @z / @scan: scan loop, unrolled twice. For index ecx it walks ebx }
{ upward in steps of 4, computes __F[ebx] - [edi+__Y] + dword at [edi+eax], OR-ed with }
{ R[ebx], and tracks the unsigned minimum in edx with its index in the low word of ecx. }
{ The "**" markers tag a compare/test instruction followed directly by its conditional jump. }
@@6TH_STEP:
{ Cache the __CC entry of index ecx in [edi+__Y]; start ebx one step (4) below ebp. }
{ } mov esi,[edi+__L+ecx*08h+__CC]
{ } lea ebx,[ebp-04h]
{ NOTE(review): single-byte nop, presumably alignment padding - TODO confirm. }
{ } { x1 } nop
{ } mov eax,ecx
{ } mov [edi+__Y],esi
@ffd:
{ Move the index into the upper half of ecx; the low word is overwritten by cmovb cx,bx. }
{ } sal ecx,10h
{ Preload the __F entry of index ebx+4, the index the first pass will use. }
{ } mov esi,[edi+__L+ebx*08h+(04h*08h)+__F]
@z:
{ ebx == -4 means the next step would reach index 0: the scan is complete. }
{ **} cmp ebx,-4
{ **} jz @pass
@scan:
{ First unrolled copy. }
{ } add ebx,04h
{ } sub eax,ebp
{ } sub esi,[edi+__Y]
{ } add esi,[edi+eax]
{ The or sets ZF; a zero result goes straight to @@5TH_STEP. }
{ } or esi,[edi+ebx*08h+R]
{ } jz @@5TH_STEP
{ New unsigned minimum: remember its index in cx (16-bit write) and its value in edx. }
{ } cmp esi,edx
{ } cmovb cx,bx
{ } cmovb edx,esi
{ } mov esi,[edi+__L+ebx*08h+(04h*08h)+__F]
{ **} cmp ebx,-4
{ **} jz @pass
{ Second unrolled copy, identical to the first. }
{ } add ebx,04h
{ } sub eax,ebp
{ } sub esi,[edi+__Y]
{ } add esi,[edi+eax]
{ } or esi,[edi+ebx*08h+R]
{ } jz @@5TH_STEP
{ } cmp esi,edx
{ } cmovb cx,bx
{ } cmovb edx,esi
{ } mov esi,[edi+__L+ebx*08h+(04h*08h)+__F]
{ Three 66h bytes are emitted directly in front of the nop. }
{ NOTE(review): presumably a multi-byte padding nop for alignment - TODO confirm. }
{ } db $66,$66,$66; nop
{ NOTE(review): no label @zZ exists in the visible source; the loop head is @z - }
{ TODO confirm whether this is a typo. }
{ } jmp @zZ
{ @@5TH_STEP: reached from the scan loop when a candidate evaluates to zero at index ebx. }
{ Updates [edi+__W], then branches on the __C entry of ebx. }
{ The "**" markers tag a compare/test instruction followed directly by its conditional jump. }
@@5TH_STEP:
{ } mov esi,ecx
{ ecx = signed high word; CF = bit 15 of the packed value. }
{ } sar ecx,10h
{ With bit 15 clear, [edi+__W] is rewritten with its old value; otherwise it receives }
{ the packed ecx. }
{ } cmovnc esi,[edi+__W]
{ } mov [edi+__W],esi
{ esi = __C entry of index ebx; zero goes to @2ND_STEP. }
{ } mov esi,[edi+ebx*08h+__C]
{ **} test esi,esi
{ **} jz @2ND_STEP
{ Non-zero: set R[ebx] = ecx and fill the __M entry of index esi with all ones. }
{ } mov [edi+ebx*08h+R],ecx
{ No explicit operand size is given; the assembler presumably derives it from the symbols. }
{ } mov [edi+__L+esi*08h+__M],0FFFFFFFFh
{ If the low word stored in [edi+__W] equals bx, take the @re path. }
{ **} cmp word ptr [edi+__W],bx
{ **} jz @re
{ esi at or above ecx (unsigned): resume the scan at @ffd. }
{ **} cmp esi,ecx
{ **} jae @ffd
{ Otherwise restart from index esi and fold esi into the minimum kept in [edi+__I]. }
{ } mov ecx,esi
{ } mov eax,[edi+__I]
{ } cmp esi,eax
{ } cmovb eax,esi
{ } mov [edi+__I],eax
{ } jmp @@6TH_STEP
@re:
{ Decrement [edi+__A], set edx = esi, and fold esi into the minimum kept in [edi+__I]; }
{ ecx leaves with that minimum. }
{ } mov ecx,[edi+__I]
{ } add dword ptr [edi+__A],-1
{ } mov edx,esi
{ } cmp esi,ecx
{ } cmovb ecx,esi
{ } mov [edi+__I],ecx
{ } jmp @@6TH_STEP
{ @@2ND_STEP / @2ND_STEP: follows a chain through the __B and R tables, rewriting the }
{ __C and __B links on the way. }
{ NOTE(review): looks like flipping an augmenting path of an assignment solver - }
{ TODO confirm. }
@@2ND_STEP:
{ Entry used by @@1ST_STEP: the starting value is taken from eax. }
{ } mov ecx,eax
@2ND_STEP:
{ __C[ebx] = ecx; edx = old __B[ecx]; __B[ecx] = ebx. }
{ } mov [edi+ebx*08h+__C],ecx
{ } mov edx,[edi+ecx*08h+__B]
{ } mov [edi+ecx*08h+__B],ebx
{ Next link: ecx = R[edx], ebx = edx. }
{ } mov ecx,[edi+edx*08h+R]
{ } mov ebx,edx
{ Continue while edx + ebp carries out of 32 bits. }
{ } add edx,ebp
{ } jc @2ND_STEP
{ } mov ecx,esi
{ Subtract ebp from the counter at [edi+__0]; when it reaches zero, leave. }
{ NOTE(review): @@outside is not defined in the visible source. }
{ } sub [edi+__0],ebp
{ } jz @@outside
{ @@9ST_STEP: walks esi downward in steps of 4 until it equals ebp, unrolled twice. }
{ Each step sets the __M entry to the complement of the __B entry and copies the __D }
{ entry into the R entry. ecx ends up holding the last index whose complement was negative. }
{ The "**" markers tag a compare/test instruction followed directly by its conditional jump. }
@@9ST_STEP:
{ First copy: eax = NOT __B[esi-4], stored into __M[esi-4]. }
{ } mov eax,[edi+esi*08h-(04h*08h)+__B]
{ } xor eax,-1
{ } mov [edi+__L+esi*08h-(04h*08h)+__M],eax
{ lea steps esi down without touching flags, so cmovs still sees the sign of the xor. }
{ } lea esi,[esi-04h]
{ } cmovs ecx,esi
{ R[esi] = __D[esi]; the __B entry for the second copy is loaded early. }
{ } mov ebx,[edi+esi*08h+__D]
{ } mov eax,[edi+esi*08h-(04h*08h)+__B]
{ } mov [edi+esi*08h+R],ebx
{ **} cmp ebp,esi
{ **} jz @i9
{ Second copy: same steps, using the eax value loaded above. }
{ } xor eax,-1
{ } mov [edi+__L+esi*08h-(04h*08h)+__M],eax
{ } lea esi,[esi-04h]
{ } cmovs ecx,esi
{ } mov eax,[edi+esi*08h+__D]
{ } mov [edi+esi*08h+R],eax
{ **} cmp ebp,esi
{ **} jnz @@9ST_STEP
@i9:
{ Publish the selected index in [edi+__I]. }
{ } mov [edi+__I],ecx
{- } jmp @@6TH_STEP
[ Szerkesztve ]
Arguing on the Internet is like running in the Special Olympics. Even if you win, you are still ... ˙˙˙ Real Eyes Realize Real Lies ˙˙˙
Aktív témák
- Autós topik
- Vicces képek
- AMD Navi Radeon™ RX 6xxx sorozat
- Futás, futópályák
- Vodafone otthoni szolgáltatások (TV, internet, telefon)
- Óvodások homokozója
- Xbox tulajok OFF topicja
- f(x)=exp(x): A laposföld elmebaj: Vissza a jövőbe!
- Júliustól kötelező biztosítást kell fizetni egyes rollerek után is!
- Vezetékes FEJhallgatók
- További aktív témák...
Állásajánlatok
Cég: Ozeki Kft.
Város: Debrecen