Before:
if( Condition ) {
Case A;
} else {
Case B;
} |
After:
Case B;
if( Condition ) {
Undo Case B;
Case A;
}
|
Before:
for(i=0;i<10;i++) {
printf("%d\n",i*10);
}
|
After:
for(i=0;i<100;i+=10) {
printf("%d\n",i);
}
|
Before:
char playfield[80][25];
|
After:
char playfield[80][32];
|
Before:
for(i=0;i<100;i++) {
map[i].visited = 0;
}
|
After:
i=99;
do {
map[i].visited = 0;
i--;
} while(i>=0);
|
Before:
char x;
int y;
y = x;
|
After:
int x, y;
y = x;
|
Before:
void swap(int *x, int *y) {
int t;
t = *y;
*y = *x;
*x = t;
}
|
After:
static void swap(int *x, int *y) {
int t;
t = *y;
*y = *x;
*x = t;
}
|
Before:
float f[100],sum;
...
/* Back to back dependent
adds force the thoughput
to be equal to the total
FADD latency. */
sum = 0;
for(i=0;i<100;i++) {
sum += f[i];
}
|
After:
float f[100],sum0,sum1;
float sum2,sum3,sum;
...
/* Optimized for a 4-cycle
latency fully pipelined
FADD unit. The throughput
is one add per clock. */
sum0 = sum1 = sum2 = sum3 = 0;
for(i=0;i<100;i+=4) {
sum0 += f[i];
sum1 += f[i+1];
sum2 += f[i+2];
sum3 += f[i+3];
}
sum = (sum0+sum1) + (sum2+sum3);
|
Before:
x = y % 32;
x = y * 8;
x = y / w + z / w;
if( a==b &&c==d &&e==f ) {...}
if( (x &1) || (x &4) ) {...}
if( x>=0 &&x<8 &&
y>=0 &&y<8 ) {...}
if( (x==1) || (x==2) ||
(x==4) || (x==8) || ... )
if( (x==2) || (x==3) || (x==5) ||
(x==7) || (x==11) || (x==13) ||
(x==17) || (x==19) ) {...}
#define abs(x) (((x)>0)?(x):-(x))
int a[3][3][3];
int b[3][3][3];
...
for(i=0;i<3;i++)
for(j=0;j<3;j++)
for(k=0;k<3;k++)
b[i][j][k] = a[i][j][k];
for(i=0;i<3;i++)
for(j=0;j<3;j++)
for(k=0;k<3;k++)
a[i][j][k] = 0;
for(x=0;x<100;x++) {
printf("%d\n",(int)(sqrt(x)));
}
unsigned int x, y, a;
...
a = (x + y) >>1;
/* overflow fixup */
if( a <x &&a <y ) a += 0x8000000;
c:\>tc myprog.c
user% cc myprog.c
Use the quicksort algorithm.
Use Bresenham's line algorithm.
Look through school notes for ideas.
Ignore suggestions from others.
Code, code, code, code ...
|
After:
x = y &31;
x = y <<3;
x = (y + z) / w;
if( ((a-b)|(c-d)|(e-f))==0 ) {...}
if( x & 5 ) {...}
if( ((unsigned)(x|y))<8 ) {...}
if( x&(x-1)==0 &&x!=0 )
if( (1<<x) & ((1<<2)|(1<<3)|(1<<5)|(1<<7)
\
|(1<<11)|(1<<13)|(1<<17)|(1<<19)) ) {...}
static long abs(long x) {
long y;
y = x>>31; /* Not portable */
return (x^y)-y;
}
typedef struct {
int element[3][3][3];
} Three3DType;
Three3DType a,b;
...
b = a;
memset(a,0,sizeof(a));
for(tx=1,sx=0,x=0;x<100;x++) {
if( tx<=x ) {
tx+=2*sx+3;
sx++;
}
printf("%d\n",sx);
}
unsigned int x, y, a;
...
a = (x & y) + ((x ^ y)>>1);
c:\>wpp386 /6r/otexan myprog.c
user% gcc -O3 myprog.c
Use radix, intro or heap(!) sort.
Use fixed point DDA line algorithm.
Get examples from USENET/WEB/FTP
Get suggestions but be skeptical.
Think, code, think, code ...
|
http://www.azillionmonkeys.com/qed/optimize.html
http://itguru.tistory.com/129