- Loop Interchange : Loops are reordered to minimize the stride and align the access pattern in the loop with the pattern of data storage in memory
Example: In C
for (I=1,4)
for (J=1,4)
a[J][I] = ...
after Interchanging
for (J=1,4)
for (I=1,4)
a[J][I] = ...
- Loop Fusion : Adjacent or closely located loops fused into one single loop.
{
int i;
for (i = 0; i<nodes;i++)
{
a[i] = a[i] * small;
c[i] = (a[i]+b[i])*relaxn;
}
for (i = 1; i<nodes - 1;i++)
{
d[i] = c[i] - a[i];
}
}
void fusion()
{
int i;
a[0] = a[0]*small;
c[0] = (a[0]+b[0])*relaxn;
a[nodes - 1] = a[nodes - 1] + b[nodes - 1]) * relaxn;
for (i = 1; i < nodes - 1;i++)
{
a[i] = a[i] * small;
c[i] = (a[i] + b[i]) * relaxn;
d[i] = c[i] - a[i];
}
}
- Loop Fission : Split the original loop if it is worthwhile
void nofission()
{
int i, a[100], b[100];
for (i = 0; i < 100; i++)
{
a[i]=1;
b[i]=2;
}
}
void fission()
{
int i, a[100], b[100];
for (i = 0; i < 100; i++)
{
a[i]=1;
}
for (i = 0; i < 100; i++)
{
b[i] = 2;
}
}
- Loop Peeling: Peel-off the edge iterations of the loop.
Before peeling:
for (i = 1; N; i++)
{
if (i==1) x[i]=0;
else
if (i==N) x[i]=N;
else
x[i] = x[i] + y[i];
}
After peeling:
x[i]=0;
for (i = 2; i < N; i++)
x[i] = x[i] + y[i];
x[N] = N;
- Loop Unrolling: Reduced the effect of branches
Before unrolling:
do i=1,N
y[i] = x[i]
enddo
After unrolling by a factor of four:
nend = 4*(N/4)
do i=1,N,4
y[i] = x[i]
y[i+1] = x[i+1]
y[i+2] = x[i+2]
y[i+3] = x[i+3]
enddo
do i = nend + 1, N
y[i] = x[i]
enddo
No comments:
Post a Comment