Wednesday, June 13, 2012

Enable hibernate in ubuntu 12.04

Hibernate is disabled by default in Ubuntu 12.04 because of a bug. Follow the instructions below to re-enable hibernate.

Open a terminal and copy & paste this command (root privileges are required to edit files under /var/lib):
$sudo gedit /var/lib/polkit-1/localauthority/10-vendor.d/com.ubuntu.desktop.pkla

then it will prompt a new file in gedit and do changes like below

[Re-enable hibernate]
Identity=unix-user:*
Action=org.freedesktop.upower.hibernate
ResultActive=yes

Restart computer and hibernate will be back.

Thursday, June 7, 2012

Most trusted universities for online activity

The top ten most trusted universities according to iovation, with most trusted being number one, are:


 1. University of California, San Francisco
 2. Columbia University
 3. Cornell University
 4. University of Texas
 5. University of Chicago
 6. University of California, Los Angeles
 7. Northwestern University
 8. Texas A&M University
 9. University of Utah
10. University of Virginia.


Source

Tuesday, June 5, 2012

Canny and Sobel Edge Detection in C#

Sobel and Canny are major edge detection algorithms in image processing. Here I have implemented those algorithms using C#.

Download the source code from here.

Canny Edge Detection (click to zoom)

Sobel Edge Detection (click to zoom)




You can improve the program by using optimization methods such as threading and loop optimization.

Monday, June 4, 2012

Array addition using OpenMP

OpenMP is an application programming interface (API) that supports multi-platform shared memory multiprocessing programming.

To compile this in linux environment
$gcc -fopenmp omp_add.c -o omp_add

Then you need to define number of threads:
$export OMP_NUM_THREADS=10

To run program:
$./omp_add

Here is omp_add.c
#include <omp.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>

/*
 * Sums the elements of two large arrays (A and B) in parallel using
 * OpenMP, then prints the total and the elapsed wall-clock time.
 *
 * Compile:  gcc -fopenmp omp_add.c -o omp_add
 * Threads:  export OMP_NUM_THREADS=10
 */
int main (int argc, char *argv[]) {

    int i, tid, nthreads, n = 10, N = 100000000;
    double *A, *B, tResult, fResult = 0.0;  /* fResult holds the global sum */

    time_t start, stop;

    /* no cast on malloc in C; check for allocation failure before use */
    A = malloc(N * sizeof *A);
    B = malloc(N * sizeof *B);
    if (A == NULL || B == NULL) {
        fprintf(stderr, "Out of memory\n");
        free(A);
        free(B);
        return 1;
    }

    for (i = 0; i < N; i++) {
        A[i] = (double)(i + 1);
        B[i] = (double)(i + 1);
    }

    time(&start);

    /*
    //this block use single process
    for (i=0; i<N; i++)
    {
            fResult = fResult + A[i] + B[i];
    }

    */

    //begin of parallel section

    #pragma omp parallel private(tid, i, tResult) shared(n, A, B, fResult)
    {
        tid = omp_get_thread_num();
        /* private copies are NOT initialized from the outer variable:
           each thread must zero its own partial sum before accumulating */
        tResult = 0.0;
        if (tid == 0)
        {
            nthreads = omp_get_num_threads();
            printf("Number of threads = %d\n", nthreads);
        }

    #pragma omp for schedule (static, n)
        for (i = 0; i < N; i++) {
            tResult = tResult + A[i] + B[i];
        }

    #pragma omp for nowait
        for (i = 0; i < n; i++)
        {
            printf("Thread %d does iteration %d\n", tid, i);
        }

    /* one thread at a time folds its partial sum into the shared total */
    #pragma omp critical
        fResult = fResult + tResult;
    }
    //end of parallel section

    time(&stop);

    printf("%f\n", fResult);

    printf("Finished in about %.0f seconds. \n", difftime(stop, start));

    free(A);
    free(B);
    return 0;
}

Special thanks to Dr. M.C. Jayawardena - BSc (Col), PhD(Uppsala), MIEEE, AMCS(SL) (Lecturer)

For more examples

Sunday, June 3, 2012

Matrix Multiplication using MPI with C

Here I'll give you a code for matrix multiplication using the Message Passing Interface (MPI). If you are dealing with parallel computing, MPI plays a major role. Before running the MPI code you will need an MPI environment. In my case I am using the university cluster.

Here is code

 /**********************************************************************
  * MPI-based matrix multiplication AxB=C
  * Master (rank 0) fills A and B, partitions the rows of A among the
  * workers, and collects the result rows back into C. Each worker
  * multiplies its row block of A by the full B.
  *********************************************************************/
 #include <stdio.h>
 #include <sys/time.h>   /* gettimeofday, struct timeval */
 #include "mpi.h"
 #define N 500    /* number of rows and columns in matrix */
 MPI_Status status;
 double a[N][N],b[N][N],c[N][N];
 int main(int argc, char **argv)
 {
  int numtasks,taskid,numworkers,source,dest,rows,offset,i,j,k,remainPart,originalRows;
  struct timeval start, stop;
  MPI_Init(&argc, &argv);
  MPI_Comm_rank(MPI_COMM_WORLD, &taskid);
  MPI_Comm_size(MPI_COMM_WORLD, &numtasks);
  numworkers = numtasks-1;
  /* need at least one worker, otherwise N/numworkers divides by zero */
  if (numworkers < 1) {
   if (taskid == 0) fprintf(stderr, "Run with at least 2 MPI tasks.\n");
   MPI_Finalize();
   return 1;
  }
  /*---------------------------- master ----------------------------*/
  if (taskid == 0) {
   for (i=0; i<N; i++) {
    for (j=0; j<N; j++) {
     a[i][j]= 1.0;
     b[i][j]= 2.0;
    }
   }
   gettimeofday(&start, 0);
   /* send matrix data to the worker tasks; the first N%numworkers
      workers get one extra row so all N rows are covered */
   rows = N/numworkers;
   offset = 0;
   remainPart = N%numworkers;
   for (dest=1; dest<=numworkers; dest++)
   {
    if (remainPart > 0)
    {
      originalRows = rows;
      ++rows;                /* this worker takes one leftover row */
      remainPart--;
      MPI_Send(&offset, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
      MPI_Send(&rows, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
      MPI_Send(&a[offset][0], rows*N, MPI_DOUBLE, dest, 1, MPI_COMM_WORLD);
      MPI_Send(&b, N*N, MPI_DOUBLE, dest, 1, MPI_COMM_WORLD);
      offset = offset + rows;
      rows = originalRows;
    }
    else
    {
      MPI_Send(&offset, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
      MPI_Send(&rows, 1, MPI_INT, dest, 1, MPI_COMM_WORLD);
      MPI_Send(&a[offset][0], rows*N, MPI_DOUBLE, dest, 1, MPI_COMM_WORLD);
      MPI_Send(&b, N*N, MPI_DOUBLE, dest, 1, MPI_COMM_WORLD);
      offset = offset + rows;
    }
   }
   /* wait for results from all worker tasks */
   for (i=1; i<=numworkers; i++)
   {
    source = i;
    MPI_Recv(&offset, 1, MPI_INT, source, 2, MPI_COMM_WORLD, &status);
    MPI_Recv(&rows, 1, MPI_INT, source, 2, MPI_COMM_WORLD, &status);
    MPI_Recv(&c[offset][0], rows*N, MPI_DOUBLE, source, 2, MPI_COMM_WORLD, &status);
   }
   gettimeofday(&stop, 0);
   /* printf("Here is the result matrix:\n");
   for (i=0; i<N; i++) {
    for (j=0; j<N; j++)
     printf("%6.2f  ", c[i][j]);
    printf ("\n");
   }
  */
   fprintf(stdout,"Time = %.6f\n\n",
      (stop.tv_sec+stop.tv_usec*1e-6)-(start.tv_sec+start.tv_usec*1e-6));
  }
  /*---------------------------- worker----------------------------*/
  if (taskid > 0) {
   source = 0;
   MPI_Recv(&offset, 1, MPI_INT, source, 1, MPI_COMM_WORLD, &status);
   MPI_Recv(&rows, 1, MPI_INT, source, 1, MPI_COMM_WORLD, &status);
   MPI_Recv(&a, rows*N, MPI_DOUBLE, source, 1, MPI_COMM_WORLD, &status);
   MPI_Recv(&b, N*N, MPI_DOUBLE, source, 1, MPI_COMM_WORLD, &status);
   /* Matrix multiplication: c[0..rows-1] = (our block of a) x b */
   for (k=0; k<N; k++)
    for (i=0; i<rows; i++) {
     c[i][k] = 0.0;
     for (j=0; j<N; j++)
      c[i][k] = c[i][k] + a[i][j] * b[j][k];
    }
   MPI_Send(&offset, 1, MPI_INT, 0, 2, MPI_COMM_WORLD);
   MPI_Send(&rows, 1, MPI_INT, 0, 2, MPI_COMM_WORLD);
   MPI_Send(&c, rows*N, MPI_DOUBLE, 0, 2, MPI_COMM_WORLD);
  }
  MPI_Finalize();
  return 0;
 }

How to compile



How to run



Special thanks to Mr. K.P.M.K. Silva - BSc (Col), MSc(York) (Lecturer)

Saturday, June 2, 2012

Loop Optimization

  • Loop Interchange : Loops are reordered to minimize the stride and align the access pattern in the loop with the pattern of data storage in memory
Example: In C

for (I = 0; I < 4; I++)
  for (J = 0; J < 4; J++)
    a[J][I] = ...

after interchanging

for (J = 0; J < 4; J++)
  for (I = 0; I < 4; I++)
    a[J][I] = ...
  • Loop Fusion : Adjacent or closely located loops fused into one single loop.
/* Unfused form: a scale/relax pass over every node, followed by a
   separate difference pass over the interior nodes only. */
void nofusion()
{
  int idx;
  for (idx = 0; idx < nodes; idx++) {
    a[idx] *= small;
    c[idx] = (a[idx] + b[idx]) * relaxn;
  }
  for (idx = 1; idx < nodes - 1; idx++) {
    d[idx] = c[idx] - a[idx];
  }
}

/* Fused form of nofusion(): the two loops are merged over the common
   interior range [1, nodes-2], and the boundary iterations (where only
   a and c are updated) are peeled off outside the loop. */
void fusion()
{
  int i;
  /* peeled first iteration (i == 0) */
  a[0] = a[0]*small;
  c[0] = (a[0]+b[0])*relaxn;
  /* peeled last iteration (i == nodes-1); the original line had a
     mismatched parenthesis, assigned to a instead of c, and skipped
     the a *= small step performed by nofusion() */
  a[nodes - 1] = a[nodes - 1] * small;
  c[nodes - 1] = (a[nodes - 1] + b[nodes - 1]) * relaxn;
  for (i = 1; i < nodes - 1;i++)
  {
     a[i] = a[i] * small;
     c[i] = (a[i] + b[i]) * relaxn;
     d[i] = c[i] - a[i];
  }
}
  • Loop Fission : Split the original loop if it is worthwhile
/* Unsplit form: one loop initializes both local arrays together. */
void nofission()
{
  int k;
  int a[100];
  int b[100];
  for (k = 0; k < 100; k++) {
    a[k] = 1;
    b[k] = 2;
  }
}

/* Split form: each array gets its own initialization loop. */
void fission()
{
  int k;
  int a[100];
  int b[100];
  for (k = 0; k < 100; k++) {
    a[k] = 1;
  }
  for (k = 0; k < 100; k++) {
    b[k] = 2;
  }
}
  • Loop Peeling: Peel-off the edge iterations of the loop.
Before peeling:

for (i = 1; i <= N; i++)
{
  if (i==1) x[i]=0;
  else
    if (i==N) x[i]=N;
    else
    x[i] = x[i] + y[i];
}

After peeling:

x[1] = 0;
for (i = 2; i < N; i++)
x[i] = x[i] + y[i];
x[N] = N;
  • Loop Unrolling: Reduced the effect of branches
Before unrolling:

do i=1,N
y(i) = x(i)
enddo

After unrolling by a factor of four (the remainder loop handles the
iterations left over when N is not a multiple of four):

nend = 4*(N/4)
do i=1,nend,4
y(i) = x(i)
y(i+1) = x(i+1)
y(i+2) = x(i+2)
y(i+3) = x(i+3)
enddo
do i = nend + 1, N
y(i) = x(i)
enddo