//-------------------------------------------------------------------
//  matmult.cpp
//
//  This program uses separate threads to compute the entries
//  in a matrix-product.  It serves to illustrate certain key
//  principles of 'thread safe' multiprogramming: its thread-
//  routine is made 'reentrant' by avoiding any alteration of
//  shared storage-locations, and synchronization is achieved
//  by 'blocking' the parent-process until every child-thread
//  has terminated.  (With large size matrices there could be
//  a speedup when executing on a machine with multiple CPUs,
//  though clearly no speed-advantages would be achieved on a
//  single-processor system -- just extra system "overhead".)
//
//  programmer: ALLAN CRUSE
//  written on: 17 OCT 2004
//-------------------------------------------------------------------

#ifndef _GNU_SOURCE
#define _GNU_SOURCE         // makes <sched.h> declare clone() and CLONE_VM
#endif

#include <errno.h>          // for errno, EINTR
#include <sched.h>          // for clone()
#include <signal.h>         // for SIGCHLD
#include <stdio.h>          // for printf(), perror()
#include <stdlib.h>         // for exit(), malloc(), free(), rand(), srand()
#include <sys/wait.h>       // for wait()
#include <time.h>           // for time()

#define N           4                       // matrices are N-by-N
#define STACK_SIZE  4096                    // bytes of stack given to each thread
#define FLAGS       ( SIGCHLD | CLONE_VM )  // share memory; signal parent on exit

typedef int MATRIX[ N ][ N ];

// Work-order handed to one thread: compute entry (row,col) of *w = (*u)(*v).
typedef struct {
    MATRIX  *u, *v, *w;
    int     row, col;
} INFO;


// This is the 'reentrant' thread-routine.
//
// 'data' points to an INFO structure.  The routine reads row 'row' of *u
// and column 'col' of *v and stores their dot-product in (*w)[row][col].
// That single entry is the only location it writes.  Always returns 0.
int dot( void *data )
{
    INFO    *info = (INFO*)data;
    MATRIX  *a = info->u;
    MATRIX  *b = info->v;
    MATRIX  *c = info->w;
    int     r = info->row;
    int     k = info->col;

    // accumulate privately, then publish the finished value with one store
    int     sum = 0;
    for (int i = 0; i < N; i++)
        sum += (*a)[r][i] * (*b)[i][k];
    (*c)[r][k] = sum;
    return 0;
}


// Prints the leading r-by-k portion of matrix m, one row per output line.
void show_matrix( int r, int k, MATRIX &m )
{
    for (int i = 0; i < r; i++)
    {
        printf( "\n" );
        for (int j = 0; j < k; j++)
            printf( "%5d ", m[i][j] );
    }
    printf( "\n" );
}


int main( int argc, char **argv )
{
    // initialize the matrices a and b with random entries from -9 to 9
    MATRIX  a, b, c;
    srand( (unsigned)time( NULL ) );
    for (int i = 0; i < N; i++)
        for (int j = 0; j < N; j++)
        {
            a[ i ][ j ] = ( rand()%19 ) - 9;
            b[ i ][ j ] = ( rand()%19 ) - 9;
        }

    // initialize our array of info-structures
    INFO    info[ N ][ N ];
    for (int r = 0; r < N; r++)
        for (int k = 0; k < N; k++)
        {
            info[ r ][ k ].u = &a;
            info[ r ][ k ].v = &b;
            info[ r ][ k ].w = &c;
            info[ r ][ k ].row = r;
            info[ r ][ k ].col = k;
        }

    // allocate storage for the threads' stacks
    int     number_of_threads = N * N;
    char    *storage = (char*)malloc( (size_t)number_of_threads * STACK_SIZE );
    if ( storage == NULL ) { perror( "malloc" ); exit(1); }

    // execute a thread for each entry in the matrix-product
    int     started = 0;
    for (int r = 0; r < N; r++)
        for (int k = 0; k < N; k++)
        {
            // clone() wants the TOP of the new stack (stacks grow downward),
            // so point one-past-the-end of this thread's slice.  The address
            // is computed with char-pointer arithmetic: squeezing a pointer
            // through an 'int' would truncate it on 64-bit machines.
            char *tos = storage + (size_t)( r * N + k + 1 ) * STACK_SIZE;

            if ( clone( dot, tos, FLAGS, &info[r][k] ) == -1 )
            {
                // could not start the thread: report it, and compute this
                // entry right here so the product is still complete
                perror( "clone" );
                dot( &info[r][k] );
            }
            else
                ++started;
        }

    // wait until all the threads finish, then release their stacks
    while ( started > 0 )
    {
        if ( wait( NULL ) == -1 )
        {
            if ( errno == EINTR ) continue;     // interrupted: just retry
            perror( "wait" );
            break;
        }
        --started;
    }
    free( storage );

    // display the computation results
    printf( "\nMatrix A:\n" );
    show_matrix( N, N, a );

    printf( "\nMatrix B:\n" );
    show_matrix( N, N, b );

    printf( "\nMatrix C:\n" );
    show_matrix( N, N, c );

    // verify the accuracy of our matrix result
    int     errors = 0;
    for (int r = 0; r < N; r++)
        for (int k = 0; k < N; k++)
        {
            int expected = 0;
            for (int j = 0; j < N; j++)
                expected += a[r][j] * b[j][k];
            if ( c[r][k] != expected ) ++errors;
        }
    printf( "\nNumber of errors = %d \n\n", errors );
    return 0;
}