/* File: mpi_mat_vect_sub.c * * Purpose: Implement parallel matrix-vector multiplication using * one-dimensional arrays to store the vectors and the * matrix. Vectors use block distributions and the * matrix is distributed by block submatrices. This doesn't * do ``real'' I/O of the matrix or vector. If DEBUG * is defined, it will generate a matrix with integer * entries and print the product vector. * * Compile: mpicc -g -Wall -o mpi_mat_vect_col mpi_mat_vect_col.c -lm * Run: mpiexec -n ./mpi_mat_vect_col * n is the order of the system * * Input: none * Output: Elapsed time * * Notes: * 1. Number of processes should be a perfect square, and its * square root should evenly divide n * 2. Define DEBUG for verbose output */ #include #include #include #include int my_rank, comm_sz; int row_comm_sz, col_comm_sz; int my_row_rank, my_col_rank; MPI_Comm comm, row_comm, col_comm; int diag, which_row_comm, which_col_comm; void Build_comms(void); void Check_for_error(int local_ok, char fname[], char message[]); void Get_dims(int argc, char* argv[], int* m_p, int* local_m_p, int* n_p, int* local_n_p); void Allocate_arrays(double** local_A_pp, double** local_x_pp, double** local_y_pp, int m, int local_m, int n, int local_n); void Generate_matrix(double local_A[], int rows, int cols); void Generate_vector(double local_x[], int local_n); void Print_vector(char title[], double local_vec[], int n, int local_n); void Mat_vect_mult(double local_A[], double local_x[], double local_y[], double sub_y[], int m, int local_m, int n, int local_n); /*-------------------------------------------------------------------*/ int main(int argc, char* argv[]) { double* local_A; double* local_x; double* local_y; double* sub_y; int m, local_m, n, local_n; double start, finish, loc_elapsed, elapsed; MPI_Init(&argc, &argv); Build_comms(); Get_dims(argc, argv, &m, &local_m, &n, &local_n); Allocate_arrays(&local_A, &local_x, &local_y, m, local_m, n, local_n); srandom(my_rank); Generate_matrix(local_A, local_m, local_n); Generate_vector(local_x, local_n); # ifdef DEBUG Print_vector("x", local_x, n, local_n); # endif sub_y = malloc(local_m*sizeof(double)); MPI_Barrier(comm); start = MPI_Wtime(); Mat_vect_mult(local_A, local_x, local_y, sub_y, m, local_m, n, local_n); finish = MPI_Wtime(); loc_elapsed = finish-start; MPI_Reduce(&loc_elapsed, &elapsed, 1, MPI_DOUBLE, MPI_MAX, 0, comm); # ifdef DEBUG Print_vector("y", local_y, m, local_m); # endif if (my_rank == 0) { // printf("Elapsed time = %e\n", elapsed); printf("%e\n", elapsed); // fprintf(stderr, "From mat-vect: p = %d, n = %d, elapsed = %e\n", // comm_sz, n, elapsed); } free(local_A); free(local_x); free(local_y); free(sub_y); MPI_Finalize(); return 0; } /* main */ /*-------------------------------------------------------------------*/ void Build_comms(void ) { MPI_Comm grid_comm; int p; int dim_sizes[2]; int wrap_around[] = {0,0}; int reorder = 1; int local_ok = 1; int coords[2]; int free_coords[2]; /* Build communicator for entire grid */ comm = MPI_COMM_WORLD; MPI_Comm_size(MPI_COMM_WORLD, &p); int q = sqrt(p); if (p != q*q) local_ok = 0; Check_for_error(local_ok, "Build_comms", "p not a perfect square"); dim_sizes[0] = dim_sizes[1] = q; MPI_Cart_create(MPI_COMM_WORLD, 2, dim_sizes, wrap_around, reorder, &grid_comm); comm = grid_comm; MPI_Comm_size(grid_comm, &comm_sz); MPI_Comm_rank(grid_comm, &my_rank); /* Build a communicator for each process row */ free_coords[0] = 0; free_coords[1] = 1; MPI_Cart_sub(grid_comm, free_coords, &row_comm); MPI_Comm_size(row_comm, &row_comm_sz); MPI_Comm_rank(row_comm, &my_row_rank); /* Build a communicator for each process col */ free_coords[0] = 1; free_coords[1] = 0; MPI_Cart_sub(grid_comm, free_coords, &col_comm); MPI_Comm_size(col_comm, &col_comm_sz); MPI_Comm_rank(col_comm, &my_col_rank); /* Record grid info */ if (my_row_rank == my_col_rank) diag = 1; else diag = 0; MPI_Cart_coords(comm, my_rank, 2, coords); which_row_comm = coords[0]; which_col_comm = coords[1]; } /* Build_comms */ /*-------------------------------------------------------------------*/ void Check_for_error( int local_ok /* in */, char fname[] /* in */, char message[] /* in */) { int ok; MPI_Allreduce(&local_ok, &ok, 1, MPI_INT, MPI_MIN, comm); if (ok == 0) { int my_rank; MPI_Comm_rank(comm, &my_rank); if (my_rank == 0) { fprintf(stderr, "Proc %d > In %s, %s\n", my_rank, fname, message); fflush(stderr); } MPI_Finalize(); exit(-1); } } /* Check_for_error */ /*-------------------------------------------------------------------*/ void Get_dims( int argc /* in */, char* argv[] /* in */, int* m_p /* out */, int* local_m_p /* out */, int* n_p /* out */, int* local_n_p /* out */) { int local_ok = 1; if (my_rank == 0) { if (argc != 2) *m_p = *n_p = 0; else *m_p = *n_p = strtol(argv[1], NULL, 10); } MPI_Bcast(m_p, 1, MPI_INT, 0, comm); MPI_Bcast(n_p, 1, MPI_INT, 0, comm); if (*m_p <= 0 || *n_p <= 0 || *m_p % col_comm_sz != 0 || *n_p % row_comm_sz != 0) local_ok = 0; Check_for_error(local_ok, "Get_dims", "m and n must be positive and evenly divisible by comm_sz"); *local_m_p = *m_p/row_comm_sz; *local_n_p = *n_p/col_comm_sz; } /* Get_dims */ /*-------------------------------------------------------------------*/ void Allocate_arrays( double** local_A_pp /* out */, double** local_x_pp /* out */, double** local_y_pp /* out */, int m /* in */, int local_m /* in */, int n /* in */, int local_n /* in */) { int local_ok = 1; *local_A_pp = malloc(local_m*local_n*sizeof(double)); *local_x_pp = malloc(local_n*sizeof(double)); *local_y_pp = malloc(local_m*sizeof(double)); if (*local_A_pp == NULL || local_x_pp == NULL || local_y_pp == NULL) local_ok = 0; Check_for_error(local_ok, "Allocate_arrays", "Can't allocate local arrays"); } /* Allocate_arrays */ /*-------------------------------------------------------------------*/ void Read_vector( char prompt[] /* in */, double local_vec[] /* out */, int n /* in */, int local_n /* in */) { double* vec = NULL; int i, local_ok = 1; if (my_rank == 0) { vec = malloc(n*sizeof(double)); if (vec == NULL) local_ok = 0; Check_for_error(local_ok, "Read_vector", "Can't allocate temporary vector"); printf("Enter the vector %s\n", prompt); for (i = 0; i < n; i++) scanf("%lf", &vec[i]); MPI_Scatter(vec, local_n, MPI_DOUBLE, local_vec, local_n, MPI_DOUBLE, 0, comm); free(vec); } else { Check_for_error(local_ok, "Read_vector", "Can't allocate temporary vector"); MPI_Scatter(vec, local_n, MPI_DOUBLE, local_vec, local_n, MPI_DOUBLE, 0, comm); } } /* Read_vector */ /*-------------------------------------------------------------------*/ void Generate_matrix(double local_A[], int rows, int cols) { int i, j; # ifndef DEBUG for (i = 0; i < rows; i++) for (j = 0; j < cols; j++) local_A[i*cols + j] = ((double) random())/((double) RAND_MAX); # else for (i = 0; i < rows; i++) for (j = 0; j < cols; j++) local_A[i*cols + j] = my_rank + i; # endif } /* Generate_matrix */ /*-------------------------------------------------------------------*/ void Generate_vector(double local_x[], int local_n) { int i; if (diag) { # ifndef DEBUG for (i = 0; i < local_n; i++) local_x[i] = ((double) random())/((double) RAND_MAX); # else for (i = 0; i < local_n; i++) local_x[i] = my_rank + 1; # endif } } /* Generate_vector */ /*------------------------------------------------------------------- * Note: This only prints a matrix distributed by block rows */ void Print_matrix( char title[] /* in */, double local_A[] /* in */, int m /* in */, int local_m /* in */, int n /* in */, int local_n /* in */) { double* A = NULL; int i, j, local_ok = 1; if (my_rank == 0) { A = malloc(m*n*sizeof(double)); if (A == NULL) local_ok = 0; Check_for_error(local_ok, "Print_matrix", "Can't allocate temporary matrix"); MPI_Gather(local_A, local_m*n, MPI_DOUBLE, A, local_m*n, MPI_DOUBLE, 0, comm); printf("\nThe matrix %s\n", title); for (i = 0; i < m; i++) { for (j = 0; j < n; j++) printf("%f ", A[i*n+j]); printf("\n"); } printf("\n"); free(A); } else { Check_for_error(local_ok, "Print_matrix", "Can't allocate temporary matrix"); MPI_Gather(local_A, local_m*n, MPI_DOUBLE, A, local_m*n, MPI_DOUBLE, 0, comm); } } /* Print_matrix */ /*-------------------------------------------------------------------*/ void Print_vector( char title[] /* in */, double local_vec[] /* in */, int n /* in */, int local_n /* in */) { double* vec = NULL; int i, q, local_ok = 1, incr = row_comm_sz+1; if (my_rank == 0) { vec = malloc(local_n*sizeof(double)); if (vec == NULL) local_ok = 0; Check_for_error(local_ok, "Print_vector", "Can't allocate temporary vector"); printf("\nThe vector %s\n", title); for (i = 0; i < local_n; i++) printf("%f ", local_vec[i]); for (q = incr; q < comm_sz; q += incr) { // printf("Proc %d > Trying to receive from %d\n", my_rank, q); // fflush(stdout); MPI_Recv(vec, local_n, MPI_DOUBLE, q, 0, comm, MPI_STATUS_IGNORE); for (i = 0; i < local_n; i++) printf("%f ", vec[i]); } printf("\n"); free(vec); } else { Check_for_error(local_ok, "Print_vector", "Can't allocate temporary vector"); if (diag) { // printf("Proc %d > Sending to 0\n", my_rank); // fflush(stdout); MPI_Send(local_vec, local_n, MPI_DOUBLE, 0, 0, comm); } } } /* Print_vector */ /*-------------------------------------------------------------------*/ void Mat_vect_mult( double local_A[] /* in */, double local_x[] /* in */, double local_y[] /* out */, double sub_y[] /* in */, int m /* in */, int local_m /* in */, int n /* in */, int local_n /* in */) { int local_i, local_j; MPI_Bcast(local_x, local_n, MPI_DOUBLE, which_col_comm, col_comm); for (local_i = 0; local_i < local_m; local_i++) { sub_y[local_i] = 0.0; for (local_j = 0; local_j < local_n; local_j++) sub_y[local_i] += local_A[local_i*local_n + local_j]* local_x[local_j]; } MPI_Reduce(sub_y, local_y, local_m, MPI_DOUBLE, MPI_SUM, which_row_comm, row_comm); } /* Mat_vect_mult */