Implementation of the Parallel Matrix Multiplication Algorithm
The source code of a program implementing the parallel matrix multiplication algorithm based on Fox's algorithm is given below:
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <math.h>
#include <mpi.h>
int ProcNum = 0; // Number of available processes
int ProcRank = 0; // Rank of current process
int GridSize; // Size of virtual processor grid
int GridCoords[2]; // Coordinates of current processor in grid
MPI_Comm GridComm; // Grid communicator
MPI_Comm ColComm; // Column communicator
MPI_Comm RowComm; // Row communicator
// Function for simple initialization of matrix elements
void DummyDataInitialization(double* pAMatrix, double* pBMatrix, int Size) {
int i, j; // Loop variables
for (i=0; i<Size; i++)
for (j=0; j<Size; j++) {
pAMatrix[i*Size+j] = 1;
pBMatrix[i*Size+j] = 1;
}
}
// Function for random initialization of matrix elements
void RandomDataInitialization(double* pAMatrix, double* pBMatrix,
int Size) {
int i, j; // Loop variables
srand(unsigned(clock()));
for (i=0; i<Size; i++)
for (j=0; j<Size; j++) {
pAMatrix[i*Size+j] = rand()/double(1000);
pBMatrix[i*Size+j] = rand()/double(1000);
}
}
// Function for formatted matrix output
void PrintMatrix(double* pMatrix, int RowCount, int ColCount) {
int i, j; // Loop variables
for (i=0; i<RowCount; i++) {
for (j=0; j<ColCount; j++)
printf("%7.4f ", pMatrix[i*ColCount+j]);
printf("\n");
}
}
// Function for matrix multiplication
void SerialResultCalculation(double* pAMatrix, double* pBMatrix,
double* pCMatrix, int Size) {
int i, j, k; // Loop variables
for (i=0; i<Size; i++) {
for (j=0; j<Size; j++)
for (k=0; k<Size; k++)
pCMatrix[i*Size+j] += pAMatrix[i*Size+k]*pBMatrix[k*Size+j];
}
}
// Function for block multiplication
void BlockMultiplication(double* pAblock, double* pBblock,
double* pCblock, int Size) {
SerialResultCalculation(pAblock, pBblock, pCblock, Size);
}
// Creation of two-dimensional grid communicator
// and communicators for each row and each column of the grid
void CreateGridCommunicators() {
int DimSize[2]; // Number of processes in each dimension of the grid
int Periodic[2]; // =1, if the grid dimension should be periodic
int Subdims[2]; // =1, if the grid dimension should be kept in the subgrid
DimSize[0] = GridSize;
DimSize[1] = GridSize;
Periodic[0] = 0;
Periodic[1] = 0;
// Creation of the Cartesian communicator; reorder = 0 so that the process
// rank in GridComm coincides with its rank in MPI_COMM_WORLD
MPI_Cart_create(MPI_COMM_WORLD, 2, DimSize, Periodic, 0, &GridComm);
// Determination of the cartesian coordinates for every process
MPI_Cart_coords(GridComm, ProcRank, 2, GridCoords);
// Creating communicators for rows
Subdims[0] = 0; // The row dimension is excluded from the subgrid
Subdims[1] = 1; // The column dimension is kept, so each subgrid is a grid row
MPI_Cart_sub(GridComm, Subdims, &RowComm);
// Creating communicators for columns
Subdims[0] = 1;
Subdims[1] = 0;
MPI_Cart_sub(GridComm, Subdims, &ColComm);
}
// Function for memory allocation and data initialization
void ProcessInitialization(double* &pAMatrix, double* &pBMatrix,
double* &pCMatrix, double* &pAblock, double* &pBblock, double* &pCblock,
double* &pTemporaryAblock, int &Size, int &BlockSize ) {
if (ProcRank == 0) {
do {
printf("\nEnter the size of the matrices: ");
scanf("%d", &Size);
if (Size%GridSize != 0) {
printf ("Size of matrices must be divisible by the grid size!\n");
}
}
while (Size%GridSize != 0);
}
MPI_Bcast(&Size, 1, MPI_INT, 0, MPI_COMM_WORLD);
BlockSize = Size/GridSize;
pAblock = new double [BlockSize*BlockSize];
pBblock = new double [BlockSize*BlockSize];
pCblock = new double [BlockSize*BlockSize];
pTemporaryAblock = new double [BlockSize*BlockSize];
for (int i=0; i<BlockSize*BlockSize; i++) {
pCblock[i] = 0;
}
if (ProcRank == 0) {
pAMatrix = new double [Size*Size];
pBMatrix = new double [Size*Size];
pCMatrix = new double [Size*Size];
DummyDataInitialization(pAMatrix, pBMatrix, Size);
//RandomDataInitialization(pAMatrix, pBMatrix, Size);
}
}
// Function for checkerboard matrix decomposition
void CheckerboardMatrixScatter(double* pMatrix, double* pMatrixBlock,
int Size, int BlockSize) {
double * MatrixRow = new double [BlockSize*Size];
if (GridCoords[1] == 0) {
MPI_Scatter(pMatrix, BlockSize*Size, MPI_DOUBLE, MatrixRow,
BlockSize*Size, MPI_DOUBLE, 0, ColComm);
}
for (int i=0; i<BlockSize; i++) {
MPI_Scatter(&MatrixRow[i*Size], BlockSize, MPI_DOUBLE,
&(pMatrixBlock[i*BlockSize]), BlockSize, MPI_DOUBLE, 0, RowComm);
}
delete [] MatrixRow;
}
// Data distribution among the processes
void DataDistribution(double* pAMatrix, double* pBMatrix, double*
pMatrixAblock, double* pBblock, int Size, int BlockSize) {
// Checkerboard distribution of the blocks of matrices A and B
CheckerboardMatrixScatter(pAMatrix, pMatrixAblock, Size, BlockSize);
CheckerboardMatrixScatter(pBMatrix, pBblock, Size, BlockSize);
}
// Function for gathering the result matrix
void ResultCollection(double* pCMatrix, double* pCblock, int Size,
int BlockSize) {
double * pResultRow = new double [Size*BlockSize];
for (int i=0; i<BlockSize; i++) {
MPI_Gather( &pCblock[i*BlockSize], BlockSize, MPI_DOUBLE,
&pResultRow[i*Size], BlockSize, MPI_DOUBLE, 0, RowComm);
}
if (GridCoords[1] == 0) {
MPI_Gather(pResultRow, BlockSize*Size, MPI_DOUBLE, pCMatrix,
BlockSize*Size, MPI_DOUBLE, 0, ColComm);
}
delete [] pResultRow;
}
// Broadcasting matrix A blocks to process grid rows
void ABlockCommunication(int iter, double *pAblock, double* pMatrixAblock,
int BlockSize) {
// Defining the leading process of the process grid row
int Pivot = (GridCoords[0] + iter) % GridSize;
// Copying the transmitted block in a separate memory buffer
if (GridCoords[1] == Pivot) {
for (int i=0; i<BlockSize*BlockSize; i++)
pAblock[i] = pMatrixAblock[i];
}
// Block broadcasting
MPI_Bcast(pAblock, BlockSize*BlockSize, MPI_DOUBLE, Pivot, RowComm);
}
// Cyclic shift of matrix B blocks in the process grid columns
void BblockCommunication(double *pBblock, int BlockSize) {
MPI_Status Status;
int NextProc = GridCoords[0] + 1;
if ( GridCoords[0] == GridSize-1 ) NextProc = 0;
int PrevProc = GridCoords[0] - 1;
if ( GridCoords[0] == 0 ) PrevProc = GridSize-1;
MPI_Sendrecv_replace( pBblock, BlockSize*BlockSize, MPI_DOUBLE,
NextProc, 0, PrevProc, 0, ColComm, &Status);
}
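// Function for parallel matrix multiplication by the Fox method: at every
// iteration the pivot block of matrix A is broadcast along the grid row,
// the blocks are multiplied, and the blocks of matrix B are shifted
// cyclically along the grid column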
void ParallelResultCalculation(double* pAblock, double* pMatrixAblock,
double* pBblock, double* pCblock, int BlockSize) {
for (int iter = 0; iter < GridSize; iter ++) {
// Sending blocks of matrix A to the process grid rows
ABlockCommunication (iter, pAblock, pMatrixAblock, BlockSize);
// Block multiplication
BlockMultiplication(pAblock, pBblock, pCblock, BlockSize);
// Cyclic shift of blocks of matrix B in process grid columns
BblockCommunication(pBblock, BlockSize);
}
}
// Test printing of the matrix block
void TestBlocks(double* pBlock, int BlockSize, const char* str) {
MPI_Barrier(MPI_COMM_WORLD);
if (ProcRank == 0) {
printf("%s \n", str);
}
for (int i=0; i<ProcNum; i++) {
if (ProcRank == i) {
printf ("ProcRank = %d \n", ProcRank);
PrintMatrix(pBlock, BlockSize, BlockSize);
}
MPI_Barrier(MPI_COMM_WORLD);
}
}
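// Function for testing the parallel result against the serial algorithm
// (executed on the root process only)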
void TestResult(double* pAMatrix, double* pBMatrix, double* pCMatrix,
int Size) {
double* pSerialResult; // Result matrix of serial multiplication
double Accuracy = 1.e-6; // Comparison accuracy
int equal = 0; // =1, if the matrices are not equal
int i; // Loop variable
if (ProcRank == 0) {
pSerialResult = new double [Size*Size];
for (i=0; i<Size*Size; i++) {
pSerialResult[i] = 0;
}
BlockMultiplication(pAMatrix, pBMatrix, pSerialResult, Size);
for (i=0; i<Size*Size; i++) {
if (fabs(pSerialResult[i]-pCMatrix[i]) >= Accuracy)
equal = 1;
}
if (equal == 1)
printf("The results of serial and parallel algorithms are NOT "
"identical. Check your code.\n");
else
printf("The results of serial and parallel algorithms are "
"identical.\n");
delete [] pSerialResult;
}
}
// Function for computational process termination
void ProcessTermination(double* pAMatrix, double* pBMatrix,
double* pCMatrix, double* pAblock, double* pBblock, double* pCblock,
double* pMatrixAblock) {
if (ProcRank == 0) {
delete [] pAMatrix;
delete [] pBMatrix;
delete [] pCMatrix;
}
delete [] pAblock;
delete [] pBblock;
delete [] pCblock;
delete [] pMatrixAblock;
}
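// Main function: the Fox algorithm is executed only when the number of
// processes forms a square grid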
int main(int argc, char* argv[]) {
double* pAMatrix = NULL; // The first argument of matrix multiplication
double* pBMatrix = NULL; // The second argument of matrix multiplication
double* pCMatrix = NULL; // The result matrix
int Size; // Size of matrices
int BlockSize; // Sizes of matrix blocks on current process
double *pAblock; // Initial block of matrix A on current process
double *pBblock; // Initial block of matrix B on current process
double *pCblock; // Block of result matrix C on current process
double *pMatrixAblock; // Block of matrix A initially assigned to the process
double Start, Finish, Duration; // Execution time measurement
setvbuf(stdout, 0, _IONBF, 0);
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &ProcNum);
MPI_Comm_rank(MPI_COMM_WORLD, &ProcRank);
GridSize = (int)sqrt((double)ProcNum);
if (ProcNum != GridSize*GridSize) {
if (ProcRank == 0) {
printf ("Number of processes must be a perfect square \n");
}
}
else {
if (ProcRank == 0)
printf("Parallel matrix multiplication program\n");
// Creating the Cartesian grid, row and column communicators
CreateGridCommunicators();
// Memory allocation and initialization of matrix elements
ProcessInitialization ( pAMatrix, pBMatrix, pCMatrix, pAblock, pBblock,
pCblock, pMatrixAblock, Size, BlockSize );
Start = MPI_Wtime();
DataDistribution(pAMatrix, pBMatrix, pMatrixAblock, pBblock, Size,
BlockSize);
// Execution of the Fox method
ParallelResultCalculation(pAblock, pMatrixAblock, pBblock,
pCblock, BlockSize);
ResultCollection(pCMatrix, pCblock, Size, BlockSize);
Finish = MPI_Wtime();
Duration = Finish - Start;
TestResult(pAMatrix, pBMatrix, pCMatrix, Size);
if (ProcRank == 0)
printf("\nTime of execution = %f\n", Duration);
// Process Termination
ProcessTermination (pAMatrix, pBMatrix, pCMatrix, pAblock, pBblock,
pCblock, pMatrixAblock);
}
MPI_Finalize();
return 0;
}
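The program can be built with any MPI implementation (for instance, MPICH or Open MPI). A minimal sketch of a build-and-run session is given below; the source file name matrix_fox.cpp and the launch with 4 processes (a 2x2 process grid) are chosen purely for illustration:

mpicxx matrix_fox.cpp -o matrix_fox
mpiexec -n 4 ./matrix_fox

Since the number of processes must be a perfect square, typical launches use 1, 4, 9 or 16 processes, and the matrix size entered at the prompt must be divisible by the corresponding grid size.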
Several functional modules can be identified in the program: