// Page 1 of 3 (page header left over from the document this listing was extracted from)

#include<stdio.h>
#include<stdlib.h>
#include<conio.h>
#include<cuda.h>

/*
 * Matrix product kernel: result = first * second, with all three matrices
 * stored row-major in flat float arrays.
 *
 * Parameters
 *   first   input matrix, indexed as first[row * width1 + k]
 *   second  input matrix, indexed as second[k * width2 + col]
 *   result  output matrix, indexed as result[row * width2 + col]
 *   width1  number of columns of `first` (and the length of the dot product)
 *   width2  number of columns of `second` and of `result`
 *
 * Launch layout
 *   2D launch; the x coordinate selects the output column and the y
 *   coordinate selects the output row. Each thread computes exactly one
 *   element of `result`.
 *
 * Preconditions
 *   - Threads whose column falls outside [0, width2) exit without writing,
 *     so the x extent of the launch may overshoot width2.
 *   - The kernel is not told how many rows `first` has, so it cannot guard
 *     the y direction: gridDim.y * blockDim.y must equal the row count.
 *   - No shared memory is used.
 */
__global__ void mulmatrix(float *first , float *second , float *result ,int width1 ,int width2)
{
    /* Global coordinates. For a single-block launch blockIdx is zero and
       these reduce to plain threadIdx.x / threadIdx.y. */
    const int idx = blockIdx.x * blockDim.x + threadIdx.x;   /* output column */
    const int idy = blockIdx.y * blockDim.y + threadIdx.y;   /* output row    */

    /* Column guard: surplus threads in x must not read or write past a row. */
    if (idx >= width2)
    {
        return;
    }

    /* Dot product of row idy of `first` with column idx of `second`. */
    float sum = 0.0f;
    for (int k = 0; k < width1; k++)
    {
        sum += first[idy * width1 + k] * second[k * width2 + idx];
    }

    result[idy * width2 + idx] = sum;
}
/* Stops the program with a diagnostic when a CUDA runtime call reports an
   error. `what` names the operation for the message. Exiting the process is
   relied on to release any buffers still allocated at that point. */
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/* Prompts for and reads a rows x cols row-major matrix from stdin into
   `data`. `name` is the label shown in each prompt. Returns 1 on success,
   0 if an element could not be parsed. */
static int readMatrix(const char *name, float *data, int rows, int cols)
{
    for (int i = 0; i < rows; i++)
    {
        for (int j = 0; j < cols; j++)
        {
            printf("Enter the Element %s[%d][%d]>>", name, i, j);
            if (scanf("%f", &data[i * cols + j]) != 1)
            {
                return 0;
            }
        }
    }
    return 1;
}

/* Prints a rows x cols row-major matrix, one tab-separated row per line. */
static void printMatrix(const float *data, int rows, int cols)
{
    for (int i = 0; i < rows; i++)
    {
        for (int j = 0; j < cols; j++)
        {
            printf("%f\t", data[i * cols + j]);
        }
        printf("\n");
    }
}

/*
 * Interactive driver: reads two matrices from stdin, multiplies them on the
 * GPU with mulmatrix, and prints the product.
 *
 * The kernel is launched as ONE block of col2 x row1 threads (one thread per
 * output element), so the product must fit in a single thread block; larger
 * problems are rejected up front instead of failing silently at launch.
 *
 * Returns EXIT_SUCCESS on success, EXIT_FAILURE on invalid input or on a
 * host allocation failure. CUDA failures terminate via checkCuda.
 */
int main()
{
    float *a_h = NULL, *b_h = NULL, *c_h = NULL;
    float *a_d = NULL, *b_d = NULL, *c_d = NULL;
    int row1, col1, row2, col2;

    printf("Enter the size of the first matrix>>");
    if (scanf("%d %d",&row1,&col1) != 2)
    {
        fprintf(stderr, "Invalid size for the first matrix\n");
        return EXIT_FAILURE;
    }
    printf("Enter the size of the second matrix>>");
    if (scanf("%d %d",&row2,&col2) != 2)
    {
        fprintf(stderr, "Invalid size for the second matrix\n");
        return EXIT_FAILURE;
    }

    if (row1 <= 0 || col1 <= 0 || row2 <= 0 || col2 <= 0)
    {
        fprintf(stderr, "Matrix dimensions must be positive\n");
        return EXIT_FAILURE;
    }
    /* The product is only defined when the inner dimensions agree; without
       this check the kernel would index past the end of the second matrix. */
    if (col1 != row2)
    {
        fprintf(stderr, "Cannot multiply: first matrix has %d columns but second has %d rows\n",
                col1, row2);
        return EXIT_FAILURE;
    }

    /* Single-block launch: make sure the block shape is legal on this device
       before asking the user to type in every element. */
    int device = 0;
    cudaDeviceProp prop;
    checkCuda(cudaGetDevice(&device), "cudaGetDevice");
    checkCuda(cudaGetDeviceProperties(&prop, device), "cudaGetDeviceProperties");
    if (col2 > prop.maxThreadsDim[0] || row1 > prop.maxThreadsDim[1] ||
        (long long)row1 * col2 > prop.maxThreadsPerBlock)
    {
        fprintf(stderr, "Result of %d x %d elements does not fit in one thread block (limit %d threads)\n",
                row1, col2, prop.maxThreadsPerBlock);
        return EXIT_FAILURE;
    }

    const size_t bytesA = sizeof(float) * (size_t)row1 * (size_t)col1;
    const size_t bytesB = sizeof(float) * (size_t)row2 * (size_t)col2;
    const size_t bytesC = sizeof(float) * (size_t)row1 * (size_t)col2;

    //memory allocation on the host
    a_h = (float*)malloc(bytesA);
    b_h = (float*)malloc(bytesB);
    c_h = (float*)malloc(bytesC);
    if (a_h == NULL || b_h == NULL || c_h == NULL)
    {
        fprintf(stderr, "Host memory allocation failed\n");
        free(a_h);
        free(b_h);
        free(c_h);
        return EXIT_FAILURE;
    }

    //Input
    printf("Enter the first matrix\n");
    int inputOk = readMatrix("a_h", a_h, row1, col1);
    if (inputOk)
    {
        printf("Enter the second matrix\n");
        inputOk = readMatrix("b_h", b_h, row2, col2);
    }
    if (!inputOk)
    {
        fprintf(stderr, "Invalid matrix element\n");
        free(a_h);
        free(b_h);
        free(c_h);
        return EXIT_FAILURE;
    }

    //show data
    printMatrix(a_h, row1, col1);
    printMatrix(b_h, row2, col2);

    //memory allocation on device
    checkCuda(cudaMalloc((void**)&a_d, bytesA), "cudaMalloc(a_d)");
    checkCuda(cudaMalloc((void**)&b_d, bytesB), "cudaMalloc(b_d)");
    checkCuda(cudaMalloc((void**)&c_d, bytesC), "cudaMalloc(c_d)");

    //copy data from host to device
    checkCuda(cudaMemcpy(a_d, a_h, bytesA, cudaMemcpyHostToDevice), "cudaMemcpy(a_d)");
    checkCuda(cudaMemcpy(b_d, b_h, bytesB, cudaMemcpyHostToDevice), "cudaMemcpy(b_d)");

    //Kernel call
    dim3 dimBlock(col2,row1,1);//threadsPerBlock: x = output column, y = output row
    dim3 dimGrid(1,1);
    mulmatrix<<<dimGrid,dimBlock>>>(a_d , b_d , c_d ,col1,col2);
    /* A launch does not return a status itself: fetch launch-configuration
       errors explicitly, then wait so execution faults surface here too. */
    checkCuda(cudaGetLastError(), "mulmatrix launch");
    checkCuda(cudaDeviceSynchronize(), "mulmatrix execution");

    //retrieve data from the device
    checkCuda(cudaMemcpy(c_h, c_d, bytesC, cudaMemcpyDeviceToHost), "cudaMemcpy(c_h)");

    //show result
    printf("\n\n");
    printMatrix(c_h, row1, col2);

    /* Wait for a key press so the console window stays open. */
    getch();

    //deallocating the memory
    free(a_h);
    free(b_h);
    free(c_h);
    checkCuda(cudaFree(a_d), "cudaFree(a_d)");
    checkCuda(cudaFree(b_d), "cudaFree(b_d)");
    checkCuda(cudaFree(c_d), "cudaFree(c_d)");

    return EXIT_SUCCESS;
}

// "You might also like" (page footer left over from the document this listing was extracted from)