mpi 矩阵相乘 c语言 5

用c语言编写的并行mpi程序,实现矩阵相乘,其中矩阵的大小可自己定义... 用c语言编写的并行mpi程序,实现矩阵相乘,其中矩阵的大小可自己定义 展开
 我来答
chinaisland
2010-12-24
知道答主
回答量:5
采纳率:0%
帮助的人:0
展开全部
!
! a cross b.f
!
! Fixed-Format Fortran Source File
! Generated by PGI Visual Fortran(R)
! 2010-12-12 21:58:04
!
!Parallel matrix multiplication: main program

program cross
implicit double precision (a-h, o-z)
include 'mpif.h'
parameter (nbuffer=128*1024*1024/8)
dimension buf(nbuffer),buf2(nbuffer)
double precision time_start, time_end
external init, check, matmul

call MPI_Init(ierr)
call MPI_Comm_rank(MPI_COMM_WORLD, myrank, ierr)
call MPI_Comm_size(MPI_COMM_WORLD, nprocs, ierr)

if (myrank.eq.0) then
print *, 'Enter M, N, L: '
call flush(6)
read(*,*) M, N, L
endif
call MPI_Bcast(M, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
call MPI_Bcast(N, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)
call MPI_Bcast(L, 1, MPI_INTEGER, 0, MPI_COMM_WORLD, ierr)

if ( mod(m,nprocs).ne.0 .or. mod(l,nprocs).ne.0 ) then
if (myrank.eq.0) print *, 'M or L cannot be divided by nprocs!'
call MPI_Finalize(ierr)
stop
endif

ia = 1
ib = ia + m/nprocs ! n
ic = ib + n ! l/nprocs
iwk = ic + m/nprocs ! l
iend = iwk + n ! l/nprocs
if ( iend .gt. nbuffer+1 ) then
if (myrank.eq.0) print *, 'Insufficient buffer size!'
call MPI_Finalize(ierr)
stop
endif

call init( m, n, l, myrank, nprocs, buf(ia), buf(ib), buf(ic)
& , buf2(ia),buf2(ib),buf2(ic) )

time_start = MPI_Wtime()
call matmul( m, n, l, myrank, nprocs, buf2(ia), buf2(ib), buf2(ic)
& , buf2(iwk) )
time_end = MPI_Wtime()

call check( m, n, l, myrank, nprocs, buf2(ia), buf2(ib), buf2(ic))

if ( myrank .eq. 0 ) then
print *, 'time = ', time_end-time_start
print *, 'mflops = ', m*(n+n-1.0)*l/(time_end-time_start)*1d-6
endif

print*,'ok'
call MPI_Finalize(ierr)
stop
end

!------------------------------------------------------------------

subroutine init(m, n, l, myrank, nprocs, a, b, c, a2, b2,c2)
implicit double precision (a-h, o-z)
include 'mpif.h'
dimension a(m/nprocs, n), b(n, l/nprocs), c(m/nprocs, l)
dimension a2(n, m/nprocs), b2(l/nprocs, n), c2(l,m/nprocs)

mloc = m/nprocs
lloc = l/nprocs

! Init. a, b
do j=1, n
do i=1, mloc
a(i,j) = i+myrank*mloc
enddo
enddo

do j=1, lloc
do i=1, n
b(i,j) = j+myrank*lloc
enddo
enddo

! Tranpose a, b -> a2, b2
do j=1, mloc
do i=1,n
a2(i,j) = a(j,i)
enddo
enddo

do j=1, n
do i=1,lloc
b2(i,j) = b(j,i)
enddo
enddo

return
end

!------------------------------------------------------------------

subroutine check(m, n, l, myrank, nprocs, a, b, c)
implicit double precision (a-h, o-z)
include 'mpif.h'
dimension a(m/nprocs, n), b(n, l/nprocs), c(m/nprocs, l)
!dimension a(n,m/nprocs), b(l/nprocs,n), c(l,m/nprocs)
integer local_code, code

mloc = m/nprocs
lloc = l/nprocs

!Check the results
local_code = 0
do i=1, l
do j=1, mloc
if ( abs(c(i,j) - n*dble(j+myrank*lloc)*i) .gt. 1d-10 ) then
local_code = 1
print*,'local_code=',local_code
goto 10
endif
enddo
enddo

10 call MPI_Reduce( local_code, code, 1, MPI_INTEGER, MPI_SUM, 0,
& MPI_COMM_WORLD, ierr)
!
if ( myrank .eq. 0 ) then
print *, 'code = ', code
endif
!
return
end

* !Parallel multiplication of matrices using MPI_Isend/MPI_Irecv
*
subroutine matmul(m, n, l, myrank, nprocs, a, b, c, work)
implicit double precision (a-h, o-z)
include 'mpif.h'
dimension a(n,m/nprocs), b(l/nprocs,n), c(l/nprocs,m),
& work(n,m/nprocs)
integer src, dest, tag
integer status(MPI_STATUS_SIZE, 2), request(2)
*
mloc = m/nprocs
lloc = l/nprocs
*
dest = mod( myrank-1+nprocs, nprocs )
src = mod( myrank+1, nprocs )
*
jpos=myrank*mloc
print*,'myrank=',myrank
c print*,'dest=',dest,'src=',src
c print*,'jpos=',jpos,'tag=',tag

*
do ip=1, nprocs - 1
tag = 10000 + ip
*
call MPI_Isend( a, n*mloc, MPI_DOUBLE_PRECISION, dest, tag,
& MPI_COMM_WORLD, request(1), ierr )
call MPI_Irecv( work, n*mloc, MPI_DOUBLE_PRECISION, src, tag,
& MPI_COMM_WORLD, request(2), ierr )
*
do i=1, lloc
do j=1, mloc
sum=0.d0
do k=1, n
sum = sum + b(i,k) * a(k,j)
enddo
c(i, j+jpos) = sum
enddo
enddo
*
call MPI_Waitall(2, request, status, ierr)
*
* 拷贝 work -> b (可以通过在计算/通信中交替使用 b/work 来避该免操作)
do i=1, n
do j=1, mloc
a(i,j) = work(i,j)
enddo
enddo
*
jpos = jpos + mloc
if ( jpos .ge. m ) jpos = 0
*
enddo
*
do i=1, lloc
do j=1, mloc
sum=0.d0
do k=1, n
sum = sum + b(i,k) * a(k,j)
enddo
c(i, j+jpos) = sum
enddo
enddo
*
print*,'c(1,mloc)=',c(1,mloc)
print*,'c(1,2)=', c(1,2)
print*,'c(2,1)=', c(2,1)
print*,'c(lloc,1)=',c(lloc,1)
return
end
本回答被网友采纳
已赞过 已踩过<
你对这个回答的评价是?
评论 收起
a1076534701
2010-12-10
知道答主
回答量:27
采纳率:0%
帮助的人:7.4万
展开全部
斯蒂芬
已赞过 已踩过<
你对这个回答的评价是?
评论 收起
收起 1条折叠回答
推荐律师服务: 若未解决您的问题,请您详细描述您的问题,通过百度律临进行免费专业咨询

为你推荐:

下载百度知道APP,抢鲜体验
使用百度知道APP,立即抢鲜体验。你的手机镜头里或许有别人想知道的答案。
扫描二维码下载
×

类别

我们会通过消息、邮箱等方式尽快将举报结果通知您。

说明

0/200

提交
取消

辅 助

模 式