!********************************************************************************
!* This program solves the Laplace equation in a square domain by Jacobi
!* iteration with MPI. An example MPI program for CFD beginners.
!*
!* written by Dr. Katate Masatsuka (info[at]cfdbooks.com),
!*
!* the author of useful CFD books, "I do like CFD" (http://www.cfdbooks.com).
!*
!*
!* This is Version 1 (2015).
!*
!* This F90 program is written and made available for an educational purpose.
!* This program is neither optimal nor elegant in any sense. You would want to
!* write one by yourself once you understand what is going on in this program.
!*
!* Please feel free to contact the author with suggestions.
!* You can also ask questions, but please don't expect good answers because
!* the author is not an expert in MPI programming.
!*
!* This file may be updated in the future.
!*
!* Note: I cannot advise you on how to compile and run this program.
!*       But here is what I do to run this program on the NIA cluster:
!*
!*        prompt% module load openmpi/gcc/64/1.8.1
!*        prompt% mpif90 jacobi_mpi_v1.f90
!*        prompt% salloc --ntasks=4 mpirun ./a.out   <- To run with 4 processors
!*
!*       (Please consult the manual for the cluster/computer you wish to run it on.)
!*
!* Katate Masatsuka, March 2015. http://www.cfdbooks.com
!********************************************************************************

!********************************************************************************
!
! Input : nx_global = number of nodes in x-coordinate direction
!         ny_global = number of nodes in y-coordinate direction
!         (to be specified in the program)
!
! Output: 1. Tecplot files for the grid and the solution:
!
!            tecplot_0.dat
!            tecplot_1.dat
!            tecplot_2.dat
!              :
!            tecplot_(npes-1).dat, where npes is the number of processors
!
!         2. Output files
!
!            fort.1000
!            fort.1001
!            fort.1002
!              :
!            fort.(1000+npes-1)
!
!
!----------------------------
! Laplace's equation for temperature (the steady heat equation):
!
!  (temperature)_xx + (temperature)_yy = 0,  in (x,y) = (0,1)x(0,1).
!
!
!----------------------------
! Exact solution:
!
!  temperature = ( sinh(pi*x)*sin(pi*y) + sinh(pi*y)*sin(pi*x) )/sinh(pi)
!
!
!----------------------------
! Domain:
!
!  y=1 _____________________________
!     |                             |
!     |                             |
!     |                             |
!     |                             |
!     |                             |
!     |                             |
!     |                             |
!     |                             |
!     |                             |
!     |                             |
!     |                             |
!  y=0|_____________________________|
!     x=0                           x=1
!
!
!----------------------------
! Jacobi iteration:
!
!  Jacobi iteration is equivalent to explicit time-stepping for dT/dt = Laplace(T):
!
!  (Tnew-Told)/(time_step) = Laplace(Told)
!   -> Tnew = Told + (time_step)*(Laplace(Told))
!   -> Tnew = Told + (time_step)*(Finite-Difference(Told))
!   -> Tnew(i,j) = Told(i,j) + (time_step)*( Told(i-1,j)+Told(i+1,j)+Told(i,j-1)+Told(i,j+1)-4*Told(i,j) )/h^2
!
!   With (time_step) = h^2/4, this reduces to
!
!   -> Tnew(i,j) = ( Told(i-1,j)+Told(i+1,j)+Told(i,j-1)+Told(i,j+1) )/4
!
!  So, it is just a simple arithmetic average of the neighbor values.
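!
!  For reference, a single sweep of this update on the full (serial) grid could
!  be written as in the sketch below (variable names are illustrative only; the
!  actual parallel loops used in this program appear further down in the code):
!
!   do j = 2, ny_global-1
!    do i = 2, nx_global-1
!     temperature(i,j) = 0.25*( temperature_old(i+1,j) + temperature_old(i-1,j) &
!                             + temperature_old(i,j+1) + temperature_old(i,j-1) )
!    end do
!   end do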
!
!
!----------------------------
! To perform the Jacobi iteration in parallel with MPI, the domain is
! decomposed into 'npes' sub-domains, also called partitions or processor
! elements (pe). Whatever it is called, it is just a piece of the domain.
! In this program, we call it a partition or partition element (pe).
!
! Here is an example of npes=4: mype is the ID of each pe.
!       _____________________________
!      |                             |
!      |           mype=3            | <-- Top partition, mype=npes-1
!      |_____________________________|
!      |                             |
!      |           mype=2            |
!      |_____________________________|
!      |                             |
!      |           mype=1            |
!      |_____________________________|
!      |                             |
!      |           mype=0            | <-- Bottom partition, mype=0
!      |_____________________________|
!
!
! Each processor executes the same program on the corresponding sub-domain.
! (Note: This is the basic idea of MPI. We write one program, and run it in parallel
!  on each decomposed sub-domain.)
!
! Each partition has ghost (or fringe, or off-processor) nodes as below:
!       _____________________________
!      |______top ghost nodes________| (j=ny+1)
!      |                             |
!      |          interior           |
!      |_____________________________|
!      |______bottom ghost nodes_____| (j=0)
!
!
! Note, however, that the top partition has no ghost nodes at the top:
!       _____________________________
!      |______top boundary nodes_____| (j=ny+1, y=1.0)
!      |                             |
!      |        mype = npes-1        | <-- Top partition, mype=npes-1
!      |_____________________________|
!      |______bottom ghost nodes_____| (j=0)
!
! and the bottom partition has unused ghost nodes:
!       _____________________________
!      |_______top ghost nodes_______| (j=ny+1)
!      |                             |
!      |          interior           | <-- Bottom partition, mype=0
!      |_____________________________|
!      |___bottom boundary nodes_____| (j=1, y=0.0)
!      |______bottom ghost nodes_____| (j=0)  <-- We don't use these nodes.
!
!
!********************************************************************************
!********************************************************************************
!********************************************************************************
 program jacobi_iteration_mpi

 implicit none
 include 'mpif.h'

 integer, parameter :: p2 = selected_real_kind(15) !Double precision

!Global domain
 integer , parameter :: nx_global = 512
 integer , parameter :: ny_global = 512
 real(p2), parameter :: max_temp_error = 1.0e-06
 integer , parameter :: max_iterations = 500000
 real(p2)            :: dt_global = 100.0 !Maximum iterative error (initialized as 100)

!Local domain ( in the sub-domain or partition, or processor element (pe) )
 integer  :: nx, ny

 real(p2) :: start_time, stop_time
 integer  :: i, j, iteration
 real(p2) :: dt                  !Local maximum iterative error
 real(p2) :: h, xp, yp, yp_min_loc

 real(p2), allocatable, dimension(:,:) :: temperature
 real(p2), allocatable, dimension(:,:) :: temperature_old
 real(p2), allocatable, dimension(:,:) :: temperature_exact
 real(p2), allocatable, dimension(:,:) :: x, y

!MPI related variables
 integer :: mype
 integer :: npes
 integer :: ierr
 integer, dimension(4) :: req = MPI_REQUEST_NULL
 integer :: status(MPI_STATUS_SIZE)
 integer :: status_array(MPI_STATUS_SIZE,4)
 integer, parameter :: id_top_to_bottom = 100 ! some number
 integer, parameter :: id_bottom_to_top = 101 ! some number different from the above

!----------------------------------------------------------------------
! MPI start up

  call MPI_INIT(ierr)                       ! <- ierr=0 if successful

  call MPI_Comm_size(MPI_COMM_WORLD,      & ! <- Communicator (here, all PEs)
                     npes,                & ! <- Get the total number of PEs
                     ierr                 ) ! <- ierr=0 if successful

  call MPI_Comm_rank(MPI_COMM_WORLD,      & ! <- Communicator (here, all PEs)
                     mype,                & ! <- Get the ID of this PE
                     ierr                 ) ! <- ierr=0 if successful

 if( mype == 0 ) then
  write(*,*) ' npes = ', npes, ' mype = ', mype
  write(*,*)
  write(*,*) '   nx_global = ', nx_global
  write(*,*) '   ny_global = ', ny_global
  write(*,*) ' Total nodes = ', nx_global*ny_global
 endif

!----------------------------------------------------------------------
! Allocate arrays and decompose the domain.
! Each partition has nx+2 nodes in the x-direction and ny nodes in the y-direction.
! The ny nodes in y cover the sub-domain.
!
! Here's an example of npes=4:
!
!       _____________________________  j_global=ny_global
!      |                             |  j=ny
!      |           mype=3            |  :
!      |_____________________________|  j=1
!      |                             |  j=ny
!      |           mype=2            |  :
!      |_____________________________|  j=1
!      |                             |  j=ny
!      |           mype=1            |  :
!      |_____________________________|  j=1
!      |                             |  j=ny
!      |           mype=0            |  :
!      |_____________________________|  j=1
!                                        j_global=1
!     i=0                          i=nx+1
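
!----------------------------------------------------------------------
! Note: this simple decomposition assumes that ny_global is evenly divisible
!       by npes, so that every partition gets the same number of rows
!       (local j=1..ny on partition mype then corresponds to global rows
!        j_global = mype*ny + j). The guard below is only an illustrative
!       sketch of how that assumption could be checked; it is not essential
!       to the method.

  if ( mod(ny_global,npes) /= 0 ) then
   if (mype == 0) write(*,*) ' Error: ny_global must be divisible by npes. Stopping.'
   call MPI_Finalize(ierr)
   stop
  endif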

! Local size
   nx = nx_global
   ny = ny_global/npes

! Grid spacing. Uniform grid, so h = 1/nx_global = 1/ny_global.
   h = 1.0_p2/real(nx_global,p2)

! Allocate the local arrays
  allocate( temperature(      0:nx+1, 0:ny+1) )
  allocate( temperature_old(  0:nx+1, 0:ny+1) )
  allocate( temperature_exact(0:nx+1, 0:ny+1) )
  allocate( x(                0:nx+1, 0:ny+1) )
  allocate( y(                0:nx+1, 0:ny+1) )

! Grid data (including the ghost points at j = 0 and ny+1)
  do j = 0, ny+1

   ! y-coordinate
     yp_min_loc = real(mype )*(1.0/real(npes))
     yp = (yp_min_loc) + h*real(j-1)

   do i = 0, nx+1
    x(i,j) = real(i)/real(nx+1) ! x(0)=0.0 and x(nx+1)=1.0
    y(i,j) = yp
   end do

  end do

 if( mype == 0 ) then
  write(*,*)
  write(*,*) '          nx_local = ', nx
  write(*,*) '          ny_local = ', ny
  write(*,*) ' Total local nodes = ', nx*ny
 endif

!----------------------------------------------------------------------
! Set the exact, initial, and boundary values.

  call initialize(x,y,temperature_old, temperature_exact, npes, mype, nx,ny)

  temperature = temperature_old

!----------------------------------------------------------------------
! Record the starting time in 'start_time' for timing purposes.

  call cpu_time(start_time)

!**********************************************************************
!----- Beginning of Jacobi iteration

! Initialize the iteration counter
  iteration = 1

! Continue the iteration as long as dt_global = Max|Tnew-Told| is greater than
! the specified maximum tolerance ('max_temp_error') and the maximum iteration
! count is not reached.

 jacobi_iteration : do while ( dt_global > max_temp_error .and. iteration <= max_iterations)

 !-------------------------------------------------------------- Jacobi
 ! Jacobi iteration (== simple average): Update the temperature in the interior.
 ! Note: For mype=0, skip j=1, which are the bottom boundary nodes where the
 !       solution is fixed by the boundary condition. (The ghost nodes at j=0
 !       lie outside the domain and are never touched by this loop.)

   do j=1,ny
    if (mype==0 .and. j==1) cycle ! Skip the bottom boundary nodes (solution fixed)
    do i=1,nx
     temperature(i,j) = 0.25*( temperature_old(i+1,j  ) + temperature_old(i-1,j  ) + &
                               temperature_old(i  ,j+1) + temperature_old(i  ,j-1)   )
    enddo
   enddo

 !-------------------------------------------------------------- Jacobi
 ! Communication: Update the solution at ghost nodes.

 !--------------------------------------------------------------
 ! Send data
 !--------------------------------------------------------------
 !
 ! (1)Send top interior data to the bottom ghost data of the partition above.
 !    Note: For npes=4, this sends from mype=0,1,2 to mype=1,2,3, respectively.
 !                ___________________
 !               |___________________|
 !               |                   |
 !               |      mype+1       |
 !               |___________________|
 !               |___temp(1:nx,0)____| <--+  bottom ghost data
 !                                        |
 !                                        |
 !                ___________________     |
 !               |___________________|    |
 !               |  temp(1:nx,ny)    | ---+  top interior data
 !               |       mype        |
 !               |___________________|
 !               |___________________|

  if (mype < npes-1) then
   call MPI_Isend(temperature(1,ny), nx, MPI_DOUBLE_PRECISION, &
                  mype+1, id_top_to_bottom, MPI_COMM_WORLD, req(1), ierr)
  endif

 ! (2)Send bottom interior data to the top ghost data of the partition below.
 !    Note: For npes=4, this sends from mype=1,2,3 to mype=0,1,2, respectively.
 !                ___________________
 !               |___________________|
 !               |       mype        |
 !               |                   |
 !               |___temp(1:nx,1)____| ---+  bottom interior data
 !               |___________________|    |
 !                                        |
 !                ___________________     |
 !               |__temp(1:nx,ny+1)__| <--+  top ghost data
 !               |                   |
 !               |      mype-1       |
 !               |___________________|
 !               |___________________|

  if (mype /= 0) then
   call MPI_Isend(temperature(1,1), nx, MPI_DOUBLE_PRECISION, &
                  mype-1, id_bottom_to_top, MPI_COMM_WORLD, req(2), ierr)
  endif
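
 !--------------------------------------------------------------
 ! Note: The non-blocking exchange in this section (the two MPI_Isend calls
 !       above, the two MPI_Irecv calls below, and the MPI_Waitall that
 !       completes them) could alternatively be written with the blocking
 !       call MPI_Sendrecv, one call per neighbor. The lines below are only
 !       a sketch of that alternative and are not used by this program:
 !
 !  if (mype < npes-1) then
 !   call MPI_Sendrecv(temperature(1,ny),       nx, MPI_DOUBLE_PRECISION, mype+1, id_top_to_bottom, &
 !                     temperature_old(1,ny+1), nx, MPI_DOUBLE_PRECISION, mype+1, id_bottom_to_top, &
 !                     MPI_COMM_WORLD, status, ierr)
 !  endif
 !
 !  if (mype /= 0) then
 !   call MPI_Sendrecv(temperature(1,1),        nx, MPI_DOUBLE_PRECISION, mype-1, id_bottom_to_top, &
 !                     temperature_old(1,0),    nx, MPI_DOUBLE_PRECISION, mype-1, id_top_to_bottom, &
 !                     MPI_COMM_WORLD, status, ierr)
 !  endif
 !--------------------------------------------------------------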

 !--------------------------------------------------------------
 ! Receive data
 !--------------------------------------------------------------

 ! (1)Receive top interior data and store them in the bottom ghost data.
 !    Note: For npes=4, the receiving PEs are mype=1,2,3.
 !                ___________________
 !               |___________________|
 !               |                   |
 !               |       mype        |
 !               |___________________|
 !               |___temp(1:nx,0)____| <--+  bottom ghost data
 !                                        |
 !                                        |
 !                ___________________     |
 !               |___________________|    |
 !               |  temp(1:nx,ny)    | ---+  top interior data
 !               |      mype-1       |
 !               |___________________|
 !               |___________________|

  if (mype /= 0) then
   call MPI_Irecv(temperature_old(1,0), nx, MPI_DOUBLE_PRECISION, &
                  mype-1, id_top_to_bottom, MPI_COMM_WORLD, req(3), ierr)
  endif

 ! (2)Receive bottom interior data and store them in the top ghost data.
 !    Note: For npes=4, the receiving PEs are mype=0,1,2.
 !                ___________________
 !               |___________________|
 !               |      mype+1       |
 !               |                   |
 !               |___temp(1:nx,1)____| ---+  bottom interior data
 !               |___________________|    |
 !                                        |
 !                ___________________     |
 !               |__temp(1:nx,ny+1)__| <--+  top ghost data
 !               |                   |
 !               |       mype        |
 !               |___________________|
 !               |___________________|

  if (mype /= npes-1) then
   call MPI_Irecv(temperature_old(1,ny+1), nx, MPI_DOUBLE_PRECISION, &
                  mype+1, id_bottom_to_top, MPI_COMM_WORLD, req(4), ierr)
  endif

 ! Wait until the Isend and Irecv calls above are completed.

   call MPI_Waitall(4, req, status_array, ierr)

 !-------------------------------------------------------------- Jacobi
 ! Compute the maximum iterative error within this PE (local).

   dt = 0.0

   do j=1,ny
    if (mype==0 .and. j==1) cycle
    do i=1,nx
                       dt = max( abs(temperature(i,j) - temperature_old(i,j)), dt )
     temperature_old(i,j) = temperature(i,j)
    enddo
   enddo

 !----------------------------------------------------------------- Jacobi
 ! Compute the maximum of the iterative error among all PEs.

 ! (1)Compute the maximum and store it in the master processor (mype=0).

   call MPI_Reduce( dt ,                  & ! <- Variable to be collected
                    dt_global,            & ! <- Variable in which the result is stored
                    1,                    & ! <- Number of values = 1
                    MPI_DOUBLE_PRECISION, & ! <- Type of the value to be reduced
                    MPI_MAX,              & ! <- Type of operation = max
                    0,                    & ! <- Get the value and store it for mype=0
                    MPI_COMM_WORLD,       & ! <- Communicator (all PEs)
                    ierr                  ) ! <- ierr=0 if successful

 !Note: At this point, only mype=0 has the value of dt_global.
 !        ____________________
 !       |    dt_global=?     |  mype = 3
 !       |____________________|
 !       |    dt_global=?     |  mype = 2
 !       |____________________|            <-- Example, when npes=4
 !       |    dt_global=?     |  mype = 1
 !       |____________________|
 !       |    dt_global=xx    |  mype = 0
 !       |____________________|

 ! (2)Broadcast dt_global so that all processors will have the same dt_global value.

   call MPI_Bcast( dt_global,            & ! <- Variable to be broadcast
                   1,                    & ! <- Number of values = 1
                   MPI_DOUBLE_PRECISION, & ! <- Type of the value 'dt_global'
                   0,                    & ! <- Broadcast from mype=0
                   MPI_COMM_WORLD,       & ! <- Communicator (all PEs)
                   ierr                  ) ! <- ierr=0 if successful

 !Note: Now all processors have the same value of dt_global.
 !        ____________________
 !       |    dt_global=xx    |  mype = 3
 !       |____________________|
 !       |    dt_global=xx    |  mype = 2
 !       |____________________|            <-- Example, when npes=4
 !       |    dt_global=xx    |  mype = 1
 !       |____________________|
 !       |    dt_global=xx    |  mype = 0
 !       |____________________|
 !
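 !-----------------------------------------------------------------
 ! Note: Steps (1) and (2) above (MPI_Reduce followed by MPI_Bcast) could be
 !       combined into a single collective call, MPI_Allreduce, which performs
 !       the max-reduction and gives the result to every PE at once. The line
 !       below is only a sketch of that alternative and is not used here:
 !
 !   call MPI_Allreduce( dt, dt_global, 1, MPI_DOUBLE_PRECISION, &
 !                       MPI_MAX, MPI_COMM_WORLD, ierr )
 !-----------------------------------------------------------------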
 !---------------------------------------------------------------------- Jacobi
 ! Display the iteration number and the maximum iterative error,
 ! only from the master PE, and only at every 1000 iterations.

  if ( mype == 0 .and. mod(iteration,1000)==0 ) then
   write(*,*) ' itr = ', iteration, '   Max|Tnew - Told| = ', dt_global
  endif

 !---------------------------------------------------------------------- Jacobi
 ! Update the iteration counter

   iteration = iteration + 1

 enddo jacobi_iteration

!----- End of Jacobi iteration
!**********************************************************************

!----------------------------------------------------------------------
! Wait until all processors come to this point.
! This is for accurate timing and clean output.

  call MPI_Barrier(MPI_COMM_WORLD, & ! <- Communicator (all PEs)
                   ierr            ) ! <- ierr=0 if successful

!----------------------------------------------------------------------
! Store the current time in 'stop_time'.

  call cpu_time(stop_time)

!----------------------------------------------------------------------
! Print the number of iterations taken, the iterative error,
! and the actual CPU time taken.

 if( mype == 0 ) then
  print*, ' Max|Tnew - Told| at iteration ', iteration-1, ' = ', dt_global
  print*, ' Total time = ', stop_time-start_time, ' seconds.', ' nnodes=', nx*ny
 endif

! Below will be printed in the file 'fort.xxx' where xxx = mype+1000.

  write(mype+1000,*) ' Total time = ', stop_time-start_time, ' seconds.', ' nnodes=', nx*ny

!----------------------------------------------------------------------
! Write a Tecplot file for plotting the grid and the solution.

  temperature = temperature_old
  call write_tecplot_file(x,y,temperature,temperature_exact,nx,ny,mype,npes)

!----------------------------------------------------------------------
! MPI final call

 call MPI_Finalize(ierr) ! ierr=0 if successful

!********************************************************************************

 contains

! Below, subroutines used in the above main program are listed.

!********************************************************************************
! Store the exact solution in temperature_exact(:,:).
! Set the exact solution at boundary nodes in temperature_old(:,:),
! which is the boundary condition. At the interior nodes, the solution,
! temperature_old(:,:), is set to zero as an initial condition for the iteration.
!
! ------------------------------------------------------------------------------
!  Input:         x(:,:), y(:,:) = (x,y)-coordinate arrays of size (0:nx+1, 0:ny+1)
!                           npes = number of partition elements (processors)
!                           mype = processor ID
!
! Output: temperature_exact(:,:) = exact solution
!           temperature_old(:,:) = initial and boundary values for the numerical solution
! ------------------------------------------------------------------------------
!
!********************************************************************************
 subroutine initialize(x,y,temperature_old, temperature_exact, npes, mype, nx,ny)

 implicit none

 integer , intent( in)                           :: nx, ny, npes, mype
 real(p2), intent( in), dimension(0:nx+1,0:ny+1) :: x, y
 real(p2), intent(out), dimension(0:nx+1,0:ny+1) :: temperature_old
 real(p2), intent(out), dimension(0:nx+1,0:ny+1) :: temperature_exact

 integer  :: i, j
 real(p2) :: xp, yp
 real(p2), parameter :: pi = 3.141592653589793238_p2

!---------------------------------------------------------------------------
! First, store the exact solution everywhere.

  do j = 0, ny+1
   do i = 0, nx+1

    xp = x(i,j)
    yp = y(i,j)
    temperature_exact(i,j) = ( sinh(pi*xp)*sin(pi*yp) + sinh(pi*yp)*sin(pi*xp) )/sinh(pi)
      temperature_old(i,j) = temperature_exact(i,j)

   end do
  end do

!---------------------------------------------------------------------------
! Well, let's start the iteration with zero values in the interior instead
! of the exact solution.
! So, overwrite the solution in the interior!

  do j = 0, ny+1

   ! Skip the bottom domain-boundary (BC) nodes and the unused ghost nodes.
     if (mype==0      .and. j  < 2   ) cycle

   ! Skip the top domain-boundary (BC) nodes.
     if (mype==npes-1 .and. j == ny+1) cycle

   do i = 1, nx ! <-- i=1..nx because we set the zero solution at interior nodes only.
    temperature_old(i,j) = 0.0
   end do

  end do

! So, now we have the exact solution only on the boundary nodes.

 end subroutine initialize
!********************************************************************************

!********************************************************************************
! This subroutine writes a Tecplot data file for plotting the grid
! and the solution, with the filename: tecplot_mype.dat
!
! For example, if npes=4, then four files will be generated:
!
!  tecplot_0.dat
!  tecplot_1.dat
!  tecplot_2.dat
!  tecplot_3.dat
!
! Plot all these files together, and you'll see the entire domain.
!
! ------------------------------------------------------------------------------
!  Input:         x(:,:), y(:,:) = (x,y)-coordinate arrays of size (0:nx+1, 0:ny+1)
!               temperature(:,:) = numerical solution
!         temperature_exact(:,:) = exact solution
!                           npes = number of partition elements (processors)
!                           mype = processor ID
!
! Output: tecplot_mype.dat = Tecplot file for plotting (mype = processor ID)
! ------------------------------------------------------------------------------
!
!********************************************************************************
 subroutine write_tecplot_file(x,y,temperature,temperature_exact,nx,ny,mype,npes)

 implicit none

 integer , intent(in)                           :: nx, ny, mype, npes
 real(p2), intent(in), dimension(0:nx+1,0:ny+1) :: x, y
 real(p2), intent(in), dimension(0:nx+1,0:ny+1) :: temperature, temperature_exact

 integer       :: i, j
 character(80) :: char_temp, filename

!---------------------------------------------------------------------------
! Store the value of 'mype' as a character string in the variable 'char_temp'.

  write(char_temp,'(i5)') mype

!---------------------------------------------------------------------------
! Define the file name

  filename = "tecplot_" // trim(adjustl(char_temp)) // '.dat'

!---------------------------------------------------------------------------
! Open the file and start writing the data!

  open(unit=1, file=filename, status="unknown")

!---------------------------------------------------------------------------
! Header

  write(1,*) 'title =', '"Partition_'// trim(adjustl(char_temp)), '"'
  write(1,*) 'variables = "x", "y", "Temperature", "Temperature exact", "T-Texact"'

 if (mype==0) then
  write(1,*) 'zone T=','Partition_'// trim(adjustl(char_temp)),' i=', nx+2, 'j=', ny+1, 'f=point'
 else
  write(1,*) 'zone T=','Partition_'// trim(adjustl(char_temp)),' i=', nx+2, 'j=', ny+2, 'f=point'
 endif

!---------------------------------------------------------------------------
! Note: Exclude the bottom ghost nodes in mype=0, which lie outside the domain,
!       are not used in the iteration, and are completely irrelevant to the solution.
!       (That is why the zone above has ny+1 points in j for mype=0, and ny+2 otherwise.)

! Write grid data (including the ghost points at j = 0 and ny+1)

  do j = 0, ny+1

   if (mype==0 .and. j==0) cycle

   do i = 0, nx+1
    write(1,*) x(i,j), y(i,j), temperature(i,j), temperature_exact(i,j), &
               temperature(i,j) - temperature_exact(i,j) ! <- error
   end do

  end do

!---------------------------------------------------------------------------
! Close the file

  close(1)

 end subroutine write_tecplot_file
!********************************************************************************

 end program jacobi_iteration_mpi
!********************************************************************************
!********************************************************************************