!! Routines that run the PFASST algorithm
!
! This file is part of LIBPFASST.
!
!> Module of routines to run PFASST
module pf_mod_parallel
  use pf_mod_pfasst
  use pf_mod_interpolate
  use pf_mod_restrict
  use pf_mod_utils
  use pf_mod_timer
  use pf_mod_dtype
  use pf_mod_hooks
  use pf_mod_comm
  use pf_mod_results
  implicit none
contains

  !> This is the main interface to pfasst.
  !! It works out the number of time steps, allocates the results
  !! storage, checks the step count against the number of processors
  !! and then hands the run over to pf_block_run.
  !!
  !! The number of steps is taken from nsteps when it is present;
  !! otherwise it is computed as ceiling(tend/dt) and the run is stopped
  !! if tend/dt is not (close to) an integer.  tend is only read in the
  !! latter case.
  subroutine pf_pfasst_run(pf, q0, dt, tend, nsteps, qend, flags)
    type(pf_pfasst_t), intent(inout), target   :: pf       !! The complete PFASST structure
    class(pf_encap_t), intent(in   )           :: q0       !! The initial condition
    real(pfdp),        intent(in   )           :: dt       !! The time step for each processor
    real(pfdp),        intent(in   )           :: tend     !! The final time of run
    integer,           intent(in   ), optional :: nsteps   !! The number of time steps
    class(pf_encap_t), intent(inout), optional :: qend     !! The computed solution at tend
    integer,           intent(in   ), optional :: flags(:) !! User defined flags

    ! Local variables
    integer :: nproc       !! Total number of processors
    integer :: nsteps_loc  !! Local number of time steps

    ! make a local copy of nproc
    nproc = pf%comm%nproc

    !> Set the number of time steps to do
    !! The user can either pass in the number of time steps or
    !! pass in the time step size and length of run
    if (present(nsteps)) then
       nsteps_loc = nsteps
    else
       nsteps_loc = ceiling(tend/dt)
       ! Do sanity check on steps
       if (abs(real(nsteps_loc,pfdp)-tend/dt) > dt/100.0_pfdp) then
          print *,'dt=',dt
          print *,'nsteps=',nsteps_loc
          print *,'tend=',tend
          stop "Invalid nsteps"
       end if
    end if
    pf%state%nsteps = nsteps_loc

    !> Allocate stuff for holding results
    call initialize_results(pf%results,nsteps_loc, pf%niters, pf%comm%nproc, pf%nlevels,pf%rank)

    ! Do sanity checks on nproc.
    ! The local step count is tested here because nsteps is optional and
    ! must not be referenced when the caller did not supply it.
    if (mod(nsteps_loc,nproc) > 0) stop "ERROR: nsteps must be multiple of nproc (pf_parallel.f90)."

    if (present(qend)) then
       call pf_block_run(pf, q0, dt, nsteps_loc,qend=qend,flags=flags)
    else
       call pf_block_run(pf, q0, dt, nsteps_loc,flags=flags)
    end if

    if (pf%save_results) call pf%results%dump(pf%results)

    ! What we would like to do is check for
    ! 1. nlevels==1 and nprocs ==1 -> Serial SDC
    !    Predictor is either spreadQ or nothing
    !    Then we just call a loop on sweeps
    !    Communication is copy
    ! 2. nlevels > 1 and nprocs ==1 -> Serial MLSDC
    !    Predictor is needed to populate levels (or nothing)
    !    Then we just call a loop on MLSDC sweeps
    !    Communication is copy
    ! 3. nlevels == 1 and nprocs > 1 -> Pipelined SDC
    !    Predictor is just like PFASST, but on finest (only) level (or nothing)
    ! 4. nlevels > 1 and nprocs > 1 -> PFASST
  end subroutine pf_pfasst_run

  !
  !> PFASST Predictor.
  !> Subroutine to initialize the solution on each processor
  !! The goal is to have a solution at each level and each node set to a consistent value
  !! When this is called, the value of q0 at the fine level on each processor has been set somehow (see q0_style below)
  !!
  !! This can be broken down into five substeps
  !! 1. Get the initial condition on the finest level at each node
  !! 2. Coarsen the initial condition to each coarser level with tau corrections
  !! 3. Do the "Burn in" step on the coarse level to make the coarse values consistent
  !!    (this is skipped if the fine initial conditions are already consistent)
  !! 4. Do some coarse grid sweeps to improve initial solutions on coarsest nodes
  !! 5. Interpolating coarse correction back to finer levels sweeping along the way.
  !!
  !! There are several parameters or flags that determine how things are done:
  !!  integer q0_style: can take 3 values
  !!     0: Only the q0 at t=0 is valid (default)
  !!     1: The q0 at each processor is valid
  !!     2: q0 and all nodes at each processor is valid
  !!  logical PFASST_pred: If true, the burn-in step uses the "PFASST predictor" trick
  !!  integer nsweeps_burn: Determines how many sweeps are done on the coarse level during burn in
  !!  integer nsweeps_pred: Determines how many sweeps are done at the coarse level after burn in
  !!  logical Pipeline_burn: True if coarse sweeps during burn in are pipelined (meaningless if nsweeps_burn>1 on coarse level)
  !!  logical Pipeline_pred: True if coarse sweeps after burn in are pipelined (meaningless if nsweeps_pred>1 on coarse level)
  !!     Pipeline variables do nothing if there is only one processor
  !!  logical RK_pred: If true, the coarse level is initialized with Runge-Kutta instead of the PFASST burn in.
  !!     We will still do coarse sweeps after and correct finer levels
  !!
  !! The user defined flags(:) parameter is used to determine whether we are in a (standard) forward-in-time run (flags(1) == 1)
  !! or backward-in-time (for the adjoint) with a given terminal condition qend instead of initial condition q0 (flags(1) == 2).
  !! In the latter case, e.g., sweeper%spreadq0 has to do the correct thing (i.e., spread qend instead of q0).
  !! (flags is not referenced directly inside this routine.)
  !!
  !! No time communication is performed during the predictor since all
  !! processors can do the work themselves
  !!
  !! The iteration count is reset to 0, and the status is reset to
  !! ITERATING.
  subroutine pf_predictor(pf, t0, dt, flags)
    type(pf_pfasst_t), intent(inout), target   :: pf       !! PFASST main data structure
    real(pfdp),        intent(in   )           :: t0       !! Initial time of this processor
    real(pfdp),        intent(in   )           :: dt       !! time step
    integer,           intent(in   ), optional :: flags(:) !! User defined flags

    class(pf_level_t), pointer :: c_lev_p      !! Coarse level of the current level pair
    class(pf_level_t), pointer :: f_lev_p      !! Fine level of the current level pair
    integer                    :: k            !! Loop indices
    integer                    :: level_index  !! Local variable for looping over levels
    real(pfdp)                 :: t0k          !! Initial time at time step k

    call call_hooks(pf, 1, PF_PRE_PREDICTOR)
    call start_timer(pf, TPREDICTOR)

    if (pf%debug) print*, 'DEBUG --', pf%rank, 'beginning predictor'

    !!
    !! Step 1. Getting the initial condition on the finest level at each processor
    !!         If we are doing multiple levels, then we need to coarsen the fine level
    f_lev_p => pf%levels(pf%nlevels)
    if (pf%q0_style < 2) then  ! Spread q0 to all the nodes
       call f_lev_p%ulevel%sweeper%spreadq0(f_lev_p, t0)
    endif

    !!
    !! Step 2: Proceed fine to coarse levels coarsening the fine solution and computing tau correction
    if (pf%debug) print*, 'DEBUG --', pf%rank, 'do coarsen in predictor'
    if (pf%nlevels > 1) then
       do level_index = pf%nlevels, 2, -1
          f_lev_p => pf%levels(level_index);
          c_lev_p => pf%levels(level_index-1)
          call pf_residual(pf, f_lev_p, dt)
          call f_lev_p%ulevel%restrict(f_lev_p, c_lev_p, f_lev_p%q0, c_lev_p%q0, t0)
          call restrict_time_space_fas(pf, t0, dt, level_index)  ! Restrict
          call save(c_lev_p)
       end do  ! level_index = pf%nlevels, 2, -1
       ! On loop exit level_index is 1 and c_lev_p points at level 1.
    else
       level_index = 1
       c_lev_p => pf%levels(1)
    end if

    !!
    !! Step 3. Do the "Burn in" step on the coarse level to make the coarse values consistent
    !!         (this is skipped if the fine initial conditions are already consistent)
    !! The first processor does nothing, the second does one set of sweeps, the third two, etc
    !! Hence, this is skipped completely if nprocs=1
    if (pf%debug) print*, 'DEBUG --', pf%rank, 'do burnin in predictor'
    if (pf%q0_style .eq. 0) then  ! The coarse level needs burn in
       !! If RK_pred is true, just do some RK_steps
       if (pf%RK_pred) then  ! Use Runge-Kutta to get the coarse initial data
          ! Get new initial conditions
          call pf_recv(pf, c_lev_p, 100000+pf%rank, .true.)

          ! Do a RK_step
          call c_lev_p%ulevel%sweeper%sweep(pf, level_index, t0, dt, 1)
          ! Send forward
          call pf_send(pf, c_lev_p, 100000+pf%rank+1, .false.)
       else  ! Normal PFASST burn in
          level_index=1
          c_lev_p => pf%levels(level_index)
          do k = 1, pf%rank + 1
             pf%state%iter = -k
             ! Remember t0=pf%rank*dt is the beginning of this time slice so t0-(pf%rank)*dt is 0
             ! and we iterate up to the correct time step.
             ! for optimal control problem t, t0k has no influence on f_eval, so there this does something else
             t0k = t0-(pf%rank)*dt + (k-1)*dt

             ! Get new initial value (skip on first iteration)
             if (k > 1) then
                call c_lev_p%q0%copy(c_lev_p%qend,flags=1)
                ! If we are doing PFASST_pred, we use the old values at nodes, otherwise spread q0
                if (.not. pf%PFASST_pred) then
                   call c_lev_p%ulevel%sweeper%spreadq0(c_lev_p, t0k)
                end if
             end if
             ! Do some sweeps
             call c_lev_p%ulevel%sweeper%sweep(pf, level_index, t0k, dt,pf%nsweeps_burn)
          end do
       endif  ! RK_pred
    end if  ! (q0_style .eq. 0)

    !!
    !! Step 4: Now we have everyone burned in, so do some coarse sweeps
    if (pf%nlevels > 1) then
       if (pf%debug) print*, 'DEBUG --', pf%rank, 'do sweeps in predictor', 'Pipeline_pred',pf%Pipeline_pred
       pf%state%pstatus = PF_STATUS_ITERATING
       pf%state%status = PF_STATUS_ITERATING
       if (pf%Pipeline_pred) then
          do k = 1, c_lev_p%nsweeps_pred
             pf%state%iter =-(pf%rank + 1) -k

             ! Get new initial conditions
             call pf_recv(pf, c_lev_p, c_lev_p%index*110000+pf%rank+k, .true.)

             ! Do a sweep
             call c_lev_p%ulevel%sweeper%sweep(pf, level_index, t0, dt, 1)
             ! Send forward
             call pf_send(pf, c_lev_p, c_lev_p%index*110000+pf%rank+1+k, .false.)
          end do  ! k = 1, c_lev_p%nsweeps_pred
       else  ! Don't pipeline
          ! Get new initial conditions
          call pf_recv(pf, c_lev_p, c_lev_p%index*110000+pf%rank, .true.)

          ! Do the sweeps
          call c_lev_p%ulevel%sweeper%sweep(pf, level_index, t0, dt, c_lev_p%nsweeps_pred)
          ! Send forward
          call pf_send(pf, c_lev_p, c_lev_p%index*110000+pf%rank+1, .false.)
       endif  ! (Pipeline_pred .eq. .true) then
    end if

    if (pf%debug) print*, 'DEBUG --', pf%rank, 'returning to fine level in predictor'
    !!
    !! Step 5: Return to fine level sweeping on any level in between coarsest and finest
    do level_index = 2, pf%nlevels  ! Will do nothing with one level
       f_lev_p => pf%levels(level_index);
       c_lev_p => pf%levels(level_index-1)
       call interpolate_time_space(pf, t0, dt, level_index, c_lev_p%Finterp)
       call interpolate_q0(pf, f_lev_p, c_lev_p)

       ! Do a sweep on level unless we are at the finest level
       if (level_index < pf%nlevels) then
          call f_lev_p%ulevel%sweeper%sweep(pf, level_index, t0, dt, f_lev_p%nsweeps_pred)
       end if
    end do

    call end_timer(pf, TPREDICTOR)
    call call_hooks(pf, -1, PF_POST_PREDICTOR)

    pf%state%iter   = 0
    pf%state%status = PF_STATUS_ITERATING
    pf%state%pstatus = PF_STATUS_ITERATING
    if (pf%debug) print*, 'DEBUG --', pf%rank, 'ending predictor'
  end subroutine pf_predictor

  !> Subroutine to test residuals to determine if the current processor has converged.
  !! Only the finest level (pf%nlevels) is examined; meeting either the
  !! relative or the absolute tolerance is sufficient.
  subroutine pf_check_residual(pf, residual_converged)
    type(pf_pfasst_t), intent(inout) :: pf
    logical,           intent(out)   :: residual_converged  !! Return true if residual is below tolerances

    residual_converged = .false.

    ! Check to see if relative tolerance is met
    if (pf%levels(pf%nlevels)%residual_rel < pf%rel_res_tol) then
       if (pf%debug) print*, 'DEBUG --', pf%rank, ' residual relative tol met',pf%levels(pf%nlevels)%residual_rel
       residual_converged = .true.
    end if
    ! Check to see if absolute tolerance is met
    if (pf%levels(pf%nlevels)%residual < pf%abs_res_tol) then
       if (pf%debug) print*, 'DEBUG --',pf%rank, 'residual tol met',pf%levels(pf%nlevels)%residual
       residual_converged = .true.
    end if
  end subroutine pf_check_residual

  !> Subroutine to check if the current processor has converged and
  !> to update the next processor on the status
  !> Note that if the previous processor hasn't converged yet
  !> (pstatus), the current processor can't be converged yet either
  subroutine pf_check_convergence_block(pf, send_tag)
    type(pf_pfasst_t), intent(inout) :: pf
    integer,           intent(in)    :: send_tag  !! identifier for status send and receive

    logical :: residual_converged, converged

    ! Shortcut for fixed iteration mode: with both tolerances zero no
    ! status is exchanged and every processor keeps iterating
    if (pf%abs_res_tol == 0 .and. pf%rel_res_tol == 0) then
       pf%state%pstatus = PF_STATUS_ITERATING
       pf%state%status  = PF_STATUS_ITERATING
       return
    end if

    call call_hooks(pf, 1, PF_PRE_CONVERGENCE)

    !> Check to see if tolerances are met
    call pf_check_residual(pf, residual_converged)

    !> Until I hear the previous processor is done, receive its status
    if (pf%state%pstatus /= PF_STATUS_CONVERGED) call pf_recv_status(pf, send_tag)

    !> Check to see if I am converged
    converged = .false.
    if (residual_converged) then
       if (pf%rank == 0) then
          converged = .true.
       else  ! I am not the first processor, so I need to check the previous one
          if (pf%state%pstatus == PF_STATUS_CONVERGED) converged = .true.
       end if
    end if  ! (residual_converged)

    !> Assign status and send it forward
    if (converged) then
       if (pf%state%status == PF_STATUS_ITERATING) then
          ! If I am converged for the first time
          ! then flip my flag and send the last status update
          pf%state%status = PF_STATUS_CONVERGED
          call pf_send_status(pf, send_tag)
       end if
    else
       ! I am not converged, send the news
       pf%state%status = PF_STATUS_ITERATING
       call pf_send_status(pf, send_tag)
    end if

    call call_hooks(pf, 1, PF_POST_CONVERGENCE)
  end subroutine pf_check_convergence_block

  !
  !> PFASST controller for block mode
  !! The nsteps time steps are processed in nsteps/nproc blocks.  Each
  !! block runs the predictor and then up to pf%niters V-cycles, leaving
  !! the iteration loop early once this processor's status is CONVERGED.
  subroutine pf_block_run(pf, q0, dt, nsteps, qend,flags)
    type(pf_pfasst_t), intent(inout), target   :: pf       !! PFASST main data structure
    class(pf_encap_t), intent(in   )           :: q0       !! Initial condition copied into the finest level
    real(pfdp),        intent(in   )           :: dt       !! Time step
    integer,           intent(in   )           :: nsteps   !! Total number of time steps
    class(pf_encap_t), intent(inout), optional :: qend     !! If present, receives the finest-level end value
    integer,           intent(in   ), optional :: flags(:) !! User defined flags, passed on to the predictor

    class(pf_level_t), pointer :: lev_p  !! pointer to the one level we are operating on
    integer                    :: j, k
    integer                    :: nblocks        !! The number of blocks of steps to do
    integer                    :: nproc          !! The number of processors being used
    integer                    :: level_index_c  !! Coarsest level in V-cycle

    call start_timer(pf, TTOTAL)

    pf%state%dt   = dt
    pf%state%proc = pf%rank+1
    pf%state%step = pf%rank
    pf%state%t0   = pf%state%step * dt

    ! pointer to finest level to start
    lev_p => pf%levels(pf%nlevels)

    ! Stick the initial condition into q0 (will happen on all processors)
    call lev_p%q0%copy(q0, flags=1)

    nproc = pf%comm%nproc
    nblocks = nsteps/nproc

    ! Decide what the coarsest level in the V-cycle is
    level_index_c=1
    if (.not. pf%Vcycle) level_index_c=pf%nlevels

    do k = 1, nblocks  ! Loop over blocks of time steps
       ! print *,'Starting step=',pf%state%step,' block k=',k
       ! Each block will consist of
       ! 1. predictor
       ! 2. Vcycle until max iterations, or tolerances met
       ! 3. Move solution to next block

       !> When starting a new block, broadcast new initial conditions to all procs
       !> For initial block, this is done when initial conditions are set

       !> Reset some flags
       pf%state%iter    = -1
       pf%state%itcnt   = 0
       pf%state%mysteps = 0
       pf%state%status  = PF_STATUS_PREDICTOR
       pf%state%pstatus = PF_STATUS_PREDICTOR
       pf%comm%statreq  = -66

       if (k > 1) then
          if (nproc > 1) then
             call lev_p%qend%pack(lev_p%send)  !! Pack away your last solution
             call pf_broadcast(pf, lev_p%send, lev_p%mpibuflen, pf%comm%nproc-1)
             call lev_p%q0%unpack(lev_p%send)  !! Everyone resets their q0
          else
             call lev_p%q0%copy(lev_p%qend, flags=1)  !! Just stick qend in q0
          end if

          !> Update the step and t0 variables for new block
          pf%state%step = pf%state%step + pf%comm%nproc
          pf%state%t0   = pf%state%step * dt
       end if

       !> Call the predictor to get an initial guess on all levels and all processors
       call pf_predictor(pf, pf%state%t0, dt, flags)

       !> Start the loops over SDC sweeps
       pf%state%iter = 0
       call call_hooks(pf, -1, PF_POST_ITERATION)

       call start_timer(pf, TITERATION)
       do j = 1, pf%niters
          call call_hooks(pf, -1, PF_PRE_ITERATION)

          pf%state%iter = j

          ! Do a v_cycle
          call pf_v_cycle(pf, k, pf%state%t0, dt,level_index_c,pf%nlevels)

          ! Check for convergence
          call pf_check_convergence_block(pf, send_tag=1111*k+j)

          if (pf%save_results) pf%results%residuals(pf%state%iter, k, lev_p%index) = lev_p%residual

          ! print *,pf%rank, ' post res'
          call call_hooks(pf, -1, PF_POST_ITERATION)

          ! If we are converged, exit block
          if (pf%state%status == PF_STATUS_CONVERGED) exit
       end do  ! Loop over the iterations in this block
       call call_hooks(pf, -1, PF_POST_CONVERGENCE)
       call end_timer(pf, TITERATION)

    end do  ! Loop over the blocks

    call end_timer(pf, TTOTAL)

    ! Grab the last solution for return (if wanted)
    if (present(qend)) then
       call qend%copy(lev_p%qend, flags=1)
    end if
  end subroutine pf_block_run

  !> Execute a V-cycle between levels level_index_f (finest) and level_index_c (coarsest)
  !! The value of iteration is folded into the message tags used for the
  !! sends and receives on each level.
  subroutine pf_v_cycle(pf, iteration, t0, dt,level_index_c,level_index_f, flags)
    type(pf_pfasst_t), intent(inout), target :: pf
    real(pfdp),        intent(in)            :: t0, dt
    integer,           intent(in)            :: iteration      !! Used to build communication tags
    integer,           intent(in)            :: level_index_c  !! Coarsest level of V-cycle
    integer,           intent(in)            :: level_index_f  !! Finest level of V-cycle
    integer, optional, intent(in)            :: flags          !! Not referenced in this routine

    type(pf_level_t), pointer :: f_lev_p, c_lev_p
    integer :: level_index, j

    !> Post the nonblocking receives on all the levels that will be receiving later
    !> (for single level this will be skipped)
    do level_index = level_index_c+1, level_index_f
       f_lev_p => pf%levels(level_index)
       call pf_post(pf, f_lev_p, f_lev_p%index*10000+iteration)
    end do

    !> move from fine to coarse doing sweeps
    do level_index = level_index_f, level_index_c+1, -1
       f_lev_p => pf%levels(level_index);
       c_lev_p => pf%levels(level_index-1)
       call f_lev_p%ulevel%sweeper%sweep(pf, level_index, t0, dt, f_lev_p%nsweeps)
       call pf_send(pf, f_lev_p, level_index*10000+iteration, .false.)
       call restrict_time_space_fas(pf, t0, dt, level_index)
       call save(c_lev_p)
    end do

    ! Do the coarsest level
    level_index=level_index_c
    f_lev_p => pf%levels(level_index)
    if (pf%pipeline_pred) then
       ! Pipelined: one receive / single sweep / send per sweep
       do j = 1, f_lev_p%nsweeps
          call pf_recv(pf, f_lev_p, f_lev_p%index*10000+iteration+j, .true.)
          call f_lev_p%ulevel%sweeper%sweep(pf, level_index, t0, dt, 1)
          call pf_send(pf, f_lev_p, f_lev_p%index*10000+iteration+j, .false.)
       end do
    else
       call pf_recv(pf, f_lev_p, f_lev_p%index*10000+iteration, .true.)
       call f_lev_p%ulevel%sweeper%sweep(pf, level_index, t0, dt, f_lev_p%nsweeps)
       call pf_send(pf, f_lev_p, level_index*10000+iteration, .false.)
    endif

    ! Now move coarse to fine interpolating and sweeping
    do level_index = level_index_c+1,level_index_f
       f_lev_p => pf%levels(level_index);
       c_lev_p => pf%levels(level_index-1)
       call interpolate_time_space(pf, t0, dt, level_index, c_lev_p%Finterp)
       call pf_recv(pf, f_lev_p, level_index*10000+iteration, .false.)

       if (pf%rank /= 0) then
          ! interpolate increment to q0 -- the fine initial condition
          ! needs the same increment that Q(1) got, but applied to the
          ! new fine initial condition
          call interpolate_q0(pf, f_lev_p, c_lev_p)
       end if

       ! don't sweep on the finest level since that is only done at beginning
       if (level_index < level_index_f) then
          call f_lev_p%ulevel%sweeper%sweep(pf, level_index, t0, dt, f_lev_p%nsweeps)
       else  ! compute residual for diagnostics since we didn't sweep
          call pf_residual(pf, f_lev_p, dt)
       end if
    end do

  end subroutine pf_v_cycle

end module pf_mod_parallel