subroutine da_trace_report !-------------------------------------------------------------------- ! Purpose: Produce a trace report !-------------------------------------------------------------------- implicit none integer :: i ! loop counter integer :: j ! loop counter integer :: CountRate integer :: MasterNoRoutines integer :: temp integer :: MinElapsedPos integer :: MaxElapsedPos integer :: MinCPUPos integer :: MaxCPUPos integer :: itemp1(MaxNoRoutines) integer :: MasterMaxHeap(0:num_procs-1,MaxNoRoutines) integer :: MasterNoCalls(0:num_procs-1,MaxNoRoutines) integer :: OverallNoCalls(MaxNoRoutines) integer, allocatable :: Index(:) real :: TempCpuTime real :: TotalElapsedTime ! real :: TotalCPUTime(1) ! real :: SpeedUp ! speed up factor real :: PercentCPUTime ! percentage in CPU time real :: PercentElapsedTime ! percentage in elapsed time real :: rtemp1(MaxNoRoutines) real :: MasterElapsedTime(0:num_procs-1,MaxNoRoutines) real :: MasterElapsedTimeLocal(0:num_procs-1,MaxNoRoutines) real :: MasterCPUTime(0:num_procs-1,MaxNoRoutines) real :: MasterCPUTimeLocal(0:num_procs-1,MaxNoRoutines) real :: OverallElapsedTime(MaxNoRoutines) real :: OverallCPUTime(MaxNoRoutines) character (len=TraceNameLen) :: MasterTimerNames(MaxNoRoutines) if (.not. use_html) then write (unit=trace_unit, fmt='(A)') & "Report only available if use_html is true" return end if call system_clock (COUNT=temp) TotalElapsedTime=temp-BaseElapsedTime ! on PE 0 call cpu_time(TempCpuTime) TotalCPUTime(1) = TempCpuTime - BaseCPUTime call system_clock(& COUNT_RATE=CountRate) ! Ensure the lists from each PE match. use the routine list from the ! traced PE as the master copy MasterTimerNames(:)=TimerNames(:) if (myproc == trace_pe) then MasterNoRoutines=NoRoutines else MasterNoRoutines=0 end if #ifdef DM_PARALLEL call da_proc_sum_int(MasterNoRoutines) ! only PE 0 knows the result call mpi_bcast(MasterTimerNames(1:MaxNoRoutines), & TraceNameLen*MaxNoRoutines, & MPI_character,trace_pe, comm,ierr) #endif MasterElapsedTime(:,:)=0.0 MasterCPUTime(:,:)=0.0 MasterElapsedTimeLocal(:,:)=0.0 MasterCPUTimeLocal(:,:)=0.0 MasterNoCalls(:,:)=0 MasterMaxHeap(:,:)=0 do i=1,MaxNoRoutines do j=1,NoRoutines if (TimerNames(j) == MasterTimerNames(i)) then MasterElapsedTime(myproc,i)=ElapsedTime(j) MasterCPUTime(myproc,i)=CPUTime(j) MasterElapsedTimeLocal(myproc,i)=ElapsedTimeLocal(j) MasterCPUTimeLocal(myproc,i)=CPUTimeLocal(j) MasterNoCalls(myproc,i)=NoCalls(j) MasterMaxHeap(myproc,i)=MaxHeap(j) cycle end if end do end do #ifdef DM_PARALLEL do i=0,num_procs-1 call da_proc_sum_real(MasterElapsedTime(i,:)) call da_proc_sum_real(MasterCPUTime(i,:)) call da_proc_sum_real(MasterElapsedTimeLocal(i,:)) call da_proc_sum_real(MasterCPUTimeLocal(i,:)) call da_proc_sum_ints(MasterNoCalls(i,:)) call da_proc_sum_ints(MasterMaxHeap(i,:)) end do #endif if (rootproc) then do j=1,MasterNoRoutines rtemp1(j)=sum(MasterElapsedTimeLocal(:,j)) end do !========================================================================== ! Sort subroutines into time order based on local Elapsed Time. ! All PEs should have the same sort order after the sum. !========================================================================== allocate (Index(MasterNoRoutines)) call da_trace_real_sort(rtemp1,MasterNoRoutines,index) do j=1,MasterNoRoutines OverallElapsedTime(j)=sum(MasterElapsedTimeLocal(:,j)) OverallCPUTime(j)=sum(MasterCPUTimeLocal(:,j)) OverallNoCalls(j)=sum(MasterNoCalls(:,j)) end do write(unit=trace_unit, fmt='(A,I4,A)') & "
" else write (unit=trace_unit,fmt='(A)') & "No estimate can be made of time in trace itself.
" end if write(unit=trace_unit, & fmt='("
Routine Name | Calls | Elapsed Time (seconds) | ")') write(unit=trace_unit, & fmt='("CPU Time (seconds) | Speed up")') write(unit=trace_unit, & fmt='(" | ||||||
---|---|---|---|---|---|---|---|---|---|---|
per PE | Average per PE | % | Minimum | Maximum | Total | % | Minimum | Maximum")') write(unit=trace_unit, & fmt='(" | ",I4," PE")') num_procs do i=MasterNoRoutines,1,-1 Pointer=index(i) if (TotalCPUTime(1) > 0.0) then PercentCPUTime=100.0 * OverallCPUTime(Pointer)/TotalCPUTime(1) else PercentCPUTime=100.0 end if if (TotalElapsedTime > 0.0) then PercentElapsedTime=100.0*OverallElapsedTime(Pointer)/(real(num_procs) * TotalElapsedTime) else PercentElapsedTime=100.0 end if if (OverallElapsedTime(Pointer) > 0.0) then SpeedUp = OverallCPUTime(Pointer) / (OverallElapsedTime(Pointer)/real(CountRate*num_procs)) else SpeedUp = 0.0 end if ! This horrible solution as MinLOC does not take a DIM argument, so sum ! is needed to convert the array to an integer MinElapsedPos = sum(MinLOC(MasterElapsedTimeLocal(:,Pointer)))-1 MaxElapsedPos = sum(MAXLOC(MasterElapsedTimeLocal(:,Pointer)))-1 MinCPUPos = sum(MinLOC(MasterCPUTimeLocal(:,Pointer)))-1 MaxCPUPos = sum(MAXLOC(MasterCPUTimeLocal(:,Pointer)))-1 !---------------------------------------------------------------------- ! Write out results. Note the abnormally long format line is needed as ! the NAG compiler complains if a quoted line is split. !---------------------------------------------------------------------- write (unit=trace_unit, fmt='(7A)') & " | |
", & trim(MasterTimerNames(Pointer)), & ! Subroutine name "" write (unit=trace_unit, & fmt='(" | ",F10.1,2(" | ",F10.2," | ",F10.1,2(" | ",F10.1," on",I3))," | ",F5.2)') & real(OverallNoCalls(Pointer))/real(num_procs), & ! Average number of calls per PE OverallElapsedTime(Pointer)/(num_procs*real(CountRate)), & ! Average Elapsed Time PercentElapsedTime, & ! Percent Elapsed Time MasterElapsedTimeLocal(MinElapsedPos,Pointer)/real(CountRate), & ! Min average Elapsed Time MinElapsedPos, & ! Which PE MasterElapsedTimeLocal(MaxElapsedPos,Pointer)/real(CountRate), & ! Max average Elapsed Time MaxElapsedPos, & ! Which PE OverallCPUTime(Pointer), & ! CPU time PercentCPUTime, & ! Percent CPU time MasterCPUTimeLocal(MinCPUPos,Pointer), & ! Min average CPU Time MinCPUPos, & ! Which PE MasterCPUTimeLocal(MaxCPUPos,Pointer), & ! Max average CPU Time MaxCPUPos, & ! Which PE SpeedUp ! Speedup if (trace_csv) then write(unit=trace_csv_unit, & fmt='(2A,F10.1,2(",",F10.2,",",F10.1,2(",",F10.1,",",I3)),",",F5.2)') & '"local",', & '"'//trim(MasterTimerNames(Pointer))//'",', & real(OverallNoCalls(Pointer))/real(num_procs), & ! Average number of calls per PE OverallElapsedTime(Pointer)/(num_procs*real(CountRate)), & ! Average Elapsed Time PercentElapsedTime, & ! Percent Elapsed Time MasterElapsedTimeLocal(MinElapsedPos,Pointer)/real(CountRate), & ! Min average Elapsed Time MinElapsedPos, & ! Which PE MasterElapsedTimeLocal(MaxElapsedPos,Pointer)/real(CountRate), & ! Max average Elapsed Time MaxElapsedPos, & ! Which PE OverallCPUTime(Pointer), & ! CPU time PercentCPUTime, & ! Percent CPU time MasterCPUTimeLocal(MinCPUPos,Pointer), & ! Min average CPU Time MinCPUPos, & ! Which PE MasterCPUTimeLocal(MaxCPUPos,Pointer), & ! Max average CPU Time MaxCPUPos, & ! Which PE SpeedUp ! Speedup end if end do write (unit=trace_unit, & fmt='(A,I4,A,F8.1,A,F8.1,A)') & " | |||||
Total",MasterNoRoutines, " | ", & TotalElapsedTime/real(CountRate), & " | ", & TotalCPUTime(1)," | " if (TotalElapsedTime > 0.0) then write (unit=trace_unit, fmt='(" | ",F8.1,"")') & (TotalCPUTime(1))/(TotalElapsedTime/real(CountRate)) end if write(unit=trace_unit, & fmt='(" |
")') if (trace_csv) then write(unit=trace_csv_unit,fmt=*) " " end if !=================================================================================== ! Sort subroutines into time order based on overall Elapsed Time. ! All PEs should have the same sort order after the sum. !=================================================================================== do j=1,MasterNoRoutines rtemp1(j)=sum(MasterElapsedTime(:,j)) end do call da_trace_real_sort(rtemp1,MasterNoRoutines,index) do j=1,MasterNoRoutines OverallElapsedTime(j)=sum(MasterElapsedTime(:,j)) OverallCPUTime(j)=sum(MasterCPUTime(:,j)) end do ! Output timing information write(unit=trace_unit, & fmt='("
Routine Name | Calls | Elapsed Time (seconds) | ")') write(unit=trace_unit, & fmt='("CPU Time (seconds) | Speed up")') write(unit=trace_unit, & fmt='(" | ||||||
---|---|---|---|---|---|---|---|---|---|---|
per PE | Average per PE | % | Minimum | Maximum | Total | % | Minimum | Maximum")') write(unit=trace_unit, & fmt='(" | ",I4," PE")') num_procs do i=MasterNoRoutines,1,-1 Pointer=index(i) if (TotalCPUTime(1) > 0.0) then PercentCPUTime=100.0 * OverallCPUTime(Pointer)/TotalCPUTime(1) else PercentCPUTime=100.0 end if if (TotalElapsedTime > 0.0) then PercentElapsedTime=100.0 * OverallElapsedTime(Pointer)/(real(num_procs) * TotalElapsedTime) else PercentElapsedTime=100.0 end if if (OverallElapsedTime(Pointer) > 0.0) then SpeedUp = OverallCPUTime(Pointer) / (OverallElapsedTime(Pointer)/real(num_procs*CountRate)) else SpeedUp = 0.0 end if ! This horrible solution as MinLOC does not take a DIM argument, so ! sum is needed to convert the array to an integer MinElapsedPos = sum(MinLOC(MasterElapsedTime(:,Pointer)))-1 MaxElapsedPos = sum(MAXLOC(MasterElapsedTime(:,Pointer)))-1 MinCPUPos = sum(MinLOC(MasterCPUTime(:,Pointer)))-1 MaxCPUPos = sum(MaxLOC(MasterCPUTime(:,Pointer)))-1 !---------------------------------------------------------------------- ! Write out results. Note the abnormally long format line is needed as ! the NAG compiler complains if a quoted line is split. !---------------------------------------------------------------------- write (unit=trace_unit, fmt='(7A)') & " | |
", & trim(MasterTimerNames(Pointer)), & ! Subroutine name "" write (unit=trace_unit, & fmt='(" | ",F10.1,2(" | ",F10.2," | ",F10.1,2(" | ",F10.1," on",I3))," | ",F5.2)') & real(OverallNoCalls(Pointer))/real(num_procs), & ! Number of calls per PE OverallElapsedTime(Pointer)/(real(num_procs*CountRate)), & ! Average Elapsed Time PercentElapsedTime, & ! Percent Elapsed Time MasterElapsedTime(MinElapsedPos,Pointer)/real(CountRate), & ! Min average Elapsed Time MinElapsedPos, & ! Which PE MasterElapsedTime(MaxElapsedPos,Pointer)/real(CountRate), & ! Max average Elapsed Time MaxElapsedPos, & ! Which PE OverallCPUTime(Pointer), & ! CPU time PercentCPUTime, & ! Percent CPU time MasterCPUTime(MinCPUPos,Pointer), & ! Min average CPU Time MinCPUPos, & ! Which PE MasterCPUTime(MaxCPUPos,Pointer), & ! Max average CPU Time MaxCPUPos, & ! Which PE SpeedUp ! SpeedUp if (trace_csv) then write (unit=trace_csv_unit, & fmt='(2A,F10.1,2(",",F10.2,",",F10.1,2(",",F10.1,",",I3)),",",F5.2)') & '"overall",', & '"'//trim(MasterTimerNames(Pointer))//'",', & real(OverallNoCalls(Pointer))/real(num_procs), & ! Number of calls per PE OverallElapsedTime(Pointer)/(real(num_procs*CountRate)), & ! Average Elapsed Time PercentElapsedTime, & ! Percent Elapsed Time MasterElapsedTime(MinElapsedPos,Pointer)/real(CountRate), & ! Min average Elapsed Time MinElapsedPos, & ! Which PE MasterElapsedTime(MaxElapsedPos,Pointer)/real(CountRate), & ! Max average Elapsed Time MaxElapsedPos, & ! Which PE OverallCPUTime(Pointer), & ! CPU time PercentCPUTime, & ! Percent CPU time MasterCPUTime(MinCPUPos,Pointer), & ! Min average CPU Time MinCPUPos, & ! Which PE MasterCPUTime(MaxCPUPos,Pointer), & ! Max average CPU Time MaxCPUPos, & ! Which PE SpeedUp ! SpeedUp end if end do write (unit=trace_unit, & fmt='(A,I4,A,F8.1,A,F8.1,A)') & " | |||||
Total",MasterNoRoutines, " | ", & TotalElapsedTime/real(CountRate), & " | ",TotalCPUTime(1), & " | " if (TotalElapsedTime > 0.0) then write (unit=trace_unit, fmt='(" | ",F8.1,"")') & TotalCPUTime(1)/(TotalElapsedTime/real(CountRate)) end if write(unit=trace_unit, & fmt='(" |
Routine | Max in any PE (kbytes)")') write (unit=trace_unit,fmt='(" | Overall (kbytes) | Average per PE (kbytes)")') do i=MasterNoRoutines,1,-1 Pointer=index(i) write (unit=trace_unit, & fmt='(" |
---|---|---|---|
",A," | ",I15," | ",I15," | ",I15)') & trim(Documentation_url),trim(TimerNames(Pointer)),trim(TimerNames(Pointer)), & MAXVAL(MasterMaxHeap(:,Pointer)),sum(MasterMaxHeap(:,Pointer)), & sum(MasterMaxHeap(:,Pointer))/num_procs if (trace_csv) then write (unit=trace_csv_unit, & fmt='(2A,I15,",",I15,",",I15)') & '"memory",', & '"'//trim(TimerNames(Pointer))//'",', & MAXVAL(MasterMaxHeap(:,Pointer)),sum(MasterMaxHeap(:,Pointer)), & sum(MasterMaxHeap(:,Pointer))/num_procs end if end do write (unit=trace_unit,fmt='(" |