• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

gyrokinetics / gs2 / 1989146477

18 Aug 2025 10:42AM UTC coverage: 10.372% (+2.2%) from 8.184%
1989146477

push

gitlab-ci

David Dickinson
Merged in experimental/more_gs2_init_everywhere (pull request #1134)

4646 of 44794 relevant lines covered (10.37%)

124612.73 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/gs2_transforms.fpp
1
#include "unused_dummy.inc"
2
! Modifications for using FFTW version 3:
3
! (c) The Numerical Algorithms Group (NAG) Ltd, 2009 
4
!                                 on behalf of the HECToR project
5
!> FIXME : Add documentation
6
module gs2_transforms
7
  use redistribute, only: redist_type
8
  use fft_work, only: fft_type
9

10
  implicit none
11

12
  private
13

14
  public :: init_transforms, finish_transforms, init_zf, get_accel
15
  public :: transform2, inverse2, kz_spectrum
16
  public :: transform_x, transform_y, inverse_x, inverse_y ! Only for testing
17
  public :: g2x, x2y
18
  logical :: initialized=.false., initialized_y_fft=.false.
19
  logical :: initialized_x_redist=.false., initialized_y_redist=.false.
20
  logical :: initialized_zf = .false., initialized_3d=.false.
21

22
  interface transform_x
23
     module procedure transform_x5d
24
  end interface transform_x
25

26
  interface transform_y
27
     module procedure transform_y5d
28
  end interface transform_y
29

30
  interface transform2
31
     module procedure transform2_5d_accel
32
     module procedure transform2_5d
33
     module procedure transform2_3d
34
     module procedure transform2_2d !Not actually used, but gives symmetry to inverse2_2d
35
  end interface transform2
36

37
  interface inverse_x
38
     module procedure inverse_x5d
39
  end interface inverse_x
40

41
  interface inverse_y
42
     module procedure inverse_y5d
43
  end interface inverse_y
44

45
  interface inverse2
46
     module procedure inverse2_5d_accel
47
     module procedure inverse2_5d
48
     module procedure inverse2_3d
49
     module procedure inverse2_2d
50
  end interface inverse2
51

52
  ! redistribution
53
  type (redist_type), save :: g2x, x2y
54

55
  ! fft
56
  type (fft_type), save :: xf_fft, xb_fft, yf_fft, yb_fft, zf_fft
57
  type (fft_type), save :: xf3d_cr, xf3d_rc
58
#ifdef SHMEM
59
  type (fft_type), save :: faccel_shmx, faccel_shmy, baccel_shmx, baccel_shmy
60
  type (fft_type), save :: yf_fft_shm, yb_fft_shm
61
  logical, save :: use_shm_2d_plan=.false., use_shm_2d_plan_buff=.false.
62
  integer, dimension(:), allocatable :: gidx
63
#endif
64

65
  ! accel will be set to true if the v layout is used AND the number of
66
  ! PEs is such that each PE has a complete copy of the x,y space --
67
  ! in that case, no communication is needed to evaluate the nonlinear
68
  ! terms
69
  logical :: accel = .false.
70

71
  logical, dimension(:), allocatable :: aidx  ! aidx == aliased index
72
  integer, dimension(:), allocatable :: ia, iak
73
  complex, dimension(:, :), allocatable :: fft, xxf
74
#ifndef SHMEM
75
  complex, dimension(:, :, :), allocatable :: ag
76
#else
77
  complex, save, dimension(:, :, :), pointer, contiguous :: ag => null()
78
#endif
79

80
contains
81

82
  pure logical function get_accel(negrid, nlambda, nspec) result(accel)
×
83
    use gs2_layouts, only: pe_layout
84
    use mp, only: nproc
85
    integer, intent(in) :: negrid, nlambda, nspec
86
    character(1) :: char
87
    call pe_layout (char)
×
88

89
#ifndef SHMEM
90
    accel = (char == 'v') .and. (mod(negrid * nlambda * nspec, nproc) == 0)
×
91
#else
92
    accel = (char == 'v')
93
    UNUSED_DUMMY(negrid); UNUSED_DUMMY(nlambda); UNUSED_DUMMY(nspec)
94
#endif
95
  end function get_accel
×
96

97
  !> FIXME : Add documentation
98
  subroutine init_transforms &
×
99
       (ntgrid, naky, ntheta0, nlambda, negrid, nspec, nx, ny, accelerated)
100
    use mp, only: nproc, iproc, proc0, mp_abort
101
    use gs2_layouts, only: fft_wisdom_file, fft_use_wisdom, fft_measure_plan
102
    use fft_work, only: load_wisdom, save_wisdom, measure_plan
103
    implicit none
104
    integer, intent (in) :: ntgrid, naky, ntheta0, nlambda, negrid, nspec
105
    integer, intent (in) :: nx, ny
106
    logical, intent (out) :: accelerated
107
    logical, parameter :: debug = .false.
108

109
    accelerated = get_accel(negrid, nlambda, nspec)
×
110

111
    !Early exit if possible
112
    if (initialized) return
×
113
    initialized = .true.
×
114

115
    accel = accelerated
×
116

117
    ! If either nx or ny are zero then we cannot setup the transforms so
118
    ! detect this and abort with a helpful message
119
    if (nx == 0 .or. ny == 0) then
×
120
       call mp_abort("Trying to initialise gs2 transforms but nx and / or ny are zero. Did you remember to set `grid_option = 'box'` in `kt_grids_knobs`?", .true.)
×
121
    end if
122

123
    measure_plan = fft_measure_plan
×
124
    if (fft_use_wisdom) call load_wisdom(trim(fft_wisdom_file))
×
125

126
    if (accel) then
×
127
       call init_accel_fft(ntgrid)
×
128
    else
129
       !Recommended for p+log(p)>log(N) where p is number of
130
       !processors and N is total number of mesh points. Could automate
131
       !selection, though above condition is only fairly rough
132
       if (debug) write (*,*) 'init_transforms: init_y_redist_local'
133
       ! Note also calls init_x_redist_local
134
       call init_y_redist_local (ntgrid, naky, ntheta0, nlambda, negrid, nspec, nx, ny)
×
135

136
       if (debug) write (*,*) 'init_transforms: init_xy_fft'
137
       call init_xy_fft
×
138
    end if
139

140
    if (debug) write (*,*) 'init_transforms: done'
141
    if (proc0 .and. fft_use_wisdom) call save_wisdom(trim(fft_wisdom_file))
×
142
  end subroutine init_transforms
143

144
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
145

146
  !> Setup accelerated 2D or SHMEM FFT(s)
147
  subroutine init_accel_fft(ntgrid)
×
148
    use gs2_layouts, only: accel_lo, accelx_lo, dealiasing
149
    use fft_work, only: init_crfftw, init_rcfftw, init_ccfftw, FFT_TO_SPECTRAL_SPACE, FFT_TO_REAL_SPACE
150
#ifdef SHMEM
151
    use shm_mpi3, only: shm_info, shm_alloc
152
    use mp, only: proc0
153
    use gs2_layouts, only: g_lo, accel_lo
154
    integer :: howmany, stride, n2, n3, idw, nwk, envlen, ts1, ts2
155
    character(len=1) :: envval
156
    complex, dimension(:, :), allocatable :: dummy
157
#endif
158
    integer, intent(in) :: ntgrid
159
    integer :: i, idx
160

161
    if (initialized_y_fft) return
×
162
    initialized_y_fft = .true.
×
163

164
    !JH FFTW plan creation for the accelerated 2d transforms -- do we need this
165
    !when we have SHMEM?
166
    call init_crfftw (yf_fft, FFT_TO_REAL_SPACE, accel_lo%ny, accel_lo%nx, &
167
         (2*accel_lo%ntgrid+1) * 2)
×
168
    call init_rcfftw (yb_fft, FFT_TO_SPECTRAL_SPACE, accel_lo%ny, accel_lo%nx, &
169
         (2*accel_lo%ntgrid+1) * 2)
×
170

171
    ! prepare for dealiasing
172
    allocate (ia (accel_lo%nia), iak(accel_lo%nia))
×
173

174
#ifndef SHMEM
175
    allocate (ag(-ntgrid:ntgrid, 2, accel_lo%llim_proc:accel_lo%ulim_alloc))
×
176
    allocate (aidx(accel_lo%llim_proc:accel_lo%ulim_proc))
×
177

178
    do i = accel_lo%llim_proc, accel_lo%ulim_proc
×
179
       aidx(i) = dealiasing(accel_lo, i)
×
180
    end do
181

182
    do idx = 1, accel_lo%nia
×
183
       ia (idx) = accelx_lo%llim_proc + (idx-1)*accelx_lo%nxny
×
184
       iak(idx) = accel_lo%llim_proc  + (idx-1)*accel_lo%nxnky
×
185
    end do
186
#else
187
    call shm_alloc(ag, [-ntgrid, ntgrid, 1, 2, accel_lo%llim_proc, accel_lo%ulim_alloc])
188
    allocate (gidx(accel_lo%llim_node:accel_lo%ulim_node))
189
    gidx = -1
190
    idx = g_lo%llim_node
191
    allocate (aidx(accel_lo%llim_node:accel_lo%ulim_node))
192
    do i = accel_lo%llim_node, accel_lo%ulim_node
193
       aidx(i) = dealiasing(accel_lo, i)
194
       if (aidx(i)) cycle
195
       gidx(i) = idx; idx = idx+1
196
    end do
197

198
    do idx = 1, accel_lo%nia
199
       ia (idx) = accelx_lo%llim_node + (idx-1)*accelx_lo%nxny
200
       iak(idx) = accel_lo%llim_node  + (idx-1)*accel_lo%nxnky
201
    end do
202

203
    call get_environment_variable("GS2_SHM_2D_PLAN",VALUE=envval,LENGTH=envlen)
204
    if (envlen >0) read(envval,*) use_shm_2d_plan
205
    call get_environment_variable("GS2_SHM_2D_PLAN_BUFF",VALUE=envval, LENGTH=envlen)
206
    if (envlen >0) read(envval,*) use_shm_2d_plan_buff
207
    if (proc0) then
208
       if (use_shm_2d_plan) then
209
          write(*,*)' SHM with 2D plans'
210
          if (use_shm_2d_plan_buff) then
211
             write(*,*)' SHM 2D plans with buffering'
212
          endif
213
       else
214
          write(*,*)' SHM with 1D plans'
215
       endif
216
    endif
217

218
    if (use_shm_2d_plan) then
219
       n2 = accel_lo%ny
220
       n3 = accel_lo%nx
221

222
       ! attention, trouble if ppn =1
223
       if ( shm_info%size > 1 ) then
224
          if (shm_info%id < shm_info%size / 2) then
225
             idw = shm_info%id
226
             ts2=1
227
             nwk = shm_info%size / 2
228
          else
229
             idw = shm_info%id - shm_info%size / 2
230
             ts2=2
231
             nwk = shm_info%size - shm_info%size / 2! for odd ppn
232
          endif
233
       else
234
          idw = 0
235
          ts2=1
236
          nwk=1
237
       endif
238
       ts1 = -ntgrid + (2*ntgrid+1)/nwk * idw &
239
            + min(mod(2*ntgrid+1,nwk), idw)
240
       howmany =  (2*ntgrid+1)/nwk
241
       if ( idw < mod(2*ntgrid+1,nwk)) howmany = howmany+1
242
       if(shm_info%size == 1) howmany = 2*(2*ntgrid+1)
243

244
       if ( use_shm_2d_plan_buff) then
245
          stride=howmany
246
       else
247
          stride=2*(2*accel_lo%ntgrid+1)
248
       endif
249

250
       call init_crfftw (yf_fft_shm, FFT_TO_REAL_SPACE, accel_lo%ny, accel_lo%nx, &
251
            howmany, stride) !(2*accel_lo%ntgrid+1) * 2)
252

253
       ! now the inverse
254
       call init_rcfftw (yb_fft_shm, FFT_TO_SPECTRAL_SPACE, accel_lo%ny, accel_lo%nx, &
255
            howmany, stride) !(2*accel_lo%ntgrid+1) * 2)
256

257
       ! init_... has intent out for plan var
258
       yf_fft_shm%ts1=ts1
259
       yf_fft_shm%ts2=ts2
260
       yb_fft_shm%ts1=ts1
261
       yb_fft_shm%ts2=ts2
262

263
    else
264
       call init_crfftw(faccel_shmx, FFT_TO_REAL_SPACE, accel_lo%ny, 2*(2*ntgrid+1), .true.)
265
       allocate(dummy( 2*(2*ntgrid+1), accel_lo%ndky * accel_lo%nx))
266
       call init_ccfftw(faccel_shmy, FFT_TO_REAL_SPACE, accel_lo%nx, 2*(2*ntgrid+1), dummy, .true., accel_lo%ndky)
267
       deallocate(dummy)
268
       call init_rcfftw(baccel_shmx, FFT_TO_SPECTRAL_SPACE, accelx_lo%ny, 2*(2*ntgrid+1), .true.)
269
       allocate(dummy( 2*(2*ntgrid+1), accelx_lo%ny * accelx_lo%nx))
270
       call init_ccfftw(baccel_shmy, FFT_TO_SPECTRAL_SPACE, accel_lo%nx, 2*(2*ntgrid+1), dummy, .true.,accel_lo%ndky)
271
       deallocate(dummy)
272
    end if
273
#endif
274
  end subroutine init_accel_fft
275

276
  !> Setup non-accelerated separate x and y FFTs
277
  subroutine init_xy_fft
×
278
    use gs2_layouts, only: xxf_lo, yxf_lo
279
    use fft_work, only: init_crfftw, init_rcfftw, init_ccfftw, FFT_TO_SPECTRAL_SPACE, FFT_TO_REAL_SPACE
280
    implicit none
281
    integer :: nb_ffts
282

283
    if (initialized_y_fft) return
×
284
    initialized_y_fft = .true.
×
285

286
    if (.not. allocated(fft)) allocate(fft(yxf_lo%ny/2+1, yxf_lo%llim_proc:yxf_lo%ulim_alloc))
×
287
    if (.not. allocated(xxf)) allocate(xxf(xxf_lo%nx,xxf_lo%llim_proc:xxf_lo%ulim_alloc))
×
288

289
    !JH FFTW plan creation for transform x5d and inverse
290
    ! number of ffts to be calculated
291
    !JH 7th December 2011
292
    !JH xxf_lo%ulim_alloc is used here rather than x/yxf_lo%ulim_proc
293
    !JH because there are situations where x/yxf_lo%llim_proc is greater
294
    !JH than x/yxf_lo%ulim_proc and that would create a negative number
295
    !JH of FFTs to be calculated.  However, x/yxf_lo%ulim_alloc is set
296
    !JH to be x/yxf_lo%llim_proc in this situation, and that will give
297
    !JH 1 FFT to be calculated which the code can correctly undertake.
298
    nb_ffts = xxf_lo%ulim_alloc - xxf_lo%llim_proc + 1
×
299
    call init_ccfftw (xf_fft, FFT_TO_REAL_SPACE, xxf_lo%nx, nb_ffts, xxf)
×
300
    call init_ccfftw (xb_fft, FFT_TO_SPECTRAL_SPACE, xxf_lo%nx, nb_ffts, xxf)
×
301

302
    nb_ffts = yxf_lo%ulim_alloc - yxf_lo%llim_proc + 1
×
303
    call init_crfftw (yf_fft, FFT_TO_REAL_SPACE, yxf_lo%ny, nb_ffts)
×
304
    call init_rcfftw (yb_fft, FFT_TO_SPECTRAL_SPACE,  yxf_lo%ny, nb_ffts)
×
305
  end subroutine init_xy_fft
306

307
  !> Constructs the redistribute mapping from the g_lo data
308
  !> decomposition to the xxf_lo decomposition.
309
  subroutine setup_x_redist_local(g_lo, xxf_lo, g2x)
×
310
    use layouts_type, only: g_layout_type, xxf_layout_type
311
    use gs2_layouts, only: proc_id, idx_local, opt_local_copy, layout
312
    use gs2_layouts, only: ik_idx,il_idx,ie_idx,is_idx,idx, ig_idx, isign_idx
313
    use mp, only: nproc
314
    use redistribute, only: index_list_type, init_redist, delete_list
315
    use redistribute, only: set_redist_character_type, set_xxf_optimised_variables
316
    use sorting, only: quicksort
317
    implicit none
318
    type(g_layout_type), intent(in) :: g_lo
319
    type(xxf_layout_type), intent(in) :: xxf_lo
320
    type(redist_type), intent(in out) :: g2x
321
    type (index_list_type), dimension(0:nproc-1) :: to_list, from_list, sort_list
×
322
    integer, dimension(0:nproc-1) :: nn_to, nn_from
×
323
    integer, dimension (3) :: from_low, from_high
324
    integer, dimension (2) :: to_high
325
    integer :: to_low
326
    integer :: iglo, isign, ig, it, ixxf, it0
327
    integer :: n, ip, il, ik, ie, is
328

329
    ! count number of elements to be redistributed to/from each processor
330
    nn_to = 0
×
331
    nn_from = 0
×
332

333
    !Here we loop over the whole domain (so this doesn't get cheaper with more cores)
334
    !This is required to ensure that we can associate send and receive message elements
335
    !i.e. so we know that we put the received data in the correct place.
336
    !However, as we know the order the iglo,isign,ig indices should increase we could
337
    !loop over our ixxf local range, calculate the corresponding iglo,isign and ig indices
338
    !and sort the ixxf messages on this to ensure they're in the correct order.
339
    !Either way when just counting how much data we are going to send and receive we don't
340
    !care about order so we can just loop over our local range!
341
    !First count the sends | g_lo-->xxf_lo
342
    !Protect against procs with no data
343
    if(g_lo%ulim_proc>=g_lo%llim_proc)then
×
344
       do iglo = g_lo%llim_proc, g_lo%ulim_alloc
×
345
          il = il_idx(g_lo, iglo)
×
346

347
          !Convert iglo,isign=1,ig=-ntgrid into ixxf
348
          ixxf=idx(xxf_lo,-g_lo%ntgrid,1,ik_idx(g_lo,iglo),&
349
               il, ie_idx(g_lo,iglo),is_idx(g_lo,iglo))
×
350

351
          !Now loop over other local dimensions
352
          do isign = 1, 2
×
353
             do ig = -g_lo%ntgrid, g_lo%ntgrid
×
354
                ip = proc_id(xxf_lo, ixxf)
×
355

356
                ! Don't send data from forbidden region, unless it is going to
357
                ! this processor (i.e. local copy) due to assumptions in opt_local_copy
358
                ! based methods.
359
                if (.not. point_is_forbidden_and_not_redistributed(ig, il, ip)) then
×
360
                   !Increase the data send count for the proc which has the ixxf
361
                   nn_from(ip)=nn_from(ip)+1
×
362
                end if
363

364
                !Increase ixxf using knowledge of the xxf_lo layout
365
                ixxf=ixxf+xxf_lo%naky
×
366
             enddo
367
          enddo
368
       enddo
369
    endif
370

371
    !Now count the receives | xxf_lo<--g_lo
372
    !Protect against procs with no data
373
    if(xxf_lo%ulim_proc>=xxf_lo%llim_proc)then
×
374
       do ixxf = xxf_lo%llim_proc, xxf_lo%ulim_alloc
×
375
          !Could split it (or x) domain into two parts to account for
376
          !difference in it (or x) meaning and order in g_lo and xxf_lo
377
          !but only interested in how much data we receive and not the
378
          !exact indices (yet) so just do 1-->ntheta0 (this is how many
379
          !non-zero x's?)
380
          ik = ik_idx(xxf_lo, ixxf)
×
381
          il = il_idx(xxf_lo, ixxf)
×
382
          ie = ie_idx(xxf_lo, ixxf)
×
383
          is = is_idx(xxf_lo, ixxf)
×
384
          ig = ig_idx(xxf_lo, ixxf)
×
385
          do it=1,xxf_lo%ntheta0
×
386
             !Convert ixxf,it indices into iglo, ig and isign indices
387
             iglo=idx(g_lo, ik, it, il, ie, is)
×
388

389
             ip = proc_id(g_lo, iglo)
×
390

391
             ! Don't send data from forbidden region, unless it is going to
392
             ! this processor (i.e. local copy) due to assumptions in opt_local_copy
393
             ! based methods.
394
             if (point_is_forbidden_and_not_redistributed(ig, il, ip)) cycle
×
395

396
             !Increase the data to receive count for proc with this data
397
             !Note, we only worry about iglo and not isign/ig because we know that
398
             !in g_lo each proc has all ig and isign domain.
399
             !The xxf_lo domain contains ig and isign so we only need to add one
400
             !to the count for each ixxf.
401
             nn_to(proc_id(g_lo,iglo))=nn_to(proc_id(g_lo,iglo))+1
×
402
          enddo
403
       enddo
404
    endif
405

406
    !Now allocate storage for data mapping indices
407
    do ip = 0, nproc-1
×
408
       if (nn_from(ip) > 0) then
×
409
          allocate (from_list(ip)%first(nn_from(ip)))
×
410
          allocate (from_list(ip)%second(nn_from(ip)))
×
411
          allocate (from_list(ip)%third(nn_from(ip)))
×
412
       end if
413
       if (nn_to(ip) > 0) then
×
414
          allocate (to_list(ip)%first(nn_to(ip)))
×
415
          allocate (to_list(ip)%second(nn_to(ip)))
×
416
          !For sorting to_list later
417
          allocate (sort_list(ip)%first(nn_to(ip)))
×
418
       end if
419
    end do
420

421
    !Reinitialise count arrays to zero
422
    nn_to = 0
×
423
    nn_from = 0
×
424

425
    !First fill in the sending indices, these define the messages data order
426
    !Protect against procs with no data
427
    if(g_lo%ulim_proc>=g_lo%llim_proc)then
×
428
       do iglo=g_lo%llim_proc, g_lo%ulim_alloc
×
429
          il = il_idx(g_lo, iglo)
×
430

431
          !Convert iglo,isign=1,ig=-ntgrid into ixxf
432
          ixxf=idx(xxf_lo,-g_lo%ntgrid,1,ik_idx(g_lo,iglo),&
433
               il, ie_idx(g_lo,iglo),is_idx(g_lo,iglo))
×
434

435
          !Now loop over other local dimensions
436
          do isign = 1, 2
×
437
             do ig = -g_lo%ntgrid, g_lo%ntgrid
×
438
                !Get proc id
439
                ip=proc_id(xxf_lo,ixxf)
×
440
                if (.not. point_is_forbidden_and_not_redistributed(ig, il, ip)) then
×
441

442
                   !Increment procs message counter
443
                   n=nn_from(ip)+1
×
444
                   nn_from(ip)=n
×
445

446
                   !Store indices
447
                   from_list(ip)%first(n) = ig
×
448
                   from_list(ip)%second(n) = isign
×
449
                   from_list(ip)%third(n) = iglo
×
450
                end if
451

452
                !We could send this information, transformed to the xxf layout to the proc.
453

454
                !Increment counter
455
                ixxf=ixxf+xxf_lo%naky
×
456
             enddo
457
          enddo
458
       enddo
459
    endif
460

461
    !Now lets fill in the receiving indices, these must match the messages data order
462
    !Protect against procs with no data
463
    if(xxf_lo%ulim_proc>=xxf_lo%llim_proc)then
×
464
       do ixxf = xxf_lo%llim_proc, xxf_lo%ulim_alloc
×
465
          !Get indices
466
          isign=isign_idx(xxf_lo,ixxf)
×
467
          ik = ik_idx(xxf_lo, ixxf)
×
468
          il = il_idx(xxf_lo, ixxf)
×
469
          ie = ie_idx(xxf_lo, ixxf)
×
470
          is = is_idx(xxf_lo, ixxf)
×
471
          ig = ig_idx(xxf_lo, ixxf)
×
472

473
          !Loop over receiving "it" indices
474
          do it=1,g_lo%ntheta0
×
475
             !Convert from g_lo%it to xxf_lo%it
476
             if(it>(xxf_lo%ntheta0+1)/2) then
×
477
                it0=it+xxf_lo%nx-xxf_lo%ntheta0
×
478
             else
479
                it0=it
×
480
             endif
481

482
             !Convert ixxf,it indices into iglo indices
483
             iglo=idx(g_lo, ik, it, il, ie, is)
×
484

485
             !Get proc id which has this data
486
             ip=proc_id(g_lo,iglo)
×
487

488
             if (point_is_forbidden_and_not_redistributed(ig, il, ip)) cycle
×
489

490
             !Determine message position index
491
             n=nn_to(ip)+1
×
492
             nn_to(ip)=n
×
493

494
             !Store receive indices
495
             to_list(ip)%first(n)=it0
×
496
             to_list(ip)%second(n)=ixxf
×
497

498
             !Store index for sorting
499
             sort_list(ip)%first(n) = ig+g_lo%ntgrid-1+g_lo%ntgridtotal*(isign-1+2*(iglo-g_lo%llim_world))
×
500
          enddo
501
       enddo
502
    endif
503

504
    !Now we need to sort the to_list message indices based on sort_list | This seems potentially slow + inefficient
505
    do ip=0,nproc-1
×
506
       !Only need to worry about procs which we are receiving from
507
       if(nn_to(ip)>0) then
×
508
          !Sort using quicksort
509
          call quicksort(nn_to(ip),sort_list(ip)%first,to_list(ip)%first,to_list(ip)%second)
×
510
       endif
511
    enddo
512

513
    !Setup array range values
514
    from_low (1) = -g_lo%ntgrid
×
515
    from_low (2) = 1
×
516
    from_low (3) = g_lo%llim_proc
×
517

518
    to_low = xxf_lo%llim_proc
×
519

520
    to_high(1) = xxf_lo%nx
×
521
    to_high(2) = xxf_lo%ulim_alloc
×
522

523
    from_high(1) = g_lo%ntgrid
×
524
    from_high(2) = 2
×
525
    from_high(3) = g_lo%ulim_alloc
×
526

527
    call set_redist_character_type(g2x, 'g2x')
×
528
    call set_xxf_optimised_variables(opt_local_copy, xxf_lo%naky,  &
529
         xxf_lo%ntgrid,  xxf_lo%ntheta0, xxf_lo%nlambda,  xxf_lo%negrid, &
530
         xxf_lo%nx, xxf_lo%ulim_proc, g_lo%ulim_proc, layout)
×
531

532
    !Create g2x redistribute object
533
    call init_redist (g2x, 'c', to_low, to_high, to_list, &
×
534
         from_low, from_high, from_list)
×
535

536
    !Deallocate list objects
537
    call delete_list (to_list)
×
538
    call delete_list (from_list)
×
539
    call delete_list(sort_list)
×
540

541
  end subroutine setup_x_redist_local
×
542

543
  !> Setup global xxf_lo and redistribute between global g_lo -> xxf_lo
544
  subroutine init_x_redist_local(ntgrid, naky, ntheta0, nlambda, negrid, nspec, nx)
×
545
    use gs2_layouts, only: g_lo, xxf_lo
546
    use mp, only: nproc, iproc
547
    implicit none
548
    integer, intent (in) :: ntgrid, naky, ntheta0, nlambda, negrid, nspec, nx
549

550
    !Early exit if possible
551
    if (initialized_x_redist) return
×
552
    initialized_x_redist = .true.
×
553

554
    call setup_x_redist_local(g_lo, xxf_lo, g2x)
×
555
  end subroutine init_x_redist_local
556

557
  !> Constructs the redistribute mapping from the xxf_lo data
558
  !> decomposition to the yxf_lo decomposition.
559
  subroutine setup_y_redist_local(xxf_lo, yxf_lo, x2y)
×
560
    use layouts_type, only: xxf_layout_type, yxf_layout_type
561
    use gs2_layouts, only: proc_id, idx_local
562
    use gs2_layouts, only: ik_idx,it_idx,il_idx,ie_idx,is_idx,idx,ig_idx,isign_idx
563
    use mp, only: nproc
564
    use redistribute, only: index_list_type, init_redist, delete_list
565
    use redistribute, only: set_yxf_optimised_variables, set_redist_character_type
566
    use sorting, only: quicksort
567
    implicit none
568
    type(xxf_layout_type), intent(in) :: xxf_lo
569
    type(yxf_layout_type), intent(in) :: yxf_lo
570
    type(redist_type), intent(in out) :: x2y
571
    type (index_list_type), dimension(0:nproc-1) :: to_list, from_list,sort_list
×
572
    integer, dimension(0:nproc-1) :: nn_to, nn_from
×
573
    integer, dimension (2) :: from_low, from_high, to_high
574
    integer :: to_low
575
    integer :: it, ixxf, ik, iyxf
576
    integer :: ixxf_start, iyxf_start
577
    integer :: n, ip, il, ig
578

579
    !Initialise counts to zero
580
    nn_to = 0
×
581
    nn_from = 0
×
582

583
    !First count data to send | xxf_lo-->yxf_lo
584
    !Protect against procs with no data
585
    if(xxf_lo%ulim_proc>=xxf_lo%llim_proc)then
×
586
       do ixxf=xxf_lo%llim_proc,xxf_lo%ulim_alloc
×
587
          il = il_idx(xxf_lo, ixxf)
×
588
          ig = ig_idx(xxf_lo, ixxf)
×
589

590
          !Get iyxf index for "it"=1
591
          iyxf_start=idx(yxf_lo, ig,&
592
               isign_idx(xxf_lo,ixxf), 1, il,&
593
               ie_idx(xxf_lo,ixxf),is_idx(xxf_lo,ixxf))
×
594

595
          !Loop over "it" range, note that we actually only want to know
596
          !iyxf in this range and we know that it-->it+1 => iyxf-->iyxf+1
597
          !so replace loop with one over iyxf
598
          do iyxf=iyxf_start,iyxf_start+yxf_lo%nx-1
×
599
             ip = proc_id(yxf_lo, iyxf)
×
600

601
             ! Don't send data from forbidden region, unless it is going to
602
             ! this processor (i.e. local copy) due to assumptions in opt_local_copy
603
             ! based methods.
604
             if (point_is_forbidden_and_not_redistributed(ig, il, ip)) cycle
×
605
             !Increase the appropriate procs send count
606
             nn_from(ip)=nn_from(ip)+1
×
607
          enddo
608
       enddo
609
    endif
610

611
    !Now count data to receive | yxf_lo<--xxf_lo
612
    !Protect against procs with no data
613
    if(yxf_lo%ulim_proc>=yxf_lo%llim_proc)then
×
614
       do iyxf=yxf_lo%llim_proc,yxf_lo%ulim_alloc
×
615
          ig = ig_idx(yxf_lo, iyxf)
×
616
          il = il_idx(yxf_lo, iyxf)
×
617
          !Get ixxf index for "ik"=1
618
          ixxf_start=idx(xxf_lo, ig,&
619
               isign_idx(yxf_lo,iyxf), 1, il,&
620
               ie_idx(yxf_lo,iyxf),is_idx(yxf_lo,iyxf))
×
621

622
          !Loop over "ik" range, note that we actually only want to know
623
          !ixxf in this range and we know that ik-->ik+1 => ixxf-->ixxf+1
624
          !so replace loop with one over ixxf
625
          do ixxf=ixxf_start,ixxf_start+xxf_lo%naky-1
×
626
             ip = proc_id(xxf_lo, ixxf)
×
627

628
             ! Don't send data from forbidden region, unless it is going to
629
             ! this processor (i.e. local copy) due to assumptions in opt_local_copy
630
             ! based methods.
631
             if (point_is_forbidden_and_not_redistributed(ig, il, ip)) cycle
×
632

633
             !Increase the appropriate procs recv count
634
             nn_to(ip) = nn_to(ip)+1
×
635
          enddo
636
       enddo
637
    endif
638

639
    !Now allocate storage for data mapping structures
640
    do ip = 0, nproc-1
×
641
       if (nn_from(ip) > 0) then
×
642
          allocate (from_list(ip)%first(nn_from(ip)))
×
643
          allocate (from_list(ip)%second(nn_from(ip)))
×
644
       end if
645
       if (nn_to(ip) > 0) then
×
646
          allocate (to_list(ip)%first(nn_to(ip)))
×
647
          allocate (to_list(ip)%second(nn_to(ip)))
×
648
          !For sorting to_list later
649
          allocate (sort_list(ip)%first(nn_to(ip)))
×
650
       end if
651
    end do
652

653
    !Reinitialise count arrays to zero
654
    nn_to = 0
×
655
    nn_from = 0
×
656

657
    !First fill in the sending indices, these define the messages data order
658
    !Protect against procs with no data
659
    if(xxf_lo%ulim_proc>=xxf_lo%llim_proc)then
×
660
       do ixxf=xxf_lo%llim_proc,xxf_lo%ulim_alloc
×
661
          ig = ig_idx(xxf_lo, ixxf)
×
662
          il = il_idx(xxf_lo, ixxf)
×
663

664
          !Get iyxf for "it"=1
665
          iyxf=idx(yxf_lo, ig,&
666
               isign_idx(xxf_lo,ixxf), 1, il,&
667
               ie_idx(xxf_lo,ixxf),is_idx(xxf_lo,ixxf))
×
668

669
          !Now loop over other local dimension. Note we need "it" here
670
          !so don't replace this with loop over iyxf
671
          do it=1,yxf_lo%nx
×
672
             !Get the processor id
673
             ip=proc_id(yxf_lo,iyxf)
×
674

675
             ! Don't send data from forbidden region, unless it is going to
676
             ! this processor (i.e. local copy) due to assumptions in opt_local_copy
677
             ! based methods.
678
             if (.not. point_is_forbidden_and_not_redistributed(ig, il, ip)) then
×
679
                !Increment the procs message counter
680
                n=nn_from(ip)+1
×
681
                nn_from(ip)=n
×
682

683
                !Store indices
684
                from_list(ip)%first(n)=it
×
685
                from_list(ip)%second(n)=ixxf
×
686
             end if
687

688
             !Increment iyxf
689
             iyxf=iyxf+1
×
690
          enddo
691
       enddo
692
    endif
693

694
    !Now fill in the receiving indices, these must match the message data order, achieved by later sorting
695
    !Protect against procs with no data
696
    if(yxf_lo%ulim_proc>=yxf_lo%llim_proc)then
×
697
       do iyxf=yxf_lo%llim_proc,yxf_lo%ulim_alloc
×
698
          ig = ig_idx(yxf_lo, iyxf)
×
699
          il = il_idx(yxf_lo, iyxf)
×
700

701
          !Get ixxf for "ik"=1
702
          ixxf=idx(xxf_lo, ig,&
703
               isign_idx(yxf_lo,iyxf), 1, il,&
704
               ie_idx(yxf_lo,iyxf),is_idx(yxf_lo,iyxf))
×
705

706
          !Now loop over other local dimension. Note we need "ik" here
707
          !so don't replace this with loop over ixxf
708
          do ik=1,xxf_lo%naky
×
709
             !Get the processor id
710
             ip=proc_id(xxf_lo,ixxf)
×
711

712
             ! Don't send data from forbidden region, unless it is going to
713
             ! this processor (i.e. local copy) due to assumptions in opt_local_copy
714
             ! based methods.
715
             if (.not. point_is_forbidden_and_not_redistributed(ig, il, ip)) then
×
716

717
                !Increment the procs message counter
718
                n=nn_to(ip)+1
×
719
                nn_to(ip)=n
×
720

721
                !Store indices
722
                to_list(ip)%first(n)=ik
×
723
                to_list(ip)%second(n)=iyxf
×
724

725
                !Store index for sorting
726
                sort_list(ip)%first(n)=it_idx(yxf_lo,iyxf)+ixxf*yxf_lo%nx
×
727
             end if
728

729
             !Increment ixxf
730
             ixxf=ixxf+1
×
731
          enddo
732
       enddo
733
    endif
734

735
    !Now we need to sort the to_list message indices based on sort_list
736
    !This could be slow and inefficient
737
    do ip=0,nproc-1
×
738
       !Only need to worry about procs which we are receiving from
739
       if(nn_to(ip)>0) then
×
740
          !Use quicksort based on compound index
741
          call quicksort(nn_to(ip),sort_list(ip)%first,to_list(ip)%first,to_list(ip)%second)
×
742
       endif
743
    enddo
744

745
    !Setup array bound values
746
    from_low(1) = 1
×
747
    from_low(2) = xxf_lo%llim_proc
×
748

749
    to_low = yxf_lo%llim_proc
×
750

751
    to_high(1) = yxf_lo%ny/2+1
×
752
    to_high(2) = yxf_lo%ulim_alloc
×
753

754
    from_high(1) = xxf_lo%nx
×
755
    from_high(2) = xxf_lo%ulim_alloc
×
756

757
    call set_redist_character_type(x2y, 'x2y')
×
758
    call set_yxf_optimised_variables(yxf_lo%ulim_proc)
×
759

760
    !Create x2y redist object
761
    call init_redist (x2y, 'c', to_low, to_high, to_list, &
×
762
         from_low, from_high, from_list)
×
763

764
    !Deallocate list objects
765
    call delete_list (to_list)
×
766
    call delete_list (from_list)
×
767
    call delete_list (sort_list)
×
768
  end subroutine setup_y_redist_local
×
769

770
  !> Small helper function to wrap up logic about if this point corresponds to a forbidden
771
  !> point and if we need to transfer it in the redistribute
772
  pure logical function point_is_forbidden_and_not_redistributed(ig, il, ip) result(flag)
×
773
    use mp, only: iproc
774
    use le_grids, only: forbid
775
    use gs2_layouts, only: opt_local_copy
776
    implicit none
777
    integer, intent(in) :: ig, il, ip
778
    !> The following is for debug/testing to force all grid
779
    !> points to be communicated if we wish.
780
    logical, parameter :: skip_forbidden_region = .true.
781
    flag = &
782
         !We want to skip the forbidden points
783
         skip_forbidden_region .and. &
×
784
         !Point is in forbidden region
785
         forbid(ig, il) &
786
         !Point will be sent to a different proc, or we are not using opt_local_copy.
787
         .and. ((ip/=iproc) .or. .not. opt_local_copy)
×
788
  end function point_is_forbidden_and_not_redistributed
×
789

790
  !> Setup the module level xxf -> yxf redistribute. Note this also
791
  !> calls [[init_x_redist_local]] in order to setup the mapping for
792
  !> g_lo to xxf_lo as well.
793
  subroutine init_y_redist_local (ntgrid, naky, ntheta0, nlambda, negrid, nspec, nx, ny)
×
794
    use gs2_layouts, only: xxf_lo, yxf_lo
795
    use mp, only: nproc, iproc
796
    implicit none
797
    integer, intent (in) :: ntgrid, naky, ntheta0, nlambda, negrid, nspec
798
    integer, intent (in) :: nx, ny
799

800
    !Setup g_lo-->xxf_lo redist object first
801
    call init_x_redist_local (ntgrid, naky, ntheta0, nlambda, negrid, nspec, nx)
×
802

803
    !Early exit if possible
804
    if (initialized_y_redist) return
×
805
    initialized_y_redist = .true.
×
806

807
    call setup_y_redist_local(xxf_lo, yxf_lo, x2y)
×
808
  end subroutine init_y_redist_local
809

810
  !> FIXME : Add documentation
811
  subroutine transform_x5d (g, xxf)
×
812
    use gs2_layouts, only: xxf_lo, g_lo
813
    use redistribute, only: gather
814
    use job_manage, only: time_message
815
    use fft_work, only: time_fft
816
    use array_utils, only: zero_array
817
    implicit none
818
    complex, dimension (-xxf_lo%ntgrid:,:,g_lo%llim_proc:), intent (in) :: g
819
    complex, dimension (:,xxf_lo%llim_proc:), intent (out) :: xxf
820

821
    ! Zero out the array as the subsequent gather doesn't populate
822
    ! every element as we skip communicating forbidden points and
823
    ! we are padding our data. We can't just zero once at the start
824
    ! as fftw can destroy the input, meaning it might set zero entries
825
    ! in fft to something non-zero. Plus here xxf also holds the output.
826
    call zero_array(xxf)
×
827

828
    !CMR, 7/3/2011: gather pulls appropriate pieces of g onto this processor for
829
    !    local Fourier transform in x, and may also pad with zeros for dealiasing
830
    !
831
    call gather (g2x, g, xxf)
×
832

833
    call time_message(.false., time_fft, ' FFT')
×
834
    call xf_fft%execute_c2c(xxf, xxf)
×
835
    call time_message(.false., time_fft, ' FFT')
×
836
  end subroutine transform_x5d
×
837

838
  !> FIXME : Add documentation  
839
  subroutine inverse_x5d (xxf, g)
×
840
    use gs2_layouts, only: xxf_lo, g_lo
841
    use redistribute, only: scatter
842
    use job_manage, only: time_message
843
    use fft_work, only: time_fft
844
    implicit none
845
    complex, dimension (:,xxf_lo%llim_proc:), intent (in out) :: xxf
846
    complex, dimension (-xxf_lo%ntgrid:,:,g_lo%llim_proc:), intent (out) :: g
847

848
    call time_message(.false., time_fft, ' FFT')
×
849
    call xb_fft%execute_c2c(xxf, xxf)
×
850
    call time_message(.false., time_fft, ' FFT')
×
851

852
    call scatter (g2x, xxf, g)
×
853
  end subroutine inverse_x5d
×
854

855
  !> FIXME : Add documentation
856
  subroutine transform_y5d (xxf, yxf)
×
857
    use gs2_layouts, only: xxf_lo, yxf_lo
858
    use redistribute, only: gather
859
    use job_manage, only: time_message
860
    use fft_work, only: time_fft
861
    use array_utils, only: zero_array
862
    implicit none
863
    complex, dimension (:,xxf_lo%llim_proc:), intent (in) :: xxf
864
    real, dimension (:,yxf_lo%llim_proc:), intent(in out) :: yxf
865

866
    ! Zero out the array as the subsequent gather doesn't populate
867
    ! every element as we skip communicating forbidden points and
868
    ! we are padding our data. We can't just zero once at the start
869
    ! as fftw can destroy the input, meaning it might set zero entries
870
    ! in fft to something non-zero.
871
    call zero_array(fft)
×
872

873
    !Note here we're doing the communication even if we're not using
874
    !an FFT routine.
875
    call gather (x2y, xxf, fft)
×
876

877
    call time_message(.false., time_fft, ' FFT')
×
878
    call yf_fft%execute_c2r(fft, yxf)
×
879
    call time_message(.false., time_fft, ' FFT')
×
880
  end subroutine transform_y5d
×
881

882
  !> FIXME : Add documentation  
883
  subroutine inverse_y5d (yxf, xxf)
×
884
    use gs2_layouts, only: xxf_lo, yxf_lo
885
    use redistribute, only: scatter
886
    use job_manage, only: time_message
887
    use fft_work, only: time_fft
888
    implicit none
889
    real, dimension (:,yxf_lo%llim_proc:), intent (in out) :: yxf
890
    complex, dimension (:,xxf_lo%llim_proc:), intent (out) :: xxf
891

892
    call time_message(.false., time_fft, ' FFT')
×
893
    call yb_fft%execute_r2c(yxf, fft)
×
894
    call time_message(.false., time_fft, ' FFT')
×
895

896
    call scatter (x2y, fft, xxf)
×
897
  end subroutine inverse_y5d
×
898

899
  !> FIXME : Add documentation  
900
  subroutine transform2_5d (g, yxf)
×
901
    use gs2_layouts, only: g_lo, yxf_lo, ik_idx
902
    use unit_tests,only: debug_message
903
    implicit none
904
    complex, dimension (:,:,g_lo%llim_proc:), intent (in out) :: g
905
    real, dimension (:,yxf_lo%llim_proc:), intent (out) :: yxf
906
    integer :: iglo
907

908
    call debug_message(4, 'gs2_transforms::transform2_5d starting')
×
909

910
    !CMR+GC: 2/9/2013
911
    !  gs2's Fourier coefficients,  F_k^gs2, not standard form: i.e. f(x) = f_k e^(i k.x)
912
    !
913
    !  F_k^gs2 are 2 x larger for ky > 0,   i.e.
914
    !                     F_k^gs2 = |    f_k   for ky = 0
915
    !                               |  2 f_k   for ky > 0
916
    !
917
    ! Following large loop (due to this) can be eliminated with std Fourier coeffs.
918
    ! Similar optimisations possible in: 
919
    !          "inverse2_5d", "transform2_5d_accel", "inverse2_5d_accel" 
920
    !
921
    ! NB Moving to standard Fourier coeffs would impact considerably on diagnostics:
922
    !       e.g. fac in get_volume_average
923
    !
924

925
    !$OMP PARALLEL DO DEFAULT(none) &
926
    !$OMP SHARED(g_lo, g) &
927
    !$OMP SCHEDULE(dynamic)
928
    do iglo = g_lo%llim_proc, g_lo%ulim_proc
×
929
       if (ik_idx(g_lo, iglo) == 1) cycle
×
930
       g(:,:,iglo) = g(:,:,iglo) * 0.5
×
931
    end do
932
    !$OMP END PARALLEL DO
933

934
    call transform_x (g, xxf)
×
935
    call transform_y (xxf, yxf)
×
936
  end subroutine transform2_5d
×
937

938
  !> FIXME : Add documentation  
939
  subroutine inverse2_5d (yxf, g)
×
940
    use gs2_layouts, only: g_lo, yxf_lo, ik_idx
941
    implicit none
942
    real, dimension (:,yxf_lo%llim_proc:), intent (in out) :: yxf
943
    complex, dimension (:,:,g_lo%llim_proc:), intent (out) :: g
944
    integer :: iglo
945
    real :: scale
946
    call inverse_y (yxf, xxf)
×
947
    call inverse_x (xxf, g)
×
948

949
    scale = xb_fft%scale * yb_fft%scale
×
950
    !CMR+GC: 2/9/2013
951
    ! Following large loop can be eliminated if gs2 used standard Fourier coefficients.
952
    ! (See above comment in transform2_5d.)
953
    !
954

955
    !$OMP PARALLEL DO DEFAULT(none) &
956
    !$OMP SHARED(g_lo, g, scale) &
957
    !$OMP SCHEDULE(dynamic)
958
    do iglo = g_lo%llim_proc, g_lo%ulim_proc
×
959
       if (ik_idx(g_lo, iglo) /= 1) then
×
960
          g(:,:,iglo) = g(:,:,iglo) * (scale * 2.0)
×
961
       else
962
          g(:,:,iglo) = g(:,:,iglo) * scale
×
963
       end if
964
    end do
965
    !$OMP END PARALLEL DO
966
  end subroutine inverse2_5d
×
967

968
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
969

970
  !> FIXME : Add documentation
971
  subroutine transform2_5d_accel (g, axf)
×
972
    use gs2_layouts, only: g_lo, accel_lo, accelx_lo, ik_idx
973
    use unit_tests, only: debug_message
974
    use job_manage, only: time_message
975
    use fft_work, only: time_fft
976
#ifdef SHMEM
977
    use shm_mpi3, only : shm_info, shm_get_node_pointer, shm_node_barrier, shm_fence
978
#endif
979
    implicit none
980
    complex, dimension (:,:,g_lo%llim_proc:), target, intent (in out) :: g
981
    real, dimension (:,:,accelx_lo%llim_proc:), target, intent (in out) :: axf
982
    integer :: iglo, k, idx
983
#ifdef SHMEM
984
    complex, pointer, contiguous ::ag_ptr(:,:,:) => null(), g_ptr(:,:,:) => null()
985
    real, pointer, contiguous :: axf_ptr(:,:,:) => null() 
986
    integer  :: kk, nk, kb, bidx
987
    integer, save :: iglo_s, iglo_e, kbs, kbe
988
    logical, save :: firsttime =.true.
989
#endif 
990

991
    integer :: itgrid, iduo
992
    integer :: ntgrid
993

994
    call debug_message(4, 'gs2_transforms::transform2_5d_accel starting')
×
995

996
    ntgrid = accel_lo%ntgrid
×
997
#ifdef SHMEM
998
    g_ptr (1:,1:,g_lo%llim_node:) => shm_get_node_pointer(g, -1)
999
    ag_ptr (-ntgrid:,1:,accel_lo%llim_node:) => shm_get_node_pointer(ag, -1)
1000
    if(firsttime)then
1001
       firsttime=.false.
1002
       call compute_block_partition(g_lo%ntheta0 * g_lo%naky, iglo_s, iglo_e)
1003
       call compute_block_partition(accel_lo%nxnky, kbs, kbe)
1004
    end if
1005

1006
#endif
1007
    !
1008
    !CMR+GC, 2/9/2013:
1009
    !  Scaling g would not be necessary if gs2 used standard Fourier coefficients.
1010
    !
1011
    ! scale the g and copy into the anti-aliased array ag
1012
    ! zero out empty ag
1013
    ! touch each g and ag only once
1014
#ifndef SHMEM
1015
    idx = g_lo%llim_proc
×
1016
    do k = accel_lo%llim_proc, accel_lo%ulim_proc
×
1017
       ! CMR: aidx is true for modes killed by the dealiasing mask
1018
       ! so following line removes ks in dealiasing region
1019
       if (aidx(k)) then
×
1020
          ag(:,:,k) = 0.0
×
1021
       else
1022
          ! scaling only for k_y not the zero mode
1023
          if (ik_idx(g_lo, idx) /= 1) then
×
1024
             do iduo = 1, 2
×
1025
                do itgrid = 1, 2*ntgrid +1
×
1026
                   ! It seems strange that we scale the input and then
1027
                   ! copy it to ag -- why not just scaling ag, which is
1028
                   ! what we are going to fft and return?
1029
                   g(itgrid, iduo, idx) &
×
1030
                        = 0.5 * g(itgrid, iduo, idx)
×
1031
                   ag(itgrid - (ntgrid+1), iduo, k) &
×
1032
                        = g(itgrid, iduo, idx)
×
1033
                enddo
1034
             enddo
1035
             ! in case of k_y being zero: just copy
1036
          else
1037
             do iduo = 1, 2
×
1038
                do itgrid = 1, 2*ntgrid+1
×
1039
                   ag(itgrid-(ntgrid+1), iduo, k) &
×
1040
                        = g(itgrid, iduo, idx)
×
1041
                enddo
1042
             enddo
1043
          endif
1044
          idx = idx + 1
×
1045
       endif
1046
    enddo
1047
#else
1048
    call shm_node_barrier
1049
    bidx=0
1050
    do kb = accel_lo%llim_node, accel_lo%ulim_node, accel_lo%nxnky
1051
       idx = g_lo%llim_node +bidx * g_lo%naky*g_lo%ntheta0
1052

1053
       ! we might not have scaled all g
1054
       Do iglo = idx+iglo_s, idx+iglo_s+iglo_e
1055
          if (ik_idx(g_lo, iglo) /= 1) then
1056
             g_ptr(:,:, iglo) = 0.5 * g_ptr(:,:,iglo)
1057
          endif
1058
       enddo
1059

1060
       call shm_node_barrier
1061
       call shm_fence(g_ptr(1,1,g_lo%llim_node))
1062

1063
       do k = kb+kbs, kb+kbs+kbe !accel_lo%llim_node, accel_lo%ulim_node       
1064
          ! CMR: aidx is true for modes killed by the dealiasing mask
1065
          ! so following line removes ks in dealiasing region
1066
          if (aidx(k)) then
1067
             ag_ptr(:,:,k) = 0.0
1068
          else
1069
             do iduo = 1, 2
1070
                do itgrid =1, 2*ntgrid +1
1071
                   ag_ptr(itgrid - (ntgrid+1), iduo, k) &
1072
                        = g_ptr(itgrid, iduo, gidx(k))
1073
                enddo
1074
             enddo
1075
             !         idx=idx+1
1076
          endif
1077
       enddo
1078
       bidx=bidx+1
1079
    enddo
1080
#endif
1081

1082
    !CMR+GC: 2/9/2013
1083
    ! Following large loop can be eliminated if gs2 used standard Fourier coefficients.
1084
    ! (See above comment in transform2_5d.)
1085

1086
#ifndef SHMEM
1087
    ! we might not have scaled all g
1088
    Do iglo = idx, g_lo%ulim_proc
×
1089
       if (ik_idx(g_lo, iglo) /= 1) then
×
1090
          g(:,:, iglo) = 0.5 * g(:,:,iglo)
×
1091
       endif
1092
    enddo
1093
#else
1094

1095
    axf_ptr(-ntgrid:,1:,accelx_lo%llim_node:) => & 
1096
         shm_get_node_pointer(axf, -1)
1097
#endif
1098

1099

1100
    ! transform
1101
#ifndef SHMEM
1102
    idx = 1
×
1103
    call time_message(.false., time_fft, ' FFT')
×
1104
    do k = accel_lo%llim_proc, accel_lo%ulim_proc, accel_lo%nxnky
×
1105
       ! remember FFTW3 for c2r destroys the contents of ag
1106
       call yf_fft%execute_c2r(ag(:, :, k:), axf(:, :, ia(idx):))
×
1107
       idx = idx + 1
×
1108
    end do
1109
    call time_message(.false., time_fft, ' FFT')
×
1110
#else
1111
    ! in the following the ranks operate on other data ranges
1112
    call shm_node_barrier
1113

1114
    if (use_shm_2d_plan) then
1115
       idx = 1 + shm_info%id
1116
       idx = mod(idx -1, accel_lo%nia) +1
1117
       nk = accel_lo%ulim_node - accel_lo%llim_node +1
1118
    else
1119
       idx = 1
1120
    endif
1121
    call time_message(.false., time_fft, ' FFT')
1122
    do k = accel_lo%llim_node, accel_lo%ulim_node, accel_lo%nxnky
1123
       if (use_shm_2d_plan) then
1124
          kk = k + shm_info%id * accel_lo%nxnky
1125
          kk = mod(kk - accel_lo%llim_node, nk) + accel_lo%llim_node
1126
       else
1127
          kk = k
1128
       endif
1129

1130
       call fft_shm(kk,ia(idx))
1131

1132
       idx = idx + 1
1133
       if ( use_shm_2d_plan) then
1134
          idx = mod(idx -1, accel_lo%nia) +1
1135
       endif
1136
    end do
1137
    call shm_node_barrier
1138
    call time_message(.false., time_fft, ' FFT')
1139
  contains
1140

1141
    !> FIXME : Add documentation
1142
    subroutine fft_shm(k1, k2)
1143
      use mp, only : proc0
1144
      implicit none
1145
      integer, intent(in) :: k1, k2
1146

1147
      if (use_shm_2d_plan) then 
1148
         call fft2d_shm(k1, k2)
1149
      else
1150
         call fft1d_shm(k1,k2)
1151
      endif
1152

1153
    end subroutine fft_shm
1154

1155
    !> FIXME : Add documentation
1156
    subroutine fft1d_shm(k1,k2)
1157
      use shm_mpi3, only : shm_info, shm_fence
1158
      implicit none
1159
      integer, intent(in) :: k1, k2
1160

1161
      integer :: i, j, is, ie, js, je, idw, nwk
1162
      integer :: n1,n2,n3,p1,p2,p3
1163

1164
      n1 = 2*(2*ntgrid+1)
1165
      n2 = accel_lo%ndky
1166
      n3 = accel_lo%nx
1167
      p1 = 2*(2*ntgrid+1)
1168
      p2 = accelx_lo%ny
1169
      p3 =  accelx_lo%nx
1170

1171
      if ( p3 /= n3 ) then 
1172
         write(0,*) "fft_shm: WARNING third dimesions not equal"
1173
      endif
1174

1175
      nwk = g_lo%ppn
1176
      idw = shm_info%id
1177
      is = idw*(n2/nwk) + min(idw,mod(n2, nwk))
1178
      ie = is + (n2/nwk) -1 
1179
      if ( idw < mod(n2, nwk)) ie = ie +1
1180
      js = idw*(p3/nwk) + min(idw,mod(p3, nwk))
1181
      je = js + (p3/nwk) -1 
1182
      if ( idw < mod(p3, nwk)) je = je +1
1183

1184
      do i = is, ie
1185
         ! For now these SHMEM calls don't match the modern fortran FFTW interface
1186
         ! so we have to fall back to an implied interface approach
1187
#ifdef SINGLE_PRECISION
1188
         call sfftw_execute_dft( &
1189
#else
1190
         call dfftw_execute_dft( &
1191
#endif
1192
         faccel_shmy%plan, ag_ptr(-ntgrid,1,k1+i), ag_ptr(-ntgrid,1,k1+i))
1193
      end do
1194

1195
      call shm_node_barrier
1196
      call shm_fence(ag_ptr(-ntgrid,1,accel_lo%llim_node))
1197

1198
      do j = js, je
1199
         ! For now these SHMEM calls don't match the modern fortran FFTW interface
1200
         ! so we have to fall back to an implied interface approach
1201
#ifdef SINGLE_PRECISION
1202
         call sfftw_execute_dft_c2r( &
1203
#else
1204
         call dfftw_execute_dft_c2r( &
1205
#endif
1206
         faccel_shmx%plan, ag_ptr(-ntgrid,1,k1+j*n2), axf_ptr(-ntgrid,1,k2+j*p2))
1207
      end do
1208
    end subroutine fft1d_shm
1209

1210
    !> FIXME : Add documentation         
1211
    subroutine fft2d_shm(k1,k2)
1212
      use shm_mpi3, only : shm_info, shm_fence
1213
      implicit none
1214
      integer, intent(in) :: k1, k2
1215

1216
      integer :: i, j, n2,n3, howmany,ts1,ts2
1217

1218
      complex,allocatable :: cin(:,:)
1219
      real, allocatable ::  rout(:,:) 
1220

1221
      n2 = accel_lo%ny
1222
      n3 = accel_lo%nx
1223
      howmany = yf_fft_shm%howmany
1224
      ts1=yf_fft_shm%ts1
1225
      ts2=yf_fft_shm%ts2
1226

1227
      if (use_shm_2d_plan_buff) then
1228
         allocate(rout(howmany, n2*n3), &
1229
              cin(howmany, (n2/2+1)*n3))
1230

1231
         do j=1,(n2/2+1)*n3
1232
            do i=1,howmany
1233
               cin(i,j) =  ag_ptr(ts1+i-1, ts2,k1+j-1)
1234
            enddo
1235
         enddo
1236

1237
         call yf_fft_shm%execute_c2r(cin, rout)
1238

1239
         do j=1,n2*n3
1240
            do i=1,howmany
1241
               axf_ptr(ts1+i-1,ts2,k2+j-1)= rout(i,j)
1242
            end do
1243
         end do
1244
      else
1245
         ! For now these SHMEM calls don't match the modern fortran FFTW interface
1246
         ! so we have to fall back to an implied interface approach
1247
#ifdef SINGLE_PRECISION
1248
         call sfftw_execute_dft_c2r( &
1249
#else
1250
         call dfftw_execute_dft_c2r( &
1251
#endif
1252
         yf_fft_shm%plan, ag_ptr(ts1, ts2,k1), axf_ptr(ts1,ts2,k2))
1253
      end if
1254

1255
    end subroutine fft2d_shm
1256
# endif
1257

1258
  end subroutine transform2_5d_accel
×
1259

1260
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1261

1262
  !> FIXME : Add documentation
1263
  subroutine inverse2_5d_accel (axf, g)
×
1264
    use gs2_layouts, only: g_lo, accel_lo, accelx_lo, ik_idx
1265
    use job_manage, only: time_message
1266
    use fft_work, only: time_fft
1267
#ifdef SHMEM
1268
    use shm_mpi3, only : shm_info, shm_get_node_pointer, shm_node_barrier, shm_fence
1269
#endif
1270
    implicit none
1271
    real, dimension (:,:,accelx_lo%llim_proc:), target, intent (in out) :: axf
1272
    complex, dimension (:,:,g_lo%llim_proc:), target, intent (out) :: g
1273
    integer :: iglo, idx, k
1274
    integer :: itgrid, iduo
1275
    integer :: ntgrid
1276
#ifdef SHMEM
1277
    complex, pointer, contiguous ::ag_ptr(:,:,:) => null(), g_ptr(:,:,:) => null()
1278
    real, pointer, contiguous :: axf_ptr(:,:,:) => null() 
1279
    integer kk, nk, kb, bidx
1280
    integer, save :: iglo_s, iglo_e, kbs, kbe
1281
    logical, save :: firsttime =.true.
1282
#endif
1283

1284
    ntgrid = accel_lo%ntgrid
×
1285
#ifndef SHMEM
1286
    ! transform
1287
    idx = 1
×
1288
    call time_message(.false., time_fft, ' FFT')
×
1289
    do k = accelx_lo%llim_proc, accelx_lo%ulim_proc, accelx_lo%nxny
×
1290
       call yb_fft%execute_r2c(axf(:, :, k:), ag(:, :, iak(idx):))
×
1291
       idx = idx + 1
×
1292
    end do
1293
    call time_message(.false., time_fft, ' FFT')
×
1294

1295
    idx = g_lo%llim_proc
×
1296
    do k = accel_lo%llim_proc, accel_lo%ulim_proc
×
1297
       ! ignore the large k (anti-alias)
1298
       if ( .not.aidx(k)) then
×
1299
          !
1300
          !CMR+GC, 2/9/2013:
1301
          !  Scaling g here would be unnecessary if gs2 used standard Fourier coefficients.
1302
          ! different scale factors depending on ky == 0
1303
          if (ik_idx(g_lo, idx) /= 1) then
×
1304
             do iduo = 1, 2 
×
1305
                do itgrid = 1, 2*ntgrid+1
×
1306
                   g(itgrid, iduo, idx) &
×
1307
                        = 2.0 * yb_fft%scale * ag(itgrid-(ntgrid+1), iduo, k)
×
1308
                enddo
1309
             enddo
1310
          else
1311
             do iduo = 1, 2 
×
1312
                do itgrid = 1, 2*ntgrid+1
×
1313
                   g(itgrid, iduo, idx) &
×
1314
                        = yb_fft%scale * ag(itgrid-(ntgrid+1), iduo, k)
×
1315
                enddo
1316
             enddo
1317
          endif
1318
          idx = idx + 1
×
1319
       endif
1320
    enddo
1321

1322
    !CMR+GC: 2/9/2013
1323
    ! Following large loop can be eliminated if gs2 used standard Fourier coefficients.
1324
    ! (See above comment in transform2_5d.)
1325

1326
    ! we might not have scaled all g
1327
    Do iglo = idx, g_lo%ulim_proc
×
1328
       if (ik_idx(g_lo, iglo) /= 1) then
×
1329
          g(:,:, iglo) = 2.0 * g(:,:,iglo)
×
1330
       endif
1331
    enddo
1332

1333
#else
1334
    ! shm
1335
    ag_ptr (-ntgrid:,1:,accel_lo%llim_node:) => shm_get_node_pointer(ag, -1)
1336
    axf_ptr(1:,1:,accelx_lo%llim_node:) => shm_get_node_pointer(axf, -1)
1337

1338
    call shm_node_barrier
1339
    ! transform
1340
    if ( use_shm_2d_plan) then
1341
       idx = 1 + shm_info%id
1342
       idx = mod(idx-1,accel_lo%nia) +1
1343
       nk = accelx_lo%ulim_node - accelx_lo%llim_node +1
1344
    else
1345
       idx = 1
1346
    endif
1347
    call time_message(.false., time_fft, ' FFT')
1348
    do k = accelx_lo%llim_node, accelx_lo%ulim_node, accelx_lo%nxny
1349
       if (use_shm_2d_plan) then
1350
          kk = k + shm_info%id * accelx_lo%nxny
1351
          kk = mod(kk - accelx_lo%llim_node, nk) + accelx_lo%llim_node
1352
       else
1353
          kk = k
1354
       endif
1355

1356
       call fft_shm_inv(kk, iak(idx))
1357

1358
       idx = idx + 1
1359
       if ( use_shm_2d_plan) then
1360
          idx = mod(idx-1, accel_lo%nia) +1
1361
       endif
1362
    end do
1363

1364
    ! see comment from _accel
1365
    call shm_node_barrier
1366
    call time_message(.false., time_fft, ' FFT')
1367

1368
    if(firsttime)then
1369
       firsttime=.false.
1370
       call compute_block_partition(g_lo%ntheta0 * g_lo%naky, iglo_s, iglo_e)
1371
       call compute_block_partition(accel_lo%nxnky, kbs, kbe)
1372
    end if
1373

1374
    g_ptr (1:,1:,g_lo%llim_node:) => shm_get_node_pointer(g, -1)
1375
    bidx=0
1376
    do kb = accel_lo%llim_node, accel_lo%ulim_node,accel_lo%nxnky
1377
       !      idx = g_lo%llim_node +bidx * g_lo%naky*g_lo%ntheta0 + iglo_s !g_lo%llim_node !gidx_s !g_lo%llim_proc
1378
       do k = kb+kbs, kb+kbs+kbe 
1379
          ! ignore the large k (anti-alias)
1380
          if ( .not.aidx(k)) then
1381
             !
1382
             !CMR+GC, 2/9/2013:
1383
             !  Scaling g here would be unnecessary if gs2 used standard Fourier coefficients.
1384
             ! different scale factors depending on ky == 0
1385
             do iduo = 1, 2 
1386
                do itgrid = 1, 2*ntgrid+1
1387
                   g_ptr(itgrid, iduo, gidx(k)) &
1388
                        = yb_fft%scale * ag_ptr(itgrid-(ntgrid+1), iduo, k)
1389
                enddo
1390
             enddo
1391
             !          idx = idx + 1
1392
          endif
1393
       enddo
1394
       call shm_node_barrier
1395
       call shm_fence(g_ptr(1,1,g_lo%llim_node))
1396
       ! we might not have scaled all g
1397
       idx = g_lo%llim_node +bidx * g_lo%naky*g_lo%ntheta0 
1398
       Do iglo = idx+iglo_s, idx+iglo_s+iglo_e
1399
          if (ik_idx(g_lo, iglo) /= 1) then
1400
             g_ptr(:,:, iglo) = 2.0 * g_ptr(:,:,iglo)
1401
          endif
1402
       enddo
1403
       bidx=bidx+1
1404
    enddo
1405

1406
    !CMR+GC: 2/9/2013
1407
    ! Following large loop can be eliminated if gs2 used standard Fourier coefficients.
1408
    ! (See above comment in transform2_5d.)
1409

1410
    call shm_node_barrier
1411

1412
  contains
1413

1414
    !> FIXME : Add documentation
1415
    subroutine fft_shm_inv(k1, k2)
1416
      implicit none
1417
      integer, intent(in) :: k1, k2
1418
      if (use_shm_2d_plan) then
1419
         call fft2d_shm_inv(k1, k2)
1420
      else
1421
         call fft1d_shm_inv(k1,k2)
1422
      endif
1423
    end subroutine fft_shm_inv
1424

1425
    !> FIXME : Add documentation
1426
    subroutine fft1d_shm_inv(k1,k2)
1427
      use shm_mpi3, only : shm_info, shm_fence
1428
      implicit none
1429
      integer, intent(in) :: k1, k2
1430

1431
      integer i, j, is, ie, js, je, idw, nwk
1432
      integer nx, ny, px, py
1433

1434
      ! test ny == py !!!
1435
      nx = accelx_lo%ny
1436
      ny = accelx_lo%nx
1437
      px = accel_lo%ndky 
1438
      py = accel_lo%nx
1439

1440
      nwk = g_lo%ppn
1441
      idw = shm_info%id
1442
      is = idw*(px/nwk) + min(idw,mod(px, nwk))
1443
      ie = is + (px/nwk) -1 
1444
      if ( idw < mod(px, nwk)) ie = ie +1
1445
      js = idw*(ny/nwk) + min(idw,mod(ny, nwk))
1446
      je = js + (ny/nwk) -1 
1447
      if ( idw < mod(ny, nwk)) je = je +1
1448

1449
      do j = js, je
1450
         ! For now these SHMEM calls don't match the modern fortran FFTW interface
1451
         ! so we have to fall back to an implied interface approach
1452
#ifdef SINGLE_PRECISION
1453
         call sfftw_execute_dft_r2c( &
1454
#else
1455
         call dfftw_execute_dft_r2c( &
1456
#endif
1457
         baccel_shmx%plan, axf_ptr(1,1,k1+j*nx), ag_ptr(-ntgrid,1,k2+j*px))
1458
      end do
1459

1460
      call shm_node_barrier
1461
      call shm_fence(ag_ptr(-ntgrid,1,accel_lo%llim_node))
1462

1463
      do i = is, ie
1464
         ! For now these SHMEM calls don't match the modern fortran FFTW interface
1465
         ! so we have to fall back to an implied interface approach
1466
#ifdef SINGLE_PRECISION
1467
         call sfftw_execute_dft( &
1468
#else
1469
         call dfftw_execute_dft( &
1470
#endif
1471
         baccel_shmy%plan, ag_ptr(-ntgrid,1,k2+i), ag_ptr(-ntgrid,1,k2+i))
1472
      end do
1473
    end subroutine fft1d_shm_inv
1474

1475
    !> FIXME : Add documentation          
1476
    subroutine fft2d_shm_inv(k1,k2)
1477
      use, intrinsic :: iso_c_binding 
1478
      use shm_mpi3, only : shm_info, shm_fence
1479
      implicit none
1480
      integer, intent(in) :: k1, k2
1481

1482
      integer i,j, ts1, ts1o, ts2, n2,n3, howmany
1483
      real, allocatable :: rin(:,:)
1484
      complex,allocatable::cout(:,:)
1485

1486
      n2 = accel_lo%ny
1487
      n3 = accel_lo%nx
1488
      ts1 = yb_fft_shm%ts1
1489
      ts2=yb_fft_shm%ts2
1490
      howmany=yb_fft_shm%howmany
1491
      ts1o=ntgrid +1+ts1
1492

1493
      if ( use_shm_2d_plan_buff) then 
1494
         allocate( rin(howmany, n2*n3), &
1495
              cout(howmany,(n2/2+1)*n3))
1496

1497
         do j=1,n2*n3
1498
            do i=1,howmany
1499
               rin(i,j) =  axf_ptr(ts1o+i-1,ts2,k1+j-1)
1500
            enddo
1501
         enddo
1502
         call yb_fft_shm%execute_r2c(rin, cout)
1503
         do j=1,(n2/2+1)*n3
1504
            do i=1,howmany
1505
               ag_ptr(ts1+i-1,ts2,k2+j-1) = cout(i,j)
1506
            enddo
1507
         enddo
1508
      else
1509
         ! For now these SHMEM calls don't match the modern fortran FFTW interface
1510
         ! so we have to fall back to an implied interface approach
1511
#ifdef SINGLE_PRECISION
1512
         call sfftw_execute_dft_r2c( &
1513
#else
1514
         call dfftw_execute_dft_r2c( &
1515
#endif
1516
         yb_fft_shm%plan, axf_ptr(ts1o,ts2,k1), ag_ptr(ts1,ts2,k2))
1517
      end if
1518
    end subroutine fft2d_shm_inv
1519
# endif
1520
  end subroutine inverse2_5d_accel
×
1521

1522
  !> FIXME : Add documentation        
1523
  subroutine init_3d (nny_in, nnx_in, how_many_in)
×
1524
    use fft_work, only: init_crfftw, init_rcfftw, FFT_TO_SPECTRAL_SPACE, FFT_TO_REAL_SPACE
1525
    implicit none
1526
    integer, intent(in) :: nny_in, nnx_in, how_many_in
1527
    integer, save :: nnx = 0, nny = 0, how_many = 0
1528

1529
    ! If we've already initialised check if any of the sizes have changed,
1530
    ! if not we can just return, otherwise delete plans and start again
1531
    if (initialized_3d) then
×
1532
       if (nnx /= nnx_in .or. nny /= nny_in .or. how_many /= how_many_in) then
×
1533
          call xf3d_cr%delete
×
1534
          call xf3d_rc%delete
×
1535
       else
1536
          return
×
1537
       end if
1538
    end if
1539
    initialized_3d = .true.
×
1540
    nny = nny_in
×
1541
    nnx = nnx_in
×
1542
    how_many = how_many_in
×
1543

1544
    call init_crfftw (xf3d_cr, FFT_TO_REAL_SPACE, nny, nnx, how_many)
×
1545
    call init_rcfftw (xf3d_rc, FFT_TO_SPECTRAL_SPACE, nny, nnx, how_many)
×
1546
  end subroutine init_3d
1547

1548
  !> FIXME : Add documentation        
1549
  subroutine transform2_3d (phi, phixf, nny, nnx)
×
1550
    use theta_grid, only: ntgrid
1551
    use kt_grids, only: naky, ntheta0, aky
1552
    use job_manage, only: time_message
1553
    use fft_work, only: time_fft
1554
    implicit none
1555
    integer, intent(in) :: nnx, nny
1556
    complex, dimension (-ntgrid:,:,:), intent (in) :: phi
1557
    real, dimension (:,:,-ntgrid:), intent (out) :: phixf  
1558
    real, dimension (:,:,:), allocatable :: phix
×
1559
    complex, dimension (:,:,:), allocatable :: aphi
×
1560
    real :: fac
1561
    integer :: ig, ik, it
1562

1563
    ! scale, dealias and transpose
1564
    call init_3d (nny, nnx, 2*ntgrid+1)
×
1565

1566
    allocate (phix (-ntgrid:ntgrid, nny, nnx))
×
1567
    allocate (aphi (-ntgrid:ntgrid, nny/2+1, nnx))
×
1568
    aphi = 0.
×
1569

1570
    do ik = 1, naky
×
1571
       fac = 0.5
×
1572
       if (aky(ik) < epsilon(0.)) fac = 1.0
×
1573
       do it = 1, (ntheta0+1) / 2
×
1574
          do ig = -ntgrid, ntgrid
×
1575
             aphi(ig, ik, it) = phi(ig, it, ik) * fac !Note transpose of it and ik here
×
1576
          end do
1577
       end do
1578
       do it = (ntheta0+1) / 2 + 1, ntheta0
×
1579
          do ig = -ntgrid, ntgrid
×
1580
             !CMR, 30/3/2010: bug fix to replace nx by nnx on next line
1581
             aphi(ig, ik, it-ntheta0+nnx) = phi(ig,it,ik) * fac
×
1582
          end do
1583
       end do
1584
    end do
1585

1586
    ! transform
1587
    call time_message(.false., time_fft, ' FFT')
×
1588
    call xf3d_cr%execute_c2r(aphi, phix)
×
1589
    call time_message(.false., time_fft, ' FFT')
×
1590

1591
    do it = 1, nnx
×
1592
       do ik = 1, nny
×
1593
          do ig = -ntgrid, ntgrid
×
1594
             phixf(it, ik, ig) = phix(ig, ik, it)
×
1595
          end do
1596
       end do
1597
    end do
1598

1599
    deallocate (aphi, phix)
×
1600

1601
  end subroutine transform2_3d
×
1602

1603
  !> FIXME : Add documentation
1604
  subroutine inverse2_3d (phixf, phi, nny, nnx)
×
1605
    !CMR, 30/4/2010:
1606
    ! Fixed up previously buggy handling of dealiasing.
1607
    use theta_grid, only: ntgrid
1608
    use kt_grids, only: naky, ntheta0, aky
1609
    use job_manage, only: time_message
1610
    use fft_work, only: time_fft
1611
    implicit none
1612
    real, dimension (:,:,-ntgrid:), intent(in) :: phixf
1613
    complex, dimension (-ntgrid:,:,:), intent(out) :: phi
1614
    integer, intent(in) :: nnx, nny
1615
    complex, dimension (:,:,:), allocatable :: aphi
×
1616
    real, dimension (:,:,:), allocatable :: phix
×
1617
    real :: fac
1618
    integer :: ik, it, ig
1619
    ! Note assumes init_3d has already been called
1620
    allocate (aphi (-ntgrid:ntgrid, nny/2+1, nnx))
×
1621
    allocate (phix (-ntgrid:ntgrid, nny, nnx))
×
1622

1623
    do it = 1, nnx
×
1624
       do ik = 1, nny
×
1625
          do ig = -ntgrid, ntgrid
×
1626
             phix(ig, ik, it) = phixf(it, ik, ig)
×
1627
          end do
1628
       end do
1629
    end do
1630

1631
    ! transform
1632
    call time_message(.false., time_fft, ' FFT')
×
1633
    call xf3d_rc%execute_r2c(phix, aphi)
×
1634
    call time_message(.false., time_fft, ' FFT')
×
1635

1636
    ! dealias and scale
1637
    do it = 1, (ntheta0+1) / 2
×
1638
       do ik = 1, naky
×
1639
          fac = 2.0
×
1640
          if (aky(ik) < epsilon(0.0)) fac = 1.0
×
1641
          do ig = -ntgrid, ntgrid
×
1642
             phi(ig, it, ik) = aphi(ig, ik, it) * fac * xf3d_rc%scale
×
1643
          end do
1644
       end do
1645
    end do
1646

1647
    !CMR, 30/4/2010:  fixed up previously buggy handling of dealiasing
1648
    do it = (ntheta0+1) / 2 + 1, ntheta0
×
1649
       do ik = 1, naky
×
1650
          fac = 2.0
×
1651
          if (aky(ik) < epsilon(0.0)) fac = 1.0
×
1652
          do ig = -ntgrid, ntgrid
×
1653
             phi(ig,it,ik) = aphi(ig, ik, it-ntheta0+nnx) * fac * xf3d_rc%scale
×
1654
          end do
1655
       end do
1656
    end do
1657

1658
    deallocate (aphi, phix)
×
1659

1660
  end subroutine inverse2_3d
×
1661

1662
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1663

1664
  !> FIXME : Add documentation
1665
  subroutine transform2_2d (phi, phixf, nny, nnx)
×
1666
    use fft_work, only: FFT_TO_REAL_SPACE, init_crfftw, time_fft
1667
    use kt_grids, only: naky, nakx => ntheta0, nx, aky
1668
    use job_manage, only: time_message
1669
    implicit none
1670
    integer, intent(in) :: nnx, nny
1671
    complex, dimension(:, :), intent (in) :: phi
1672
    real, dimension(:, :), intent (out) :: phixf
1673
    real, dimension(:, :), allocatable :: phix
×
1674
    complex, dimension(:, :), allocatable :: aphi
×
1675
    real :: fac
1676
    integer :: ik, it
1677
    type (fft_type) :: xf2d
1678

1679
    !May be inefficient to create and destroy this fft plan
1680
    !on every call to the routine. We may want to move this
1681
    !variable to module level and check its created flag.
1682
    call init_crfftw (xf2d, FFT_TO_REAL_SPACE, nny, nnx, 1)
×
1683

1684
    allocate (phix (nny, nnx))
×
1685
    allocate (aphi (nny/2+1, nnx))
×
1686
    phix(:,:)=0.; aphi(:,:)=cmplx(0.,0.)
×
1687

1688
    ! scale, dealias and transpose
1689
    do ik = 1, naky
×
1690
       fac = 0.5
×
1691
       if (aky(ik) < epsilon(0.)) fac = 1.0
×
1692
       do it = 1, (nakx+1) / 2
×
1693
          aphi(ik, it) = phi(it, ik) * fac
×
1694
       end do
1695
       do it = (nakx+1) / 2 + 1, nakx
×
1696
          aphi(ik, it-nakx+nx) = phi(it, ik) * fac
×
1697
       end do
1698
    end do
1699

1700
    ! transform
1701
    call time_message(.false., time_fft, ' FFT')
×
1702
    call xf2d%execute_c2r(aphi, phix)
×
1703
    call time_message(.false., time_fft, ' FFT')
×
1704

1705
    phixf = transpose(phix)
×
1706

1707
    deallocate (aphi, phix)
×
1708
    !RN> this statement causes error for lahey with DEBUG. I don't know why
1709
    !<DD>Reinstating after discussion with RN, if this causes anyone an issue
1710
    !    then we can guard this line with some directives.   
1711
    call xf2d%delete
×
1712
  end subroutine transform2_2d
×
1713

1714
  !> FIXME : Add documentation
1715
  subroutine inverse2_2d (phixf, phi, nny, nnx)
×
1716
    use fft_work, only: FFT_TO_SPECTRAL_SPACE, init_rcfftw, time_fft
1717
    use kt_grids, only: naky, nakx => ntheta0, aky
1718
    use job_manage, only: time_message
1719
    implicit none
1720
    real, intent(in) :: phixf(:,:)
1721
    complex, intent(out) :: phi(:,:)
1722
    integer, intent(in) :: nnx, nny
1723
    complex, allocatable :: aphi(:,:)
×
1724
    real, allocatable :: phix(:,:)
×
1725
    real :: fac
1726
    integer :: ik, it
1727
    type (fft_type) :: xf2d
1728

1729
    !May be inefficient to create and destroy this fft plan
1730
    !on every call to the routine. We may want to move this
1731
    !variable to module level and check its created flag.
1732
    call init_rcfftw (xf2d, FFT_TO_SPECTRAL_SPACE, nny, nnx, 1)
×
1733

1734
    allocate (aphi (nny/2+1, nnx))
×
1735
    allocate (phix (nny, nnx))
×
1736
    phix = 0.; aphi = cmplx(0.,0.)
×
1737
    phix = transpose(phixf)
×
1738

1739
    ! transform
1740
    call time_message(.false., time_fft, ' FFT')
×
1741
    call xf2d%execute_r2c(phix, aphi)
×
1742
    call time_message(.false., time_fft, ' FFT')
×
1743

1744
    ! scale, dealias and transpose
1745
    do it = 1, nakx
×
1746
       do ik = 1, naky
×
1747
          fac = 2.0
×
1748
          if (aky(ik) < epsilon(0.0)) fac = 1.0
×
1749
          phi(it, ik) = aphi(ik, it) * fac * xf2d%scale
×
1750
       end do
1751
    end do
1752

1753
    deallocate (aphi, phix)
×
1754
    !RN> this statement causes error for lahey with DEBUG. I don't know why
1755
    !<DD>Reinstating after discussion with RN, if this causes anyone an issue
1756
    !    then we can guard this line with some directives.   
1757
    call xf2d%delete
×
1758
  end subroutine inverse2_2d
×
1759

1760
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1761

1762
  !> FIXME : Add documentation        
1763
  subroutine init_zf (ntgrid, howmany)
×
1764
    use fft_work, only: init_z, FFT_TO_REAL_SPACE
1765
    implicit none
1766
    integer, intent (in) :: ntgrid, howmany
1767

1768
    if (initialized_zf) return
×
1769
    initialized_zf = .true.
×
1770

1771
    ! Note we pass 2*ntgrid rather than 2*ntgrid + 1 so that we don't
1772
    ! include the duplicate theta point at {-pi,pi}
1773
    call init_z(zf_fft, FFT_TO_REAL_SPACE, 2*ntgrid, howmany)
×
1774
  end subroutine init_zf
1775

1776
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1777

1778
  !> FIXME : Add documentation        
1779
  subroutine kz_spectrum (an, an2)
×
1780
    use job_manage, only: time_message
1781
    use fft_work, only: time_fft
1782
    complex, dimension (:,:,:), intent(in out)  :: an
1783
    complex, dimension (:,:,:), intent(out) :: an2
1784
    an2 = 0.0
×
1785
    call time_message(.false., time_fft, ' FFT')
×
1786
    call zf_fft%execute_c2c(an, an2)
×
1787
    call time_message(.false., time_fft, ' FFT')
×
1788
    an2 = conjg(an2)*an2
×
1789
  end subroutine kz_spectrum
×
1790

1791
  !> FIXME : Add documentation
1792
  subroutine finish_transforms
×
1793
    use redistribute, only : delete_redist
1794
    use fft_work, only: finish_fft_work
1795
#ifdef SHMEM
1796
    use shm_mpi3, only : shm_free
1797
#endif
1798

1799
    if(allocated(xxf)) deallocate(xxf)
×
1800
    if(allocated(ia)) deallocate(ia)
×
1801
    if(allocated(iak)) deallocate(iak)
×
1802
    if(allocated(aidx)) deallocate(aidx)
×
1803
#ifndef SHMEM
1804
    if(allocated(ag)) deallocate(ag)
×
1805
#else
1806
    if (associated(ag)) call shm_free(ag)
1807
#endif
1808

1809
    call delete_redist(g2x)
×
1810
    call delete_redist(x2y)
×
1811

1812
    if(allocated(fft)) deallocate(fft) 
×
1813

1814
    !Destroy fftw plans
1815
    call yf_fft%delete
×
1816
    call yb_fft%delete
×
1817
    call xf_fft%delete
×
1818
    call xb_fft%delete
×
1819
    call zf_fft%delete
×
1820
    call xf3d_cr%delete
×
1821
    call xf3d_rc%delete
×
1822

1823
    !Reset init state flags
1824
    initialized = .false.
×
1825
    initialized_y_fft = .false.
×
1826
    initialized_x_redist = .false.
×
1827
    initialized_y_redist = .false.
×
1828
    initialized_3d = .false.
×
1829
    initialized_zf = .false.
×
1830

1831
    !Tidy up fft internals (FFTW3 only)
1832
    call finish_fft_work
×
1833
  end subroutine finish_transforms
×
1834

1835
#ifdef SHMEM
1836
  !> FIXME : Add documentation
1837
  subroutine compute_block_partition(whole_range, shift, extend)
1838
    ! computes shift + extend
1839
    use shm_mpi3, only : shm_info
1840
    implicit none
1841

1842
    integer, intent(in) :: whole_range
1843
    integer, intent(out) :: shift, extend
1844

1845
    integer n, chnk
1846

1847
    n = whole_range
1848
    chnk = n / shm_info%size
1849
    shift = shm_info%id * chnk + min(shm_info%id, mod(n,shm_info%size))
1850
    extend = chnk-1
1851
    if (shm_info%id< mod(n,shm_info%size)) extend= extend + 1
1852

1853
  end subroutine compute_block_partition
1854
#endif       
1855
end module gs2_transforms
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc