• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

gyrokinetics / gs2 / 2021218321

04 Sep 2025 07:44AM UTC coverage: 10.606% (+0.03%) from 10.577%
2021218321

push

gitlab-ci

David Dickinson
Merged in feature/move_more_initialisation_to_init_levels (pull request #1161)

4710 of 44407 relevant lines covered (10.61%)

125698.1 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/src/gs2_transforms.fpp
1
#include "unused_dummy.inc"
2
! Modifications for using FFTW version 3:
3
! (c) The Numerical Algorithms Group (NAG) Ltd, 2009 
4
!                                 on behalf of the HECToR project
5
!> FIXME : Add documentation
6
module gs2_transforms
7
  use redistribute, only: redist_type
8
  use fft_work, only: fft_type
9

10
  implicit none
11

12
  private
13

14
  public :: init_transforms, finish_transforms, init_zf, get_accel
15
  public :: transform2, inverse2, kz_spectrum
16
  public :: transform_x, transform_y, inverse_x, inverse_y ! Only for testing
17
  public :: g2x, x2y
18
  logical :: initialized=.false., initialized_y_fft=.false.
19
  logical :: initialized_x_redist=.false., initialized_y_redist=.false.
20
  logical :: initialized_zf = .false., initialized_3d=.false.
21

22
  interface transform_x
23
     module procedure transform_x5d
24
  end interface transform_x
25

26
  interface transform_y
27
     module procedure transform_y5d
28
  end interface transform_y
29

30
  interface transform2
31
     module procedure transform2_5d_accel
32
     module procedure transform2_5d
33
     module procedure transform2_3d
34
     module procedure transform2_2d !Not actually used, but gives symmetry to inverse2_2d
35
  end interface transform2
36

37
  interface inverse_x
38
     module procedure inverse_x5d
39
  end interface inverse_x
40

41
  interface inverse_y
42
     module procedure inverse_y5d
43
  end interface inverse_y
44

45
  interface inverse2
46
     module procedure inverse2_5d_accel
47
     module procedure inverse2_5d
48
     module procedure inverse2_3d
49
     module procedure inverse2_2d
50
  end interface inverse2
51

52
  ! redistribution
53
  type (redist_type), save :: g2x, x2y
54

55
  ! fft
56
  type (fft_type), save :: xf_fft, xb_fft, yf_fft, yb_fft, zf_fft
57
  type (fft_type), save :: xf3d_cr, xf3d_rc
58
#ifdef SHMEM
59
  type (fft_type), save :: faccel_shmx, faccel_shmy, baccel_shmx, baccel_shmy
60
  type (fft_type), save :: yf_fft_shm, yb_fft_shm
61
  logical, save :: use_shm_2d_plan=.false., use_shm_2d_plan_buff=.false.
62
  integer, dimension(:), allocatable :: gidx
63
#endif
64

65
  ! accel will be set to true if the v layout is used AND the number of
66
  ! PEs is such that each PE has a complete copy of the x,y space --
67
  ! in that case, no communication is needed to evaluate the nonlinear
68
  ! terms
69
  logical :: accel = .false.
70

71
  logical, dimension(:), allocatable :: aidx  ! aidx == aliased index
72
  integer, dimension(:), allocatable :: ia, iak
73
  complex, dimension(:, :), allocatable :: fft, xxf
74
#ifndef SHMEM
75
  complex, dimension(:, :, :), allocatable :: ag
76
#else
77
  complex, save, dimension(:, :, :), pointer, contiguous :: ag => null()
78
#endif
79

80
contains
81

82
  pure logical function get_accel(negrid, nlambda, nspec) result(accel)
×
83
    use gs2_layouts, only: pe_layout
84
    use mp, only: nproc
85
    integer, intent(in) :: negrid, nlambda, nspec
86
    character(1) :: char
87
    call pe_layout (char)
×
88

89
#ifndef SHMEM
90
    accel = (char == 'v') .and. (mod(negrid * nlambda * nspec, nproc) == 0)
×
91
#else
92
    accel = (char == 'v')
93
    UNUSED_DUMMY(negrid); UNUSED_DUMMY(nlambda); UNUSED_DUMMY(nspec)
94
#endif
95
  end function get_accel
×
96

97
  !> FIXME : Add documentation
98
  subroutine init_transforms(ntgrid, nlambda, negrid, nspec, nx, ny, accelerated)
×
99
    use mp, only: proc0, mp_abort
100
    use gs2_layouts, only: fft_wisdom_file, fft_use_wisdom, fft_measure_plan
101
    use fft_work, only: load_wisdom, save_wisdom, measure_plan
102
    implicit none
103
    integer, intent (in) :: ntgrid, nlambda, negrid, nspec, nx, ny
104
    logical, intent (out) :: accelerated
105
    logical, parameter :: debug = .false.
106

107
    accelerated = get_accel(negrid, nlambda, nspec)
×
108

109
    !Early exit if possible
110
    if (initialized) return
×
111
    initialized = .true.
×
112

113
    accel = accelerated
×
114

115
    ! If either nx or ny are zero then we cannot setup the transforms so
116
    ! detect this and abort with a helpful message
117
    if (nx == 0 .or. ny == 0) then
×
118
       call mp_abort("Trying to initialise gs2 transforms but nx and / or ny are zero. Did you remember to set `grid_option = 'box'` in `kt_grids_knobs`?", .true.)
×
119
    end if
120

121
    measure_plan = fft_measure_plan
×
122
    if (fft_use_wisdom) call load_wisdom(trim(fft_wisdom_file))
×
123

124
    if (accel) then
×
125
       call init_accel_fft(ntgrid)
×
126
    else
127
       !Recommended for p+log(p)>log(N) where p is number of
128
       !processors and N is total number of mesh points. Could automate
129
       !selection, though above condition is only fairly rough
130
       if (debug) write (*,*) 'init_transforms: init_y_redist_local'
131
       ! Note also calls init_x_redist_local
132
       call init_y_redist_local
×
133

134
       if (debug) write (*,*) 'init_transforms: init_xy_fft'
135
       call init_xy_fft
×
136
    end if
137

138
    if (debug) write (*,*) 'init_transforms: done'
139
    if (proc0 .and. fft_use_wisdom) call save_wisdom(trim(fft_wisdom_file))
×
140
  end subroutine init_transforms
141

142
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
143

144
  !> Setup accelerated 2D or SHMEM FFT(s)
145
  subroutine init_accel_fft(ntgrid)
×
146
    use gs2_layouts, only: accel_lo, accelx_lo, dealiasing
147
    use fft_work, only: init_crfftw, init_rcfftw, init_ccfftw, FFT_TO_SPECTRAL_SPACE, FFT_TO_REAL_SPACE
148
#ifdef SHMEM
149
    use shm_mpi3, only: shm_info, shm_alloc
150
    use mp, only: proc0
151
    use gs2_layouts, only: g_lo, accel_lo
152
    integer :: howmany, stride, n2, n3, idw, nwk, envlen, ts1, ts2
153
    character(len=1) :: envval
154
    complex, dimension(:, :), allocatable :: dummy
155
#endif
156
    integer, intent(in) :: ntgrid
157
    integer :: i, idx
158

159
    if (initialized_y_fft) return
×
160
    initialized_y_fft = .true.
×
161

162
    !JH FFTW plan creation for the accelerated 2d transforms -- do we need this
163
    !when we have SHMEM?
164
    call init_crfftw (yf_fft, FFT_TO_REAL_SPACE, accel_lo%ny, accel_lo%nx, &
165
         (2*accel_lo%ntgrid+1) * 2)
×
166
    call init_rcfftw (yb_fft, FFT_TO_SPECTRAL_SPACE, accel_lo%ny, accel_lo%nx, &
167
         (2*accel_lo%ntgrid+1) * 2)
×
168

169
    ! prepare for dealiasing
170
    allocate (ia (accel_lo%nia), iak(accel_lo%nia))
×
171

172
#ifndef SHMEM
173
    allocate (ag(-ntgrid:ntgrid, 2, accel_lo%llim_proc:accel_lo%ulim_alloc))
×
174
    allocate (aidx(accel_lo%llim_proc:accel_lo%ulim_proc))
×
175

176
    do i = accel_lo%llim_proc, accel_lo%ulim_proc
×
177
       aidx(i) = dealiasing(accel_lo, i)
×
178
    end do
179

180
    do idx = 1, accel_lo%nia
×
181
       ia (idx) = accelx_lo%llim_proc + (idx-1)*accelx_lo%nxny
×
182
       iak(idx) = accel_lo%llim_proc  + (idx-1)*accel_lo%nxnky
×
183
    end do
184
#else
185
    call shm_alloc(ag, [-ntgrid, ntgrid, 1, 2, accel_lo%llim_proc, accel_lo%ulim_alloc])
186
    allocate (gidx(accel_lo%llim_node:accel_lo%ulim_node))
187
    gidx = -1
188
    idx = g_lo%llim_node
189
    allocate (aidx(accel_lo%llim_node:accel_lo%ulim_node))
190
    do i = accel_lo%llim_node, accel_lo%ulim_node
191
       aidx(i) = dealiasing(accel_lo, i)
192
       if (aidx(i)) cycle
193
       gidx(i) = idx; idx = idx+1
194
    end do
195

196
    do idx = 1, accel_lo%nia
197
       ia (idx) = accelx_lo%llim_node + (idx-1)*accelx_lo%nxny
198
       iak(idx) = accel_lo%llim_node  + (idx-1)*accel_lo%nxnky
199
    end do
200

201
    call get_environment_variable("GS2_SHM_2D_PLAN",VALUE=envval,LENGTH=envlen)
202
    if (envlen >0) read(envval,*) use_shm_2d_plan
203
    call get_environment_variable("GS2_SHM_2D_PLAN_BUFF",VALUE=envval, LENGTH=envlen)
204
    if (envlen >0) read(envval,*) use_shm_2d_plan_buff
205
    if (proc0) then
206
       if (use_shm_2d_plan) then
207
          write(*,*)' SHM with 2D plans'
208
          if (use_shm_2d_plan_buff) then
209
             write(*,*)' SHM 2D plans with buffering'
210
          endif
211
       else
212
          write(*,*)' SHM with 1D plans'
213
       endif
214
    endif
215

216
    if (use_shm_2d_plan) then
217
       n2 = accel_lo%ny
218
       n3 = accel_lo%nx
219

220
       ! attention, trouble if ppn =1
221
       if ( shm_info%size > 1 ) then
222
          if (shm_info%id < shm_info%size / 2) then
223
             idw = shm_info%id
224
             ts2=1
225
             nwk = shm_info%size / 2
226
          else
227
             idw = shm_info%id - shm_info%size / 2
228
             ts2=2
229
             nwk = shm_info%size - shm_info%size / 2! for odd ppn
230
          endif
231
       else
232
          idw = 0
233
          ts2=1
234
          nwk=1
235
       endif
236
       ts1 = -ntgrid + (2*ntgrid+1)/nwk * idw &
237
            + min(mod(2*ntgrid+1,nwk), idw)
238
       howmany =  (2*ntgrid+1)/nwk
239
       if ( idw < mod(2*ntgrid+1,nwk)) howmany = howmany+1
240
       if(shm_info%size == 1) howmany = 2*(2*ntgrid+1)
241

242
       if ( use_shm_2d_plan_buff) then
243
          stride=howmany
244
       else
245
          stride=2*(2*accel_lo%ntgrid+1)
246
       endif
247

248
       call init_crfftw (yf_fft_shm, FFT_TO_REAL_SPACE, accel_lo%ny, accel_lo%nx, &
249
            howmany, stride) !(2*accel_lo%ntgrid+1) * 2)
250

251
       ! now the inverse
252
       call init_rcfftw (yb_fft_shm, FFT_TO_SPECTRAL_SPACE, accel_lo%ny, accel_lo%nx, &
253
            howmany, stride) !(2*accel_lo%ntgrid+1) * 2)
254

255
       ! init_... has intent out for plan var
256
       yf_fft_shm%ts1=ts1
257
       yf_fft_shm%ts2=ts2
258
       yb_fft_shm%ts1=ts1
259
       yb_fft_shm%ts2=ts2
260

261
    else
262
       call init_crfftw(faccel_shmx, FFT_TO_REAL_SPACE, accel_lo%ny, 2*(2*ntgrid+1), .true.)
263
       allocate(dummy( 2*(2*ntgrid+1), accel_lo%ndky * accel_lo%nx))
264
       call init_ccfftw(faccel_shmy, FFT_TO_REAL_SPACE, accel_lo%nx, 2*(2*ntgrid+1), dummy, .true., accel_lo%ndky)
265
       deallocate(dummy)
266
       call init_rcfftw(baccel_shmx, FFT_TO_SPECTRAL_SPACE, accelx_lo%ny, 2*(2*ntgrid+1), .true.)
267
       allocate(dummy( 2*(2*ntgrid+1), accelx_lo%ny * accelx_lo%nx))
268
       call init_ccfftw(baccel_shmy, FFT_TO_SPECTRAL_SPACE, accel_lo%nx, 2*(2*ntgrid+1), dummy, .true.,accel_lo%ndky)
269
       deallocate(dummy)
270
    end if
271
#endif
272
  end subroutine init_accel_fft
273

274
  !> Setup non-accelerated separate x and y FFTs
275
  subroutine init_xy_fft
×
276
    use gs2_layouts, only: xxf_lo, yxf_lo
277
    use fft_work, only: init_crfftw, init_rcfftw, init_ccfftw, FFT_TO_SPECTRAL_SPACE, FFT_TO_REAL_SPACE
278
    implicit none
279
    integer :: nb_ffts
280

281
    if (initialized_y_fft) return
×
282
    initialized_y_fft = .true.
×
283

284
    if (.not. allocated(fft)) allocate(fft(yxf_lo%ny/2+1, yxf_lo%llim_proc:yxf_lo%ulim_alloc))
×
285
    if (.not. allocated(xxf)) allocate(xxf(xxf_lo%nx,xxf_lo%llim_proc:xxf_lo%ulim_alloc))
×
286

287
    !JH FFTW plan creation for transform x5d and inverse
288
    ! number of ffts to be calculated
289
    !JH 7th December 2011
290
    !JH xxf_lo%ulim_alloc is used here rather than x/yxf_lo%ulim_proc
291
    !JH because there are situations where x/yxf_lo%llim_proc is greater
292
    !JH than x/yxf_lo%ulim_proc and that would create a negative number
293
    !JH of FFTs to be calculated.  However, x/yxf_lo%ulim_alloc is set
294
    !JH to be x/yxf_lo%llim_proc in this situation, and that will give
295
    !JH 1 FFT to be calculated which the code can correctly undertake.
296
    nb_ffts = xxf_lo%ulim_alloc - xxf_lo%llim_proc + 1
×
297
    call init_ccfftw (xf_fft, FFT_TO_REAL_SPACE, xxf_lo%nx, nb_ffts, xxf)
×
298
    call init_ccfftw (xb_fft, FFT_TO_SPECTRAL_SPACE, xxf_lo%nx, nb_ffts, xxf)
×
299

300
    nb_ffts = yxf_lo%ulim_alloc - yxf_lo%llim_proc + 1
×
301
    call init_crfftw (yf_fft, FFT_TO_REAL_SPACE, yxf_lo%ny, nb_ffts)
×
302
    call init_rcfftw (yb_fft, FFT_TO_SPECTRAL_SPACE,  yxf_lo%ny, nb_ffts)
×
303
  end subroutine init_xy_fft
304

305
  !> Constructs the redistribute mapping from the g_lo data
306
  !> decomposition to the xxf_lo decomposition.
307
  subroutine setup_x_redist_local(g_lo, xxf_lo, g2x)
×
308
    use layouts_type, only: g_layout_type, xxf_layout_type
309
    use gs2_layouts, only: proc_id, idx_local, opt_local_copy, layout
310
    use gs2_layouts, only: ik_idx,il_idx,ie_idx,is_idx,idx, ig_idx, isign_idx
311
    use mp, only: nproc
312
    use redistribute, only: index_list_type, init_redist, delete_list
313
    use redistribute, only: set_redist_character_type, set_xxf_optimised_variables
314
    use sorting, only: quicksort
315
    implicit none
316
    type(g_layout_type), intent(in) :: g_lo
317
    type(xxf_layout_type), intent(in) :: xxf_lo
318
    type(redist_type), intent(in out) :: g2x
319
    type (index_list_type), dimension(0:nproc-1) :: to_list, from_list, sort_list
×
320
    integer, dimension(0:nproc-1) :: nn_to, nn_from
×
321
    integer, dimension (3) :: from_low, from_high
322
    integer, dimension (2) :: to_high
323
    integer :: to_low
324
    integer :: iglo, isign, ig, it, ixxf, it0
325
    integer :: n, ip, il, ik, ie, is
326

327
    ! count number of elements to be redistributed to/from each processor
328
    nn_to = 0
×
329
    nn_from = 0
×
330

331
    !Here we loop over the whole domain (so this doesn't get cheaper with more cores)
332
    !This is required to ensure that we can associate send and receive message elements
333
    !i.e. so we know that we put the received data in the correct place.
334
    !However, as we know the order the iglo,isign,ig indices should increase we could
335
    !loop over our ixxf local range, calculate the corresponding iglo,isign and ig indices
336
    !and sort the ixxf messages on this to ensure they're in the correct order.
337
    !Either way when just counting how much data we are going to send and receive we don't
338
    !care about order so we can just loop over our local range!
339
    !First count the sends | g_lo-->xxf_lo
340
    !Protect against procs with no data
341
    if(g_lo%ulim_proc>=g_lo%llim_proc)then
×
342
       do iglo = g_lo%llim_proc, g_lo%ulim_alloc
×
343
          il = il_idx(g_lo, iglo)
×
344

345
          !Convert iglo,isign=1,ig=-ntgrid into ixxf
346
          ixxf=idx(xxf_lo,-g_lo%ntgrid,1,ik_idx(g_lo,iglo),&
347
               il, ie_idx(g_lo,iglo),is_idx(g_lo,iglo))
×
348

349
          !Now loop over other local dimensions
350
          do isign = 1, 2
×
351
             do ig = -g_lo%ntgrid, g_lo%ntgrid
×
352
                ip = proc_id(xxf_lo, ixxf)
×
353

354
                ! Don't send data from forbidden region, unless it is going to
355
                ! this processor (i.e. local copy) due to assumptions in opt_local_copy
356
                ! based methods.
357
                if (.not. point_is_forbidden_and_not_redistributed(ig, il, ip)) then
×
358
                   !Increase the data send count for the proc which has the ixxf
359
                   nn_from(ip)=nn_from(ip)+1
×
360
                end if
361

362
                !Increase ixxf using knowledge of the xxf_lo layout
363
                ixxf=ixxf+xxf_lo%naky
×
364
             enddo
365
          enddo
366
       enddo
367
    endif
368

369
    !Now count the receives | xxf_lo<--g_lo
370
    !Protect against procs with no data
371
    if(xxf_lo%ulim_proc>=xxf_lo%llim_proc)then
×
372
       do ixxf = xxf_lo%llim_proc, xxf_lo%ulim_alloc
×
373
          !Could split it (or x) domain into two parts to account for
374
          !difference in it (or x) meaning and order in g_lo and xxf_lo
375
          !but only interested in how much data we receive and not the
376
          !exact indices (yet) so just do 1-->ntheta0 (this is how many
377
          !non-zero x's?)
378
          ik = ik_idx(xxf_lo, ixxf)
×
379
          il = il_idx(xxf_lo, ixxf)
×
380
          ie = ie_idx(xxf_lo, ixxf)
×
381
          is = is_idx(xxf_lo, ixxf)
×
382
          ig = ig_idx(xxf_lo, ixxf)
×
383
          do it=1,xxf_lo%ntheta0
×
384
             !Convert ixxf,it indices into iglo, ig and isign indices
385
             iglo=idx(g_lo, ik, it, il, ie, is)
×
386

387
             ip = proc_id(g_lo, iglo)
×
388

389
             ! Don't send data from forbidden region, unless it is going to
390
             ! this processor (i.e. local copy) due to assumptions in opt_local_copy
391
             ! based methods.
392
             if (point_is_forbidden_and_not_redistributed(ig, il, ip)) cycle
×
393

394
             !Increase the data to receive count for proc with this data
395
             !Note, we only worry about iglo and not isign/ig because we know that
396
             !in g_lo each proc has all ig and isign domain.
397
             !The xxf_lo domain contains ig and isign so we only need to add one
398
             !to the count for each ixxf.
399
             nn_to(proc_id(g_lo,iglo))=nn_to(proc_id(g_lo,iglo))+1
×
400
          enddo
401
       enddo
402
    endif
403

404
    !Now allocate storage for data mapping indices
405
    do ip = 0, nproc-1
×
406
       if (nn_from(ip) > 0) then
×
407
          allocate (from_list(ip)%first(nn_from(ip)))
×
408
          allocate (from_list(ip)%second(nn_from(ip)))
×
409
          allocate (from_list(ip)%third(nn_from(ip)))
×
410
       end if
411
       if (nn_to(ip) > 0) then
×
412
          allocate (to_list(ip)%first(nn_to(ip)))
×
413
          allocate (to_list(ip)%second(nn_to(ip)))
×
414
          !For sorting to_list later
415
          allocate (sort_list(ip)%first(nn_to(ip)))
×
416
       end if
417
    end do
418

419
    !Reinitialise count arrays to zero
420
    nn_to = 0
×
421
    nn_from = 0
×
422

423
    !First fill in the sending indices, these define the messages data order
424
    !Protect against procs with no data
425
    if(g_lo%ulim_proc>=g_lo%llim_proc)then
×
426
       do iglo=g_lo%llim_proc, g_lo%ulim_alloc
×
427
          il = il_idx(g_lo, iglo)
×
428

429
          !Convert iglo,isign=1,ig=-ntgrid into ixxf
430
          ixxf=idx(xxf_lo,-g_lo%ntgrid,1,ik_idx(g_lo,iglo),&
431
               il, ie_idx(g_lo,iglo),is_idx(g_lo,iglo))
×
432

433
          !Now loop over other local dimensions
434
          do isign = 1, 2
×
435
             do ig = -g_lo%ntgrid, g_lo%ntgrid
×
436
                !Get proc id
437
                ip=proc_id(xxf_lo,ixxf)
×
438
                if (.not. point_is_forbidden_and_not_redistributed(ig, il, ip)) then
×
439

440
                   !Increment procs message counter
441
                   n=nn_from(ip)+1
×
442
                   nn_from(ip)=n
×
443

444
                   !Store indices
445
                   from_list(ip)%first(n) = ig
×
446
                   from_list(ip)%second(n) = isign
×
447
                   from_list(ip)%third(n) = iglo
×
448
                end if
449

450
                !We could send this information, transformed to the xxf layout to the proc.
451

452
                !Increment counter
453
                ixxf=ixxf+xxf_lo%naky
×
454
             enddo
455
          enddo
456
       enddo
457
    endif
458

459
    !Now lets fill in the receiving indices, these must match the messages data order
460
    !Protect against procs with no data
461
    if(xxf_lo%ulim_proc>=xxf_lo%llim_proc)then
×
462
       do ixxf = xxf_lo%llim_proc, xxf_lo%ulim_alloc
×
463
          !Get indices
464
          isign=isign_idx(xxf_lo,ixxf)
×
465
          ik = ik_idx(xxf_lo, ixxf)
×
466
          il = il_idx(xxf_lo, ixxf)
×
467
          ie = ie_idx(xxf_lo, ixxf)
×
468
          is = is_idx(xxf_lo, ixxf)
×
469
          ig = ig_idx(xxf_lo, ixxf)
×
470

471
          !Loop over receiving "it" indices
472
          do it=1,g_lo%ntheta0
×
473
             !Convert from g_lo%it to xxf_lo%it
474
             if(it>(xxf_lo%ntheta0+1)/2) then
×
475
                it0=it+xxf_lo%nx-xxf_lo%ntheta0
×
476
             else
477
                it0=it
×
478
             endif
479

480
             !Convert ixxf,it indices into iglo indices
481
             iglo=idx(g_lo, ik, it, il, ie, is)
×
482

483
             !Get proc id which has this data
484
             ip=proc_id(g_lo,iglo)
×
485

486
             if (point_is_forbidden_and_not_redistributed(ig, il, ip)) cycle
×
487

488
             !Determine message position index
489
             n=nn_to(ip)+1
×
490
             nn_to(ip)=n
×
491

492
             !Store receive indices
493
             to_list(ip)%first(n)=it0
×
494
             to_list(ip)%second(n)=ixxf
×
495

496
             !Store index for sorting
497
             sort_list(ip)%first(n) = ig+g_lo%ntgrid-1+g_lo%ntgridtotal*(isign-1+2*(iglo-g_lo%llim_world))
×
498
          enddo
499
       enddo
500
    endif
501

502
    !Now we need to sort the to_list message indices based on sort_list | This seems potentially slow + inefficient
503
    do ip=0,nproc-1
×
504
       !Only need to worry about procs which we are receiving from
505
       if(nn_to(ip)>0) then
×
506
          !Sort using quicksort
507
          call quicksort(nn_to(ip),sort_list(ip)%first,to_list(ip)%first,to_list(ip)%second)
×
508
       endif
509
    enddo
510

511
    !Setup array range values
512
    from_low (1) = -g_lo%ntgrid
×
513
    from_low (2) = 1
×
514
    from_low (3) = g_lo%llim_proc
×
515

516
    to_low = xxf_lo%llim_proc
×
517

518
    to_high(1) = xxf_lo%nx
×
519
    to_high(2) = xxf_lo%ulim_alloc
×
520

521
    from_high(1) = g_lo%ntgrid
×
522
    from_high(2) = 2
×
523
    from_high(3) = g_lo%ulim_alloc
×
524

525
    call set_redist_character_type(g2x, 'g2x')
×
526
    call set_xxf_optimised_variables(opt_local_copy, xxf_lo%naky,  &
527
         xxf_lo%ntgrid,  xxf_lo%ntheta0, xxf_lo%nlambda,  xxf_lo%negrid, &
528
         xxf_lo%nx, xxf_lo%ulim_proc, g_lo%ulim_proc, layout)
×
529

530
    !Create g2x redistribute object
531
    call init_redist (g2x, 'c', to_low, to_high, to_list, &
×
532
         from_low, from_high, from_list)
×
533

534
    !Deallocate list objects
535
    call delete_list (to_list)
×
536
    call delete_list (from_list)
×
537
    call delete_list(sort_list)
×
538

539
  end subroutine setup_x_redist_local
×
540

541
  !> Setup global xxf_lo and redistribute between global g_lo -> xxf_lo
542
  subroutine init_x_redist_local()
×
543
    use gs2_layouts, only: g_lo, xxf_lo
544
    implicit none
545

546
    !Early exit if possible
547
    if (initialized_x_redist) return
×
548
    initialized_x_redist = .true.
×
549

550
    call setup_x_redist_local(g_lo, xxf_lo, g2x)
×
551
  end subroutine init_x_redist_local
552

553
  !> Constructs the redistribute mapping from the xxf_lo data
554
  !> decomposition to the yxf_lo decomposition.
555
  subroutine setup_y_redist_local(xxf_lo, yxf_lo, x2y)
×
556
    use layouts_type, only: xxf_layout_type, yxf_layout_type
557
    use gs2_layouts, only: proc_id, idx_local
558
    use gs2_layouts, only: ik_idx,it_idx,il_idx,ie_idx,is_idx,idx,ig_idx,isign_idx
559
    use mp, only: nproc
560
    use redistribute, only: index_list_type, init_redist, delete_list
561
    use redistribute, only: set_yxf_optimised_variables, set_redist_character_type
562
    use sorting, only: quicksort
563
    implicit none
564
    type(xxf_layout_type), intent(in) :: xxf_lo
565
    type(yxf_layout_type), intent(in) :: yxf_lo
566
    type(redist_type), intent(in out) :: x2y
567
    type (index_list_type), dimension(0:nproc-1) :: to_list, from_list,sort_list
×
568
    integer, dimension(0:nproc-1) :: nn_to, nn_from
×
569
    integer, dimension (2) :: from_low, from_high, to_high
570
    integer :: to_low
571
    integer :: it, ixxf, ik, iyxf
572
    integer :: ixxf_start, iyxf_start
573
    integer :: n, ip, il, ig
574

575
    !Initialise counts to zero
576
    nn_to = 0
×
577
    nn_from = 0
×
578

579
    !First count data to send | xxf_lo-->yxf_lo
580
    !Protect against procs with no data
581
    if(xxf_lo%ulim_proc>=xxf_lo%llim_proc)then
×
582
       do ixxf=xxf_lo%llim_proc,xxf_lo%ulim_alloc
×
583
          il = il_idx(xxf_lo, ixxf)
×
584
          ig = ig_idx(xxf_lo, ixxf)
×
585

586
          !Get iyxf index for "it"=1
587
          iyxf_start=idx(yxf_lo, ig,&
588
               isign_idx(xxf_lo,ixxf), 1, il,&
589
               ie_idx(xxf_lo,ixxf),is_idx(xxf_lo,ixxf))
×
590

591
          !Loop over "it" range, note that we actually only want to know
592
          !iyxf in this range and we know that it-->it+1 => iyxf-->iyxf+1
593
          !so replace loop with one over iyxf
594
          do iyxf=iyxf_start,iyxf_start+yxf_lo%nx-1
×
595
             ip = proc_id(yxf_lo, iyxf)
×
596

597
             ! Don't send data from forbidden region, unless it is going to
598
             ! this processor (i.e. local copy) due to assumptions in opt_local_copy
599
             ! based methods.
600
             if (point_is_forbidden_and_not_redistributed(ig, il, ip)) cycle
×
601
             !Increase the appropriate procs send count
602
             nn_from(ip)=nn_from(ip)+1
×
603
          enddo
604
       enddo
605
    endif
606

607
    !Now count data to receive | yxf_lo<--xxf_lo
608
    !Protect against procs with no data
609
    if(yxf_lo%ulim_proc>=yxf_lo%llim_proc)then
×
610
       do iyxf=yxf_lo%llim_proc,yxf_lo%ulim_alloc
×
611
          ig = ig_idx(yxf_lo, iyxf)
×
612
          il = il_idx(yxf_lo, iyxf)
×
613
          !Get ixxf index for "ik"=1
614
          ixxf_start=idx(xxf_lo, ig,&
615
               isign_idx(yxf_lo,iyxf), 1, il,&
616
               ie_idx(yxf_lo,iyxf),is_idx(yxf_lo,iyxf))
×
617

618
          !Loop over "ik" range, note that we actually only want to know
619
          !ixxf in this range and we know that ik-->ik+1 => ixxf-->ixxf+1
620
          !so replace loop with one over ixxf
621
          do ixxf=ixxf_start,ixxf_start+xxf_lo%naky-1
×
622
             ip = proc_id(xxf_lo, ixxf)
×
623

624
             ! Don't send data from forbidden region, unless it is going to
625
             ! this processor (i.e. local copy) due to assumptions in opt_local_copy
626
             ! based methods.
627
             if (point_is_forbidden_and_not_redistributed(ig, il, ip)) cycle
×
628

629
             !Increase the appropriate procs recv count
630
             nn_to(ip) = nn_to(ip)+1
×
631
          enddo
632
       enddo
633
    endif
634

635
    !Now allocate storage for data mapping structures
636
    do ip = 0, nproc-1
×
637
       if (nn_from(ip) > 0) then
×
638
          allocate (from_list(ip)%first(nn_from(ip)))
×
639
          allocate (from_list(ip)%second(nn_from(ip)))
×
640
       end if
641
       if (nn_to(ip) > 0) then
×
642
          allocate (to_list(ip)%first(nn_to(ip)))
×
643
          allocate (to_list(ip)%second(nn_to(ip)))
×
644
          !For sorting to_list later
645
          allocate (sort_list(ip)%first(nn_to(ip)))
×
646
       end if
647
    end do
648

649
    !Reinitialise count arrays to zero
650
    nn_to = 0
×
651
    nn_from = 0
×
652

653
    !First fill in the sending indices, these define the messages data order
654
    !Protect against procs with no data
655
    if(xxf_lo%ulim_proc>=xxf_lo%llim_proc)then
×
656
       do ixxf=xxf_lo%llim_proc,xxf_lo%ulim_alloc
×
657
          ig = ig_idx(xxf_lo, ixxf)
×
658
          il = il_idx(xxf_lo, ixxf)
×
659

660
          !Get iyxf for "it"=1
661
          iyxf=idx(yxf_lo, ig,&
662
               isign_idx(xxf_lo,ixxf), 1, il,&
663
               ie_idx(xxf_lo,ixxf),is_idx(xxf_lo,ixxf))
×
664

665
          !Now loop over other local dimension. Note we need "it" here
666
          !so don't replace this with loop over iyxf
667
          do it=1,yxf_lo%nx
×
668
             !Get the processor id
669
             ip=proc_id(yxf_lo,iyxf)
×
670

671
             ! Don't send data from forbidden region, unless it is going to
672
             ! this processor (i.e. local copy) due to assumptions in opt_local_copy
673
             ! based methods.
674
             if (.not. point_is_forbidden_and_not_redistributed(ig, il, ip)) then
×
675
                !Increment the procs message counter
676
                n=nn_from(ip)+1
×
677
                nn_from(ip)=n
×
678

679
                !Store indices
680
                from_list(ip)%first(n)=it
×
681
                from_list(ip)%second(n)=ixxf
×
682
             end if
683

684
             !Increment iyxf
685
             iyxf=iyxf+1
×
686
          enddo
687
       enddo
688
    endif
689

690
    !Now fill in the receiving indices, these must match the message data order, achieved by later sorting
691
    !Protect against procs with no data
692
    if(yxf_lo%ulim_proc>=yxf_lo%llim_proc)then
×
693
       do iyxf=yxf_lo%llim_proc,yxf_lo%ulim_alloc
×
694
          ig = ig_idx(yxf_lo, iyxf)
×
695
          il = il_idx(yxf_lo, iyxf)
×
696

697
          !Get ixxf for "ik"=1
698
          ixxf=idx(xxf_lo, ig,&
699
               isign_idx(yxf_lo,iyxf), 1, il,&
700
               ie_idx(yxf_lo,iyxf),is_idx(yxf_lo,iyxf))
×
701

702
          !Now loop over other local dimension. Note we need "ik" here
703
          !so don't replace this with loop over ixxf
704
          do ik=1,xxf_lo%naky
×
705
             !Get the processor id
706
             ip=proc_id(xxf_lo,ixxf)
×
707

708
             ! Don't send data from forbidden region, unless it is going to
709
             ! this processor (i.e. local copy) due to assumptions in opt_local_copy
710
             ! based methods.
711
             if (.not. point_is_forbidden_and_not_redistributed(ig, il, ip)) then
×
712

713
                !Increment the procs message counter
714
                n=nn_to(ip)+1
×
715
                nn_to(ip)=n
×
716

717
                !Store indices
718
                to_list(ip)%first(n)=ik
×
719
                to_list(ip)%second(n)=iyxf
×
720

721
                !Store index for sorting
722
                sort_list(ip)%first(n)=it_idx(yxf_lo,iyxf)+ixxf*yxf_lo%nx
×
723
             end if
724

725
             !Increment ixxf
726
             ixxf=ixxf+1
×
727
          enddo
728
       enddo
729
    endif
730

731
    !Now we need to sort the to_list message indices based on sort_list
732
    !This could be slow and inefficient
733
    do ip=0,nproc-1
×
734
       !Only need to worry about procs which we are receiving from
735
       if(nn_to(ip)>0) then
×
736
          !Use quicksort based on compound index
737
          call quicksort(nn_to(ip),sort_list(ip)%first,to_list(ip)%first,to_list(ip)%second)
×
738
       endif
739
    enddo
740

741
    !Setup array bound values
742
    from_low(1) = 1
×
743
    from_low(2) = xxf_lo%llim_proc
×
744

745
    to_low = yxf_lo%llim_proc
×
746

747
    to_high(1) = yxf_lo%ny/2+1
×
748
    to_high(2) = yxf_lo%ulim_alloc
×
749

750
    from_high(1) = xxf_lo%nx
×
751
    from_high(2) = xxf_lo%ulim_alloc
×
752

753
    call set_redist_character_type(x2y, 'x2y')
×
754
    call set_yxf_optimised_variables(yxf_lo%ulim_proc)
×
755

756
    !Create x2y redist object
757
    call init_redist (x2y, 'c', to_low, to_high, to_list, &
×
758
         from_low, from_high, from_list)
×
759

760
    !Deallocate list objects
761
    call delete_list (to_list)
×
762
    call delete_list (from_list)
×
763
    call delete_list (sort_list)
×
764
  end subroutine setup_y_redist_local
×
765

766
  !> Small helper function to wrap up logic about if this point corresponds to a forbidden
767
  !> point and if we need to transfer it in the redistribute
768
  pure logical function point_is_forbidden_and_not_redistributed(ig, il, ip) result(flag)
×
769
    use mp, only: iproc
770
    use le_grids, only: forbid
771
    use gs2_layouts, only: opt_local_copy
772
    implicit none
773
    integer, intent(in) :: ig, il, ip
774
    !> The following is for debug/testing to force all grid
775
    !> points to be communicated if we wish.
776
    logical, parameter :: skip_forbidden_region = .true.
777
    flag = &
778
         !We want to skip the forbidden points
779
         skip_forbidden_region .and. &
×
780
         !Point is in forbidden region
781
         forbid(ig, il) &
782
         !Point will be sent to a different proc, or we are not using opt_local_copy.
783
         .and. ((ip/=iproc) .or. .not. opt_local_copy)
×
784
  end function point_is_forbidden_and_not_redistributed
×
785

786
  !> Setup the module level xxf -> yxf redistribute. Note this also
787
  !> calls [[init_x_redist_local]] in order to setup the mapping for
788
  !> g_lo to xxf_lo as well.
789
  subroutine init_y_redist_local
×
790
    use gs2_layouts, only: xxf_lo, yxf_lo
791
    implicit none
792

793
    !Setup g_lo-->xxf_lo redist object first
794
    call init_x_redist_local
×
795

796
    !Early exit if possible
797
    if (initialized_y_redist) return
×
798
    initialized_y_redist = .true.
×
799

800
    call setup_y_redist_local(xxf_lo, yxf_lo, x2y)
×
801
  end subroutine init_y_redist_local
802

803
  !> FIXME : Add documentation
804
  subroutine transform_x5d (g, xxf)
×
805
    use gs2_layouts, only: xxf_lo, g_lo
806
    use redistribute, only: gather
807
    use job_manage, only: time_message
808
    use fft_work, only: time_fft
809
    use array_utils, only: zero_array
810
    implicit none
811
    complex, dimension (-xxf_lo%ntgrid:,:,g_lo%llim_proc:), intent (in) :: g
812
    complex, dimension (:,xxf_lo%llim_proc:), intent (out) :: xxf
813

814
    ! Zero out the array as the subsequent gather doesn't populate
815
    ! every element as we skip communicating forbidden points and
816
    ! we are padding our data. We can't just zero once at the start
817
    ! as fftw can destroy the input, meaning it might set zero entries
818
    ! in fft to something non-zero. Plus here xxf also holds the output.
819
    call zero_array(xxf)
×
820

821
    !CMR, 7/3/2011: gather pulls appropriate pieces of g onto this processor for
822
    !    local Fourier transform in x, and may also pad with zeros for dealiasing
823
    !
824
    call gather (g2x, g, xxf)
×
825

826
    call time_message(.false., time_fft, ' FFT')
×
827
    call xf_fft%execute_c2c(xxf, xxf)
×
828
    call time_message(.false., time_fft, ' FFT')
×
829
  end subroutine transform_x5d
×
830

831
  !> FIXME : Add documentation  
832
  subroutine inverse_x5d (xxf, g)
×
833
    use gs2_layouts, only: xxf_lo, g_lo
834
    use redistribute, only: scatter
835
    use job_manage, only: time_message
836
    use fft_work, only: time_fft
837
    implicit none
838
    complex, dimension (:,xxf_lo%llim_proc:), intent (in out) :: xxf
839
    complex, dimension (-xxf_lo%ntgrid:,:,g_lo%llim_proc:), intent (out) :: g
840

841
    call time_message(.false., time_fft, ' FFT')
×
842
    call xb_fft%execute_c2c(xxf, xxf)
×
843
    call time_message(.false., time_fft, ' FFT')
×
844

845
    call scatter (g2x, xxf, g)
×
846
  end subroutine inverse_x5d
×
847

848
  !> FIXME : Add documentation
849
  subroutine transform_y5d (xxf, yxf)
×
850
    use gs2_layouts, only: xxf_lo, yxf_lo
851
    use redistribute, only: gather
852
    use job_manage, only: time_message
853
    use fft_work, only: time_fft
854
    use array_utils, only: zero_array
855
    implicit none
856
    complex, dimension (:,xxf_lo%llim_proc:), intent (in) :: xxf
857
    real, dimension (:,yxf_lo%llim_proc:), intent(in out) :: yxf
858

859
    ! Zero out the array as the subsequent gather doesn't populate
860
    ! every element as we skip communicating forbidden points and
861
    ! we are padding our data. We can't just zero once at the start
862
    ! as fftw can destroy the input, meaning it might set zero entries
863
    ! in fft to something non-zero.
864
    call zero_array(fft)
×
865

866
    !Note here we're doing the communication even if we're not using
867
    !an FFT routine.
868
    call gather (x2y, xxf, fft)
×
869

870
    call time_message(.false., time_fft, ' FFT')
×
871
    call yf_fft%execute_c2r(fft, yxf)
×
872
    call time_message(.false., time_fft, ' FFT')
×
873
  end subroutine transform_y5d
×
874

875
  !> FIXME : Add documentation  
876
  subroutine inverse_y5d (yxf, xxf)
×
877
    use gs2_layouts, only: xxf_lo, yxf_lo
878
    use redistribute, only: scatter
879
    use job_manage, only: time_message
880
    use fft_work, only: time_fft
881
    implicit none
882
    real, dimension (:,yxf_lo%llim_proc:), intent (in out) :: yxf
883
    complex, dimension (:,xxf_lo%llim_proc:), intent (out) :: xxf
884

885
    call time_message(.false., time_fft, ' FFT')
×
886
    call yb_fft%execute_r2c(yxf, fft)
×
887
    call time_message(.false., time_fft, ' FFT')
×
888

889
    call scatter (x2y, fft, xxf)
×
890
  end subroutine inverse_y5d
×
891

892
  !> FIXME : Add documentation  
893
  subroutine transform2_5d (g, yxf)
×
894
    use gs2_layouts, only: g_lo, yxf_lo, ik_idx
895
    use unit_tests,only: debug_message
896
    implicit none
897
    complex, dimension (:,:,g_lo%llim_proc:), intent (in out) :: g
898
    real, dimension (:,yxf_lo%llim_proc:), intent (out) :: yxf
899
    integer :: iglo
900

901
    call debug_message(4, 'gs2_transforms::transform2_5d starting')
×
902

903
    !CMR+GC: 2/9/2013
904
    !  gs2's Fourier coefficients,  F_k^gs2, not standard form: i.e. f(x) = f_k e^(i k.x)
905
    !
906
    !  F_k^gs2 are 2 x larger for ky > 0,   i.e.
907
    !                     F_k^gs2 = |    f_k   for ky = 0
908
    !                               |  2 f_k   for ky > 0
909
    !
910
    ! Following large loop (due to this) can be eliminated with std Fourier coeffs.
911
    ! Similar optimisations possible in: 
912
    !          "inverse2_5d", "transform2_5d_accel", "inverse2_5d_accel" 
913
    !
914
    ! NB Moving to standard Fourier coeffs would impact considerably on diagnostics:
915
    !       e.g. fac in get_volume_average
916
    !
917

918
    !$OMP PARALLEL DO DEFAULT(none) &
919
    !$OMP SHARED(g_lo, g) &
920
    !$OMP SCHEDULE(dynamic)
921
    do iglo = g_lo%llim_proc, g_lo%ulim_proc
×
922
       if (ik_idx(g_lo, iglo) == 1) cycle
×
923
       g(:,:,iglo) = g(:,:,iglo) * 0.5
×
924
    end do
925
    !$OMP END PARALLEL DO
926

927
    call transform_x (g, xxf)
×
928
    call transform_y (xxf, yxf)
×
929
  end subroutine transform2_5d
×
930

931
  !> FIXME : Add documentation  
932
  subroutine inverse2_5d (yxf, g)
×
933
    use gs2_layouts, only: g_lo, yxf_lo, ik_idx
934
    implicit none
935
    real, dimension (:,yxf_lo%llim_proc:), intent (in out) :: yxf
936
    complex, dimension (:,:,g_lo%llim_proc:), intent (out) :: g
937
    integer :: iglo
938
    real :: scale
939
    call inverse_y (yxf, xxf)
×
940
    call inverse_x (xxf, g)
×
941

942
    scale = xb_fft%scale * yb_fft%scale
×
943
    !CMR+GC: 2/9/2013
944
    ! Following large loop can be eliminated if gs2 used standard Fourier coefficients.
945
    ! (See above comment in transform2_5d.)
946
    !
947

948
    !$OMP PARALLEL DO DEFAULT(none) &
949
    !$OMP SHARED(g_lo, g, scale) &
950
    !$OMP SCHEDULE(dynamic)
951
    do iglo = g_lo%llim_proc, g_lo%ulim_proc
×
952
       if (ik_idx(g_lo, iglo) /= 1) then
×
953
          g(:,:,iglo) = g(:,:,iglo) * (scale * 2.0)
×
954
       else
955
          g(:,:,iglo) = g(:,:,iglo) * scale
×
956
       end if
957
    end do
958
    !$OMP END PARALLEL DO
959
  end subroutine inverse2_5d
×
960

961
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
962

963
  !> FIXME : Add documentation
964
  subroutine transform2_5d_accel (g, axf)
×
965
    use gs2_layouts, only: g_lo, accel_lo, accelx_lo, ik_idx
966
    use unit_tests, only: debug_message
967
    use job_manage, only: time_message
968
    use fft_work, only: time_fft
969
#ifdef SHMEM
970
    use shm_mpi3, only : shm_info, shm_get_node_pointer, shm_node_barrier, shm_fence
971
#endif
972
    implicit none
973
    complex, dimension (:,:,g_lo%llim_proc:), target, intent (in out) :: g
974
    real, dimension (:,:,accelx_lo%llim_proc:), target, intent (in out) :: axf
975
    integer :: iglo, k, idx
976
#ifdef SHMEM
977
    complex, pointer, contiguous ::ag_ptr(:,:,:) => null(), g_ptr(:,:,:) => null()
978
    real, pointer, contiguous :: axf_ptr(:,:,:) => null() 
979
    integer  :: kk, nk, kb, bidx
980
    integer, save :: iglo_s, iglo_e, kbs, kbe
981
    logical, save :: firsttime =.true.
982
#endif 
983

984
    integer :: itgrid, iduo
985
    integer :: ntgrid
986

987
    call debug_message(4, 'gs2_transforms::transform2_5d_accel starting')
×
988

989
    ntgrid = accel_lo%ntgrid
×
990
#ifdef SHMEM
991
    g_ptr (1:,1:,g_lo%llim_node:) => shm_get_node_pointer(g, -1)
992
    ag_ptr (-ntgrid:,1:,accel_lo%llim_node:) => shm_get_node_pointer(ag, -1)
993
    if(firsttime)then
994
       firsttime=.false.
995
       call compute_block_partition(g_lo%ntheta0 * g_lo%naky, iglo_s, iglo_e)
996
       call compute_block_partition(accel_lo%nxnky, kbs, kbe)
997
    end if
998

999
#endif
1000
    !
1001
    !CMR+GC, 2/9/2013:
1002
    !  Scaling g would not be necessary if gs2 used standard Fourier coefficients.
1003
    !
1004
    ! scale the g and copy into the anti-aliased array ag
1005
    ! zero out empty ag
1006
    ! touch each g and ag only once
1007
#ifndef SHMEM
1008
    idx = g_lo%llim_proc
×
1009
    do k = accel_lo%llim_proc, accel_lo%ulim_proc
×
1010
       ! CMR: aidx is true for modes killed by the dealiasing mask
1011
       ! so following line removes ks in dealiasing region
1012
       if (aidx(k)) then
×
1013
          ag(:,:,k) = 0.0
×
1014
       else
1015
          ! scaling only for k_y not the zero mode
1016
          if (ik_idx(g_lo, idx) /= 1) then
×
1017
             do iduo = 1, 2
×
1018
                do itgrid = 1, 2*ntgrid +1
×
1019
                   ! It seems strange that we scale the input and then
1020
                   ! copy it to ag -- why not just scaling ag, which is
1021
                   ! what we are going to fft and return?
1022
                   g(itgrid, iduo, idx) &
×
1023
                        = 0.5 * g(itgrid, iduo, idx)
×
1024
                   ag(itgrid - (ntgrid+1), iduo, k) &
×
1025
                        = g(itgrid, iduo, idx)
×
1026
                enddo
1027
             enddo
1028
             ! in case of k_y being zero: just copy
1029
          else
1030
             do iduo = 1, 2
×
1031
                do itgrid = 1, 2*ntgrid+1
×
1032
                   ag(itgrid-(ntgrid+1), iduo, k) &
×
1033
                        = g(itgrid, iduo, idx)
×
1034
                enddo
1035
             enddo
1036
          endif
1037
          idx = idx + 1
×
1038
       endif
1039
    enddo
1040
#else
1041
    call shm_node_barrier
1042
    bidx=0
1043
    do kb = accel_lo%llim_node, accel_lo%ulim_node, accel_lo%nxnky
1044
       idx = g_lo%llim_node +bidx * g_lo%naky*g_lo%ntheta0
1045

1046
       ! we might not have scaled all g
1047
       Do iglo = idx+iglo_s, idx+iglo_s+iglo_e
1048
          if (ik_idx(g_lo, iglo) /= 1) then
1049
             g_ptr(:,:, iglo) = 0.5 * g_ptr(:,:,iglo)
1050
          endif
1051
       enddo
1052

1053
       call shm_node_barrier
1054
       call shm_fence(g_ptr(1,1,g_lo%llim_node))
1055

1056
       do k = kb+kbs, kb+kbs+kbe !accel_lo%llim_node, accel_lo%ulim_node       
1057
          ! CMR: aidx is true for modes killed by the dealiasing mask
1058
          ! so following line removes ks in dealiasing region
1059
          if (aidx(k)) then
1060
             ag_ptr(:,:,k) = 0.0
1061
          else
1062
             do iduo = 1, 2
1063
                do itgrid =1, 2*ntgrid +1
1064
                   ag_ptr(itgrid - (ntgrid+1), iduo, k) &
1065
                        = g_ptr(itgrid, iduo, gidx(k))
1066
                enddo
1067
             enddo
1068
             !         idx=idx+1
1069
          endif
1070
       enddo
1071
       bidx=bidx+1
1072
    enddo
1073
#endif
1074

1075
    !CMR+GC: 2/9/2013
1076
    ! Following large loop can be eliminated if gs2 used standard Fourier coefficients.
1077
    ! (See above comment in transform2_5d.)
1078

1079
#ifndef SHMEM
1080
    ! we might not have scaled all g
1081
    Do iglo = idx, g_lo%ulim_proc
×
1082
       if (ik_idx(g_lo, iglo) /= 1) then
×
1083
          g(:,:, iglo) = 0.5 * g(:,:,iglo)
×
1084
       endif
1085
    enddo
1086
#else
1087

1088
    axf_ptr(-ntgrid:,1:,accelx_lo%llim_node:) => & 
1089
         shm_get_node_pointer(axf, -1)
1090
#endif
1091

1092

1093
    ! transform
1094
#ifndef SHMEM
1095
    idx = 1
×
1096
    call time_message(.false., time_fft, ' FFT')
×
1097
    do k = accel_lo%llim_proc, accel_lo%ulim_proc, accel_lo%nxnky
×
1098
       ! remember FFTW3 for c2r destroys the contents of ag
1099
       call yf_fft%execute_c2r(ag(:, :, k:), axf(:, :, ia(idx):))
×
1100
       idx = idx + 1
×
1101
    end do
1102
    call time_message(.false., time_fft, ' FFT')
×
1103
#else
1104
    ! in the following the ranks operate on other data ranges
1105
    call shm_node_barrier
1106

1107
    if (use_shm_2d_plan) then
1108
       idx = 1 + shm_info%id
1109
       idx = mod(idx -1, accel_lo%nia) +1
1110
       nk = accel_lo%ulim_node - accel_lo%llim_node +1
1111
    else
1112
       idx = 1
1113
    endif
1114
    call time_message(.false., time_fft, ' FFT')
1115
    do k = accel_lo%llim_node, accel_lo%ulim_node, accel_lo%nxnky
1116
       if (use_shm_2d_plan) then
1117
          kk = k + shm_info%id * accel_lo%nxnky
1118
          kk = mod(kk - accel_lo%llim_node, nk) + accel_lo%llim_node
1119
       else
1120
          kk = k
1121
       endif
1122

1123
       call fft_shm(kk,ia(idx))
1124

1125
       idx = idx + 1
1126
       if ( use_shm_2d_plan) then
1127
          idx = mod(idx -1, accel_lo%nia) +1
1128
       endif
1129
    end do
1130
    call shm_node_barrier
1131
    call time_message(.false., time_fft, ' FFT')
1132
  contains
1133

1134
    !> FIXME : Add documentation
1135
    subroutine fft_shm(k1, k2)
1136
      use mp, only : proc0
1137
      implicit none
1138
      integer, intent(in) :: k1, k2
1139

1140
      if (use_shm_2d_plan) then 
1141
         call fft2d_shm(k1, k2)
1142
      else
1143
         call fft1d_shm(k1,k2)
1144
      endif
1145

1146
    end subroutine fft_shm
1147

1148
    !> FIXME : Add documentation
1149
    subroutine fft1d_shm(k1,k2)
1150
      use shm_mpi3, only : shm_info, shm_fence
1151
      implicit none
1152
      integer, intent(in) :: k1, k2
1153

1154
      integer :: i, j, is, ie, js, je, idw, nwk
1155
      integer :: n1,n2,n3,p1,p2,p3
1156

1157
      n1 = 2*(2*ntgrid+1)
1158
      n2 = accel_lo%ndky
1159
      n3 = accel_lo%nx
1160
      p1 = 2*(2*ntgrid+1)
1161
      p2 = accelx_lo%ny
1162
      p3 =  accelx_lo%nx
1163

1164
      if ( p3 /= n3 ) then 
1165
         write(0,*) "fft_shm: WARNING third dimesions not equal"
1166
      endif
1167

1168
      nwk = g_lo%ppn
1169
      idw = shm_info%id
1170
      is = idw*(n2/nwk) + min(idw,mod(n2, nwk))
1171
      ie = is + (n2/nwk) -1 
1172
      if ( idw < mod(n2, nwk)) ie = ie +1
1173
      js = idw*(p3/nwk) + min(idw,mod(p3, nwk))
1174
      je = js + (p3/nwk) -1 
1175
      if ( idw < mod(p3, nwk)) je = je +1
1176

1177
      do i = is, ie
1178
         ! For now these SHMEM calls don't match the modern fortran FFTW interface
1179
         ! so we have to fall back to an implied interface approach
1180
#ifdef SINGLE_PRECISION
1181
         call sfftw_execute_dft( &
1182
#else
1183
         call dfftw_execute_dft( &
1184
#endif
1185
         faccel_shmy%plan, ag_ptr(-ntgrid,1,k1+i), ag_ptr(-ntgrid,1,k1+i))
1186
      end do
1187

1188
      call shm_node_barrier
1189
      call shm_fence(ag_ptr(-ntgrid,1,accel_lo%llim_node))
1190

1191
      do j = js, je
1192
         ! For now these SHMEM calls don't match the modern fortran FFTW interface
1193
         ! so we have to fall back to an implied interface approach
1194
#ifdef SINGLE_PRECISION
1195
         call sfftw_execute_dft_c2r( &
1196
#else
1197
         call dfftw_execute_dft_c2r( &
1198
#endif
1199
         faccel_shmx%plan, ag_ptr(-ntgrid,1,k1+j*n2), axf_ptr(-ntgrid,1,k2+j*p2))
1200
      end do
1201
    end subroutine fft1d_shm
1202

1203
    !> FIXME : Add documentation         
1204
    subroutine fft2d_shm(k1,k2)
1205
      use shm_mpi3, only : shm_info, shm_fence
1206
      implicit none
1207
      integer, intent(in) :: k1, k2
1208

1209
      integer :: i, j, n2,n3, howmany,ts1,ts2
1210

1211
      complex,allocatable :: cin(:,:)
1212
      real, allocatable ::  rout(:,:) 
1213

1214
      n2 = accel_lo%ny
1215
      n3 = accel_lo%nx
1216
      howmany = yf_fft_shm%howmany
1217
      ts1=yf_fft_shm%ts1
1218
      ts2=yf_fft_shm%ts2
1219

1220
      if (use_shm_2d_plan_buff) then
1221
         allocate(rout(howmany, n2*n3), &
1222
              cin(howmany, (n2/2+1)*n3))
1223

1224
         do j=1,(n2/2+1)*n3
1225
            do i=1,howmany
1226
               cin(i,j) =  ag_ptr(ts1+i-1, ts2,k1+j-1)
1227
            enddo
1228
         enddo
1229

1230
         call yf_fft_shm%execute_c2r(cin, rout)
1231

1232
         do j=1,n2*n3
1233
            do i=1,howmany
1234
               axf_ptr(ts1+i-1,ts2,k2+j-1)= rout(i,j)
1235
            end do
1236
         end do
1237
      else
1238
         ! For now these SHMEM calls don't match the modern fortran FFTW interface
1239
         ! so we have to fall back to an implied interface approach
1240
#ifdef SINGLE_PRECISION
1241
         call sfftw_execute_dft_c2r( &
1242
#else
1243
         call dfftw_execute_dft_c2r( &
1244
#endif
1245
         yf_fft_shm%plan, ag_ptr(ts1, ts2,k1), axf_ptr(ts1,ts2,k2))
1246
      end if
1247

1248
    end subroutine fft2d_shm
1249
# endif
1250

1251
  end subroutine transform2_5d_accel
×
1252

1253
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1254

1255
  !> FIXME : Add documentation
1256
  subroutine inverse2_5d_accel (axf, g)
×
1257
    use gs2_layouts, only: g_lo, accel_lo, accelx_lo, ik_idx
1258
    use job_manage, only: time_message
1259
    use fft_work, only: time_fft
1260
#ifdef SHMEM
1261
    use shm_mpi3, only : shm_info, shm_get_node_pointer, shm_node_barrier, shm_fence
1262
#endif
1263
    implicit none
1264
    real, dimension (:,:,accelx_lo%llim_proc:), target, intent (in out) :: axf
1265
    complex, dimension (:,:,g_lo%llim_proc:), target, intent (out) :: g
1266
    integer :: iglo, idx, k
1267
    integer :: itgrid, iduo
1268
    integer :: ntgrid
1269
#ifdef SHMEM
1270
    complex, pointer, contiguous ::ag_ptr(:,:,:) => null(), g_ptr(:,:,:) => null()
1271
    real, pointer, contiguous :: axf_ptr(:,:,:) => null() 
1272
    integer kk, nk, kb, bidx
1273
    integer, save :: iglo_s, iglo_e, kbs, kbe
1274
    logical, save :: firsttime =.true.
1275
#endif
1276

1277
    ntgrid = accel_lo%ntgrid
×
1278
#ifndef SHMEM
1279
    ! transform
1280
    idx = 1
×
1281
    call time_message(.false., time_fft, ' FFT')
×
1282
    do k = accelx_lo%llim_proc, accelx_lo%ulim_proc, accelx_lo%nxny
×
1283
       call yb_fft%execute_r2c(axf(:, :, k:), ag(:, :, iak(idx):))
×
1284
       idx = idx + 1
×
1285
    end do
1286
    call time_message(.false., time_fft, ' FFT')
×
1287

1288
    idx = g_lo%llim_proc
×
1289
    do k = accel_lo%llim_proc, accel_lo%ulim_proc
×
1290
       ! ignore the large k (anti-alias)
1291
       if ( .not.aidx(k)) then
×
1292
          !
1293
          !CMR+GC, 2/9/2013:
1294
          !  Scaling g here would be unnecessary if gs2 used standard Fourier coefficients.
1295
          ! different scale factors depending on ky == 0
1296
          if (ik_idx(g_lo, idx) /= 1) then
×
1297
             do iduo = 1, 2 
×
1298
                do itgrid = 1, 2*ntgrid+1
×
1299
                   g(itgrid, iduo, idx) &
×
1300
                        = 2.0 * yb_fft%scale * ag(itgrid-(ntgrid+1), iduo, k)
×
1301
                enddo
1302
             enddo
1303
          else
1304
             do iduo = 1, 2 
×
1305
                do itgrid = 1, 2*ntgrid+1
×
1306
                   g(itgrid, iduo, idx) &
×
1307
                        = yb_fft%scale * ag(itgrid-(ntgrid+1), iduo, k)
×
1308
                enddo
1309
             enddo
1310
          endif
1311
          idx = idx + 1
×
1312
       endif
1313
    enddo
1314

1315
    !CMR+GC: 2/9/2013
1316
    ! Following large loop can be eliminated if gs2 used standard Fourier coefficients.
1317
    ! (See above comment in transform2_5d.)
1318

1319
    ! we might not have scaled all g
1320
    Do iglo = idx, g_lo%ulim_proc
×
1321
       if (ik_idx(g_lo, iglo) /= 1) then
×
1322
          g(:,:, iglo) = 2.0 * g(:,:,iglo)
×
1323
       endif
1324
    enddo
1325

1326
#else
1327
    ! shm
1328
    ag_ptr (-ntgrid:,1:,accel_lo%llim_node:) => shm_get_node_pointer(ag, -1)
1329
    axf_ptr(1:,1:,accelx_lo%llim_node:) => shm_get_node_pointer(axf, -1)
1330

1331
    call shm_node_barrier
1332
    ! transform
1333
    if ( use_shm_2d_plan) then
1334
       idx = 1 + shm_info%id
1335
       idx = mod(idx-1,accel_lo%nia) +1
1336
       nk = accelx_lo%ulim_node - accelx_lo%llim_node +1
1337
    else
1338
       idx = 1
1339
    endif
1340
    call time_message(.false., time_fft, ' FFT')
1341
    do k = accelx_lo%llim_node, accelx_lo%ulim_node, accelx_lo%nxny
1342
       if (use_shm_2d_plan) then
1343
          kk = k + shm_info%id * accelx_lo%nxny
1344
          kk = mod(kk - accelx_lo%llim_node, nk) + accelx_lo%llim_node
1345
       else
1346
          kk = k
1347
       endif
1348

1349
       call fft_shm_inv(kk, iak(idx))
1350

1351
       idx = idx + 1
1352
       if ( use_shm_2d_plan) then
1353
          idx = mod(idx-1, accel_lo%nia) +1
1354
       endif
1355
    end do
1356

1357
    ! see comment from _accel
1358
    call shm_node_barrier
1359
    call time_message(.false., time_fft, ' FFT')
1360

1361
    if(firsttime)then
1362
       firsttime=.false.
1363
       call compute_block_partition(g_lo%ntheta0 * g_lo%naky, iglo_s, iglo_e)
1364
       call compute_block_partition(accel_lo%nxnky, kbs, kbe)
1365
    end if
1366

1367
    g_ptr (1:,1:,g_lo%llim_node:) => shm_get_node_pointer(g, -1)
1368
    bidx=0
1369
    do kb = accel_lo%llim_node, accel_lo%ulim_node,accel_lo%nxnky
1370
       !      idx = g_lo%llim_node +bidx * g_lo%naky*g_lo%ntheta0 + iglo_s !g_lo%llim_node !gidx_s !g_lo%llim_proc
1371
       do k = kb+kbs, kb+kbs+kbe 
1372
          ! ignore the large k (anti-alias)
1373
          if ( .not.aidx(k)) then
1374
             !
1375
             !CMR+GC, 2/9/2013:
1376
             !  Scaling g here would be unnecessary if gs2 used standard Fourier coefficients.
1377
             ! different scale factors depending on ky == 0
1378
             do iduo = 1, 2 
1379
                do itgrid = 1, 2*ntgrid+1
1380
                   g_ptr(itgrid, iduo, gidx(k)) &
1381
                        = yb_fft%scale * ag_ptr(itgrid-(ntgrid+1), iduo, k)
1382
                enddo
1383
             enddo
1384
             !          idx = idx + 1
1385
          endif
1386
       enddo
1387
       call shm_node_barrier
1388
       call shm_fence(g_ptr(1,1,g_lo%llim_node))
1389
       ! we might not have scaled all g
1390
       idx = g_lo%llim_node +bidx * g_lo%naky*g_lo%ntheta0 
1391
       Do iglo = idx+iglo_s, idx+iglo_s+iglo_e
1392
          if (ik_idx(g_lo, iglo) /= 1) then
1393
             g_ptr(:,:, iglo) = 2.0 * g_ptr(:,:,iglo)
1394
          endif
1395
       enddo
1396
       bidx=bidx+1
1397
    enddo
1398

1399
    !CMR+GC: 2/9/2013
1400
    ! Following large loop can be eliminated if gs2 used standard Fourier coefficients.
1401
    ! (See above comment in transform2_5d.)
1402

1403
    call shm_node_barrier
1404

1405
  contains
1406

1407
    !> FIXME : Add documentation
1408
    subroutine fft_shm_inv(k1, k2)
1409
      implicit none
1410
      integer, intent(in) :: k1, k2
1411
      if (use_shm_2d_plan) then
1412
         call fft2d_shm_inv(k1, k2)
1413
      else
1414
         call fft1d_shm_inv(k1,k2)
1415
      endif
1416
    end subroutine fft_shm_inv
1417

1418
    !> FIXME : Add documentation
1419
    subroutine fft1d_shm_inv(k1,k2)
1420
      use shm_mpi3, only : shm_info, shm_fence
1421
      implicit none
1422
      integer, intent(in) :: k1, k2
1423

1424
      integer i, j, is, ie, js, je, idw, nwk
1425
      integer nx, ny, px, py
1426

1427
      ! test ny == py !!!
1428
      nx = accelx_lo%ny
1429
      ny = accelx_lo%nx
1430
      px = accel_lo%ndky 
1431
      py = accel_lo%nx
1432

1433
      nwk = g_lo%ppn
1434
      idw = shm_info%id
1435
      is = idw*(px/nwk) + min(idw,mod(px, nwk))
1436
      ie = is + (px/nwk) -1 
1437
      if ( idw < mod(px, nwk)) ie = ie +1
1438
      js = idw*(ny/nwk) + min(idw,mod(ny, nwk))
1439
      je = js + (ny/nwk) -1 
1440
      if ( idw < mod(ny, nwk)) je = je +1
1441

1442
      do j = js, je
1443
         ! For now these SHMEM calls don't match the modern fortran FFTW interface
1444
         ! so we have to fall back to an implied interface approach
1445
#ifdef SINGLE_PRECISION
1446
         call sfftw_execute_dft_r2c( &
1447
#else
1448
         call dfftw_execute_dft_r2c( &
1449
#endif
1450
         baccel_shmx%plan, axf_ptr(1,1,k1+j*nx), ag_ptr(-ntgrid,1,k2+j*px))
1451
      end do
1452

1453
      call shm_node_barrier
1454
      call shm_fence(ag_ptr(-ntgrid,1,accel_lo%llim_node))
1455

1456
      do i = is, ie
1457
         ! For now these SHMEM calls don't match the modern fortran FFTW interface
1458
         ! so we have to fall back to an implied interface approach
1459
#ifdef SINGLE_PRECISION
1460
         call sfftw_execute_dft( &
1461
#else
1462
         call dfftw_execute_dft( &
1463
#endif
1464
         baccel_shmy%plan, ag_ptr(-ntgrid,1,k2+i), ag_ptr(-ntgrid,1,k2+i))
1465
      end do
1466
    end subroutine fft1d_shm_inv
1467

1468
    !> FIXME : Add documentation          
1469
    subroutine fft2d_shm_inv(k1,k2)
1470
      use, intrinsic :: iso_c_binding 
1471
      use shm_mpi3, only : shm_info, shm_fence
1472
      implicit none
1473
      integer, intent(in) :: k1, k2
1474

1475
      integer i,j, ts1, ts1o, ts2, n2,n3, howmany
1476
      real, allocatable :: rin(:,:)
1477
      complex,allocatable::cout(:,:)
1478

1479
      n2 = accel_lo%ny
1480
      n3 = accel_lo%nx
1481
      ts1 = yb_fft_shm%ts1
1482
      ts2=yb_fft_shm%ts2
1483
      howmany=yb_fft_shm%howmany
1484
      ts1o=ntgrid +1+ts1
1485

1486
      if ( use_shm_2d_plan_buff) then 
1487
         allocate( rin(howmany, n2*n3), &
1488
              cout(howmany,(n2/2+1)*n3))
1489

1490
         do j=1,n2*n3
1491
            do i=1,howmany
1492
               rin(i,j) =  axf_ptr(ts1o+i-1,ts2,k1+j-1)
1493
            enddo
1494
         enddo
1495
         call yb_fft_shm%execute_r2c(rin, cout)
1496
         do j=1,(n2/2+1)*n3
1497
            do i=1,howmany
1498
               ag_ptr(ts1+i-1,ts2,k2+j-1) = cout(i,j)
1499
            enddo
1500
         enddo
1501
      else
1502
         ! For now these SHMEM calls don't match the modern fortran FFTW interface
1503
         ! so we have to fall back to an implied interface approach
1504
#ifdef SINGLE_PRECISION
1505
         call sfftw_execute_dft_r2c( &
1506
#else
1507
         call dfftw_execute_dft_r2c( &
1508
#endif
1509
         yb_fft_shm%plan, axf_ptr(ts1o,ts2,k1), ag_ptr(ts1,ts2,k2))
1510
      end if
1511
    end subroutine fft2d_shm_inv
1512
# endif
1513
  end subroutine inverse2_5d_accel
×
1514

1515
  !> FIXME : Add documentation        
1516
  subroutine init_3d (nny_in, nnx_in, how_many_in)
×
1517
    use fft_work, only: init_crfftw, init_rcfftw, FFT_TO_SPECTRAL_SPACE, FFT_TO_REAL_SPACE
1518
    implicit none
1519
    integer, intent(in) :: nny_in, nnx_in, how_many_in
1520
    integer, save :: nnx = 0, nny = 0, how_many = 0
1521

1522
    ! If we've already initialised check if any of the sizes have changed,
1523
    ! if not we can just return, otherwise delete plans and start again
1524
    if (initialized_3d) then
×
1525
       if (nnx /= nnx_in .or. nny /= nny_in .or. how_many /= how_many_in) then
×
1526
          call xf3d_cr%delete
×
1527
          call xf3d_rc%delete
×
1528
       else
1529
          return
×
1530
       end if
1531
    end if
1532
    initialized_3d = .true.
×
1533
    nny = nny_in
×
1534
    nnx = nnx_in
×
1535
    how_many = how_many_in
×
1536

1537
    call init_crfftw (xf3d_cr, FFT_TO_REAL_SPACE, nny, nnx, how_many)
×
1538
    call init_rcfftw (xf3d_rc, FFT_TO_SPECTRAL_SPACE, nny, nnx, how_many)
×
1539
  end subroutine init_3d
1540

1541
  !> FIXME : Add documentation        
1542
  subroutine transform2_3d (phi, phixf, nny, nnx)
×
1543
    use theta_grid, only: ntgrid
1544
    use kt_grids, only: naky, ntheta0, aky
1545
    use job_manage, only: time_message
1546
    use fft_work, only: time_fft
1547
    implicit none
1548
    integer, intent(in) :: nnx, nny
1549
    complex, dimension (-ntgrid:,:,:), intent (in) :: phi
1550
    real, dimension (:,:,-ntgrid:), intent (out) :: phixf  
1551
    real, dimension (:,:,:), allocatable :: phix
×
1552
    complex, dimension (:,:,:), allocatable :: aphi
×
1553
    real :: fac
1554
    integer :: ig, ik, it
1555

1556
    ! scale, dealias and transpose
1557
    call init_3d (nny, nnx, 2*ntgrid+1)
×
1558

1559
    allocate (phix (-ntgrid:ntgrid, nny, nnx))
×
1560
    allocate (aphi (-ntgrid:ntgrid, nny/2+1, nnx))
×
1561
    aphi = 0.
×
1562

1563
    do ik = 1, naky
×
1564
       fac = 0.5
×
1565
       if (aky(ik) < epsilon(0.)) fac = 1.0
×
1566
       do it = 1, (ntheta0+1) / 2
×
1567
          do ig = -ntgrid, ntgrid
×
1568
             aphi(ig, ik, it) = phi(ig, it, ik) * fac !Note transpose of it and ik here
×
1569
          end do
1570
       end do
1571
       do it = (ntheta0+1) / 2 + 1, ntheta0
×
1572
          do ig = -ntgrid, ntgrid
×
1573
             !CMR, 30/3/2010: bug fix to replace nx by nnx on next line
1574
             aphi(ig, ik, it-ntheta0+nnx) = phi(ig,it,ik) * fac
×
1575
          end do
1576
       end do
1577
    end do
1578

1579
    ! transform
1580
    call time_message(.false., time_fft, ' FFT')
×
1581
    call xf3d_cr%execute_c2r(aphi, phix)
×
1582
    call time_message(.false., time_fft, ' FFT')
×
1583

1584
    do it = 1, nnx
×
1585
       do ik = 1, nny
×
1586
          do ig = -ntgrid, ntgrid
×
1587
             phixf(it, ik, ig) = phix(ig, ik, it)
×
1588
          end do
1589
       end do
1590
    end do
1591

1592
    deallocate (aphi, phix)
×
1593

1594
  end subroutine transform2_3d
×
1595

1596
  !> FIXME : Add documentation
1597
  subroutine inverse2_3d (phixf, phi, nny, nnx)
×
1598
    !CMR, 30/4/2010:
1599
    ! Fixed up previously buggy handling of dealiasing.
1600
    use theta_grid, only: ntgrid
1601
    use kt_grids, only: naky, ntheta0, aky
1602
    use job_manage, only: time_message
1603
    use fft_work, only: time_fft
1604
    implicit none
1605
    real, dimension (:,:,-ntgrid:), intent(in) :: phixf
1606
    complex, dimension (-ntgrid:,:,:), intent(out) :: phi
1607
    integer, intent(in) :: nnx, nny
1608
    complex, dimension (:,:,:), allocatable :: aphi
×
1609
    real, dimension (:,:,:), allocatable :: phix
×
1610
    real :: fac
1611
    integer :: ik, it, ig
1612
    ! Note assumes init_3d has already been called
1613
    allocate (aphi (-ntgrid:ntgrid, nny/2+1, nnx))
×
1614
    allocate (phix (-ntgrid:ntgrid, nny, nnx))
×
1615

1616
    do it = 1, nnx
×
1617
       do ik = 1, nny
×
1618
          do ig = -ntgrid, ntgrid
×
1619
             phix(ig, ik, it) = phixf(it, ik, ig)
×
1620
          end do
1621
       end do
1622
    end do
1623

1624
    ! transform
1625
    call time_message(.false., time_fft, ' FFT')
×
1626
    call xf3d_rc%execute_r2c(phix, aphi)
×
1627
    call time_message(.false., time_fft, ' FFT')
×
1628

1629
    ! dealias and scale
1630
    do it = 1, (ntheta0+1) / 2
×
1631
       do ik = 1, naky
×
1632
          fac = 2.0
×
1633
          if (aky(ik) < epsilon(0.0)) fac = 1.0
×
1634
          do ig = -ntgrid, ntgrid
×
1635
             phi(ig, it, ik) = aphi(ig, ik, it) * fac * xf3d_rc%scale
×
1636
          end do
1637
       end do
1638
    end do
1639

1640
    !CMR, 30/4/2010:  fixed up previously buggy handling of dealiasing
1641
    do it = (ntheta0+1) / 2 + 1, ntheta0
×
1642
       do ik = 1, naky
×
1643
          fac = 2.0
×
1644
          if (aky(ik) < epsilon(0.0)) fac = 1.0
×
1645
          do ig = -ntgrid, ntgrid
×
1646
             phi(ig,it,ik) = aphi(ig, ik, it-ntheta0+nnx) * fac * xf3d_rc%scale
×
1647
          end do
1648
       end do
1649
    end do
1650

1651
    deallocate (aphi, phix)
×
1652

1653
  end subroutine inverse2_3d
×
1654

1655
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1656

1657
  !> FIXME : Add documentation
1658
  subroutine transform2_2d (phi, phixf, nny, nnx)
×
1659
    use fft_work, only: FFT_TO_REAL_SPACE, init_crfftw, time_fft
1660
    use kt_grids, only: naky, nakx => ntheta0, nx, aky
1661
    use job_manage, only: time_message
1662
    implicit none
1663
    integer, intent(in) :: nnx, nny
1664
    complex, dimension(:, :), intent (in) :: phi
1665
    real, dimension(:, :), intent (out) :: phixf
1666
    real, dimension(:, :), allocatable :: phix
×
1667
    complex, dimension(:, :), allocatable :: aphi
×
1668
    real :: fac
1669
    integer :: ik, it
1670
    type (fft_type) :: xf2d
1671

1672
    !May be inefficient to create and destroy this fft plan
1673
    !on every call to the routine. We may want to move this
1674
    !variable to module level and check its created flag.
1675
    call init_crfftw (xf2d, FFT_TO_REAL_SPACE, nny, nnx, 1)
×
1676

1677
    allocate (phix (nny, nnx))
×
1678
    allocate (aphi (nny/2+1, nnx))
×
1679
    phix(:,:)=0.; aphi(:,:)=cmplx(0.,0.)
×
1680

1681
    ! scale, dealias and transpose
1682
    do ik = 1, naky
×
1683
       fac = 0.5
×
1684
       if (aky(ik) < epsilon(0.)) fac = 1.0
×
1685
       do it = 1, (nakx+1) / 2
×
1686
          aphi(ik, it) = phi(it, ik) * fac
×
1687
       end do
1688
       do it = (nakx+1) / 2 + 1, nakx
×
1689
          aphi(ik, it-nakx+nx) = phi(it, ik) * fac
×
1690
       end do
1691
    end do
1692

1693
    ! transform
1694
    call time_message(.false., time_fft, ' FFT')
×
1695
    call xf2d%execute_c2r(aphi, phix)
×
1696
    call time_message(.false., time_fft, ' FFT')
×
1697

1698
    phixf = transpose(phix)
×
1699

1700
    deallocate (aphi, phix)
×
1701
    !RN> this statement causes error for lahey with DEBUG. I don't know why
1702
    !<DD>Reinstating after discussion with RN, if this causes anyone an issue
1703
    !    then we can guard this line with some directives.   
1704
    call xf2d%delete
×
1705
  end subroutine transform2_2d
×
1706

1707
  !> FIXME : Add documentation
1708
  subroutine inverse2_2d (phixf, phi, nny, nnx)
×
1709
    use fft_work, only: FFT_TO_SPECTRAL_SPACE, init_rcfftw, time_fft
1710
    use kt_grids, only: naky, nakx => ntheta0, aky
1711
    use job_manage, only: time_message
1712
    implicit none
1713
    real, intent(in) :: phixf(:,:)
1714
    complex, intent(out) :: phi(:,:)
1715
    integer, intent(in) :: nnx, nny
1716
    complex, allocatable :: aphi(:,:)
×
1717
    real, allocatable :: phix(:,:)
×
1718
    real :: fac
1719
    integer :: ik, it
1720
    type (fft_type) :: xf2d
1721

1722
    !May be inefficient to create and destroy this fft plan
1723
    !on every call to the routine. We may want to move this
1724
    !variable to module level and check its created flag.
1725
    call init_rcfftw (xf2d, FFT_TO_SPECTRAL_SPACE, nny, nnx, 1)
×
1726

1727
    allocate (aphi (nny/2+1, nnx))
×
1728
    allocate (phix (nny, nnx))
×
1729
    phix = 0.; aphi = cmplx(0.,0.)
×
1730
    phix = transpose(phixf)
×
1731

1732
    ! transform
1733
    call time_message(.false., time_fft, ' FFT')
×
1734
    call xf2d%execute_r2c(phix, aphi)
×
1735
    call time_message(.false., time_fft, ' FFT')
×
1736

1737
    ! scale, dealias and transpose
1738
    do it = 1, nakx
×
1739
       do ik = 1, naky
×
1740
          fac = 2.0
×
1741
          if (aky(ik) < epsilon(0.0)) fac = 1.0
×
1742
          phi(it, ik) = aphi(ik, it) * fac * xf2d%scale
×
1743
       end do
1744
    end do
1745

1746
    deallocate (aphi, phix)
×
1747
    !RN> this statement causes error for lahey with DEBUG. I don't know why
1748
    !<DD>Reinstating after discussion with RN, if this causes anyone an issue
1749
    !    then we can guard this line with some directives.   
1750
    call xf2d%delete
×
1751
  end subroutine inverse2_2d
×
1752

1753
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1754

1755
  !> FIXME : Add documentation        
1756
  subroutine init_zf (ntgrid, howmany)
×
1757
    use fft_work, only: init_z, FFT_TO_REAL_SPACE
1758
    implicit none
1759
    integer, intent (in) :: ntgrid, howmany
1760

1761
    if (initialized_zf) return
×
1762
    initialized_zf = .true.
×
1763

1764
    ! Note we pass 2*ntgrid rather than 2*ntgrid + 1 so that we don't
1765
    ! include the duplicate theta point at {-pi,pi}
1766
    call init_z(zf_fft, FFT_TO_REAL_SPACE, 2*ntgrid, howmany)
×
1767
  end subroutine init_zf
1768

1769
!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
1770

1771
  !> FIXME : Add documentation        
1772
  subroutine kz_spectrum (an, an2)
×
1773
    use job_manage, only: time_message
1774
    use fft_work, only: time_fft
1775
    complex, dimension (:,:,:), intent(in out)  :: an
1776
    complex, dimension (:,:,:), intent(out) :: an2
1777
    an2 = 0.0
×
1778
    call time_message(.false., time_fft, ' FFT')
×
1779
    call zf_fft%execute_c2c(an, an2)
×
1780
    call time_message(.false., time_fft, ' FFT')
×
1781
    an2 = conjg(an2)*an2
×
1782
  end subroutine kz_spectrum
×
1783

1784
  !> FIXME : Add documentation
1785
  subroutine finish_transforms
×
1786
    use redistribute, only : delete_redist
1787
    use fft_work, only: finish_fft_work
1788
#ifdef SHMEM
1789
    use shm_mpi3, only : shm_free
1790
#endif
1791

1792
    if(allocated(xxf)) deallocate(xxf)
×
1793
    if(allocated(ia)) deallocate(ia)
×
1794
    if(allocated(iak)) deallocate(iak)
×
1795
    if(allocated(aidx)) deallocate(aidx)
×
1796
#ifndef SHMEM
1797
    if(allocated(ag)) deallocate(ag)
×
1798
#else
1799
    if (associated(ag)) call shm_free(ag)
1800
#endif
1801

1802
    call delete_redist(g2x)
×
1803
    call delete_redist(x2y)
×
1804

1805
    if(allocated(fft)) deallocate(fft) 
×
1806

1807
    !Destroy fftw plans
1808
    call yf_fft%delete
×
1809
    call yb_fft%delete
×
1810
    call xf_fft%delete
×
1811
    call xb_fft%delete
×
1812
    call zf_fft%delete
×
1813
    call xf3d_cr%delete
×
1814
    call xf3d_rc%delete
×
1815

1816
    !Reset init state flags
1817
    initialized = .false.
×
1818
    initialized_y_fft = .false.
×
1819
    initialized_x_redist = .false.
×
1820
    initialized_y_redist = .false.
×
1821
    initialized_3d = .false.
×
1822
    initialized_zf = .false.
×
1823

1824
    !Tidy up fft internals (FFTW3 only)
1825
    call finish_fft_work
×
1826
  end subroutine finish_transforms
×
1827

1828
#ifdef SHMEM
1829
  !> FIXME : Add documentation
1830
  subroutine compute_block_partition(whole_range, shift, extend)
1831
    ! computes shift + extend
1832
    use shm_mpi3, only : shm_info
1833
    implicit none
1834

1835
    integer, intent(in) :: whole_range
1836
    integer, intent(out) :: shift, extend
1837

1838
    integer n, chnk
1839

1840
    n = whole_range
1841
    chnk = n / shm_info%size
1842
    shift = shm_info%id * chnk + min(shm_info%id, mod(n,shm_info%size))
1843
    extend = chnk-1
1844
    if (shm_info%id< mod(n,shm_info%size)) extend= extend + 1
1845

1846
  end subroutine compute_block_partition
1847
#endif       
1848
end module gs2_transforms
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc