-
Notifications
You must be signed in to change notification settings - Fork 3
/
tea_leaf_ppcg.f90
249 lines (160 loc) · 4.97 KB
/
tea_leaf_ppcg.f90
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
MODULE tea_leaf_ppcg_module
USE tea_leaf_cheby_module
USE definitions_module
USE update_halo_module
IMPLICIT NONE
CONTAINS
SUBROUTINE tea_leaf_ppcg_init_sd(theta)
IMPLICIT NONE
INTEGER :: t
REAL(KIND=8) :: theta
IF (use_cuda_kernels) THEN
DO t=1,tiles_per_task
CALL tea_leaf_ppcg_init_sd_kernel_cuda(theta)
ENDDO
ENDIF
END SUBROUTINE tea_leaf_ppcg_init_sd
SUBROUTINE tea_leaf_ppcg_init_sd_new(theta)
IMPLICIT NONE
INTEGER :: t
REAL(KIND=8) :: theta
IF (use_cuda_kernels) THEN
DO t=1,tiles_per_task
CALL tea_leaf_ppcg_init_sd_new_kernel_cuda(theta)
ENDDO
ENDIF
END SUBROUTINE tea_leaf_ppcg_init_sd_new
SUBROUTINE tea_leaf_ppcg_init(ppcg_inner_iters, ch_alphas, ch_betas, theta, step, rrn)
IMPLICIT NONE
INTEGER :: ppcg_inner_iters,step
REAL(KIND=8) :: rrn,theta
REAL(KIND=8), DIMENSION(ppcg_inner_iters) :: ch_alphas,ch_betas
INTEGER :: t
REAL(KIND=8) :: tile_rrn
rrn = 0.0_8
IF (use_cuda_kernels) THEN
DO t=1,tiles_per_task
tile_rrn = 0.0_8
CALL tea_leaf_ppcg_init_kernel_cuda(step, tile_rrn)
rrn = rrn + tile_rrn
ENDDO
ENDIF
END SUBROUTINE tea_leaf_ppcg_init
SUBROUTINE tea_leaf_ppcg_inner(ch_alphas, ch_betas, inner_step, bounds_extra)
IMPLICIT NONE
INTEGER :: t, inner_step, bounds_extra
REAL(KIND=8), DIMENSION(:) :: ch_alphas, ch_betas
IF (use_cuda_kernels) THEN
DO t=1,tiles_per_task
CALL tea_leaf_ppcg_inner_kernel_cuda(inner_step, bounds_extra, chunk%chunk_neighbours)
ENDDO
ENDIF
END SUBROUTINE tea_leaf_ppcg_inner
SUBROUTINE tea_leaf_ppcg_calc_zrnorm(rrn)
IMPLICIT NONE
INTEGER :: t
REAL(KIND=8) :: rrn, tile_rrn
rrn = 0.0_8
IF (use_cuda_kernels) THEN
DO t=1,tiles_per_task
tile_rrn = 0.0_8
CALL tea_leaf_ppcg_calc_2norm_kernel_cuda(tile_rrn)
rrn = rrn + tile_rrn
ENDDO
ENDIF
END SUBROUTINE tea_leaf_ppcg_calc_zrnorm
! New: ppcg_store_r
SUBROUTINE tea_leaf_ppcg_store_r()
IMPLICIT NONE
INTEGER :: t
IF (use_cuda_kernels) THEN
DO t=1,tiles_per_task
CALL tea_leaf_ppcg_store_r_kernel_cuda()
ENDDO
ENDIF
END SUBROUTINE tea_leaf_ppcg_store_r
! New: update z
SUBROUTINE tea_leaf_ppcg_update_z()
IMPLICIT NONE
INTEGER :: t
IF (use_cuda_kernels) THEN
DO t=1,tiles_per_task
CALL tea_leaf_ppcg_update_z_kernel_cuda()
ENDDO
ENDIF
END SUBROUTINE tea_leaf_ppcg_update_z
! New
SUBROUTINE tea_leaf_ppcg_calc_rrn(rrn)
IMPLICIT NONE
INTEGER :: t
REAL(KIND=8) :: rrn, tile_rrn
rrn = 0.0_8
IF (use_cuda_kernels) THEN
DO t=1,tiles_per_task
tile_rrn = 0.0_8
CALL tea_leaf_ppcg_calc_rrn_kernel_cuda(tile_rrn)
rrn = rrn + tile_rrn
ENDDO
ENDIF
END SUBROUTINE
SUBROUTINE tea_calc_ls_coefs(ch_alphas, ch_betas, eigmin, eigmax, &
theta, ppcg_inner_steps)
INTEGER :: ppcg_inner_steps
REAL(KIND=8), DIMENSION(ppcg_inner_steps) :: ch_alphas, ch_betas
REAL(KIND=8) :: eigmin, eigmax, theta
! TODO
CALL tea_calc_ch_coefs(ch_alphas, ch_betas, eigmin, eigmax, &
theta, ppcg_inner_steps)
END SUBROUTINE
SUBROUTINE tea_leaf_run_ppcg_inner_steps(ch_alphas, ch_betas, theta, &
tl_ppcg_inner_steps, solve_time)
IMPLICIT NONE
INTEGER :: fields(NUM_FIELDS)
INTEGER :: tl_ppcg_inner_steps, ppcg_cur_step
REAL(KIND=8) :: theta
REAL(KIND=8) :: halo_time, timer, solve_time
REAL(KIND=8), DIMENSION(max_iters) :: ch_alphas, ch_betas
INTEGER(KIND=4) :: inner_step, bounds_extra
fields = 0
fields(FIELD_U) = 1
IF (profiler_on) halo_time=timer()
CALL update_halo(fields,1)
IF (profiler_on) solve_time = solve_time + (timer() - halo_time)
IF (tl_ppcg_inner_steps < 0) RETURN
CALL tea_leaf_ppcg_init_sd_new(theta)
! inner steps
DO ppcg_cur_step=1,tl_ppcg_inner_steps,halo_exchange_depth
fields = 0
fields(FIELD_SD) = 1
!fields(FIELD_R) = 1
IF (profiler_on) halo_time = timer()
CALL update_halo(fields,halo_exchange_depth)
IF (profiler_on) solve_time = solve_time + (timer()-halo_time)
inner_step = ppcg_cur_step
fields = 0
fields(FIELD_SD) = 1
DO bounds_extra = halo_exchange_depth-1, 0, -1
CALL tea_leaf_ppcg_inner(ch_alphas, ch_betas, (ppcg_cur_step + halo_exchange_depth-1 - bounds_extra), bounds_extra)
IF (profiler_on) halo_time = timer()
CALL update_boundary(fields, 1)
IF (profiler_on) solve_time = solve_time + (timer()-halo_time)
!print*, (ppcg_cur_step + halo_exchange_depth-1 -bounds_extra)
IF ((ppcg_cur_step + halo_exchange_depth-1 -bounds_extra) .eq. tl_ppcg_inner_steps) EXIT
ENDDO
ENDDO
!stop
fields = 0
fields(FIELD_P) = 1
CALL tea_leaf_ppcg_update_z()
END SUBROUTINE tea_leaf_run_ppcg_inner_steps
SUBROUTINE tea_leaf_ppcg_calc_p(beta)
IMPLICIT NONE
INTEGER :: t
REAL(KIND=8) :: beta
IF (use_cuda_kernels) THEN
DO t=1,tiles_per_task
CALL tea_leaf_ppcg_calc_p_kernel_cuda(beta)
ENDDO
ENDIF
END SUBROUTINE tea_leaf_ppcg_calc_p
END MODULE tea_leaf_ppcg_module