This repository was archived by the owner on Mar 20, 2023. It is now read-only.

Commit aac0915

iomaganaris authored and olupton committed
First working commit of openmp async execution
1 parent d6bf37c commit aac0915

8 files changed, 25 additions and 20 deletions

coreneuron/mechanism/capac.cpp (2 additions, 2 deletions)

@@ -71,7 +71,7 @@ void nrn_jacob_capacitance(NrnThread* _nt, Memb_list* ml, int /* type */) {
                        ni [0:_cntml_actual],
                        _vec_d [0:_nt->end]) if (_nt->compute_gpu)
                        async(_nt->streams[_nt->stream_id]))
-    nrn_pragma_omp(target teams distribute parallel for simd if(_nt->compute_gpu))
+    nrn_pragma_omp(target teams distribute parallel for simd if(_nt->compute_gpu) depend(inout: _nt->streams[_nt->stream_id]) nowait)
     for (_iml = 0; _iml < _cntml_actual; _iml++) {
         _vec_d[ni[_iml]] += cfac * cm;
     }

@@ -117,7 +117,7 @@ void nrn_cur_capacitance(NrnThread* _nt, Memb_list* ml, int /* type */) {
                        ni [0:_cntml_actual],
                        _vec_rhs [0:_nt->end]) if (_nt->compute_gpu)
                        async(_nt->streams[_nt->stream_id]))
-    nrn_pragma_omp(target teams distribute parallel for simd if(_nt->compute_gpu))
+    nrn_pragma_omp(target teams distribute parallel for simd if(_nt->compute_gpu) depend(inout: _nt->streams[_nt->stream_id]) nowait)
     for (int _iml = 0; _iml < _cntml_actual; _iml++) {
         i_cap = cfac * cm * _vec_rhs[ni[_iml]];
     }
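The same transformation repeats through the rest of this commit: each synchronous OpenMP target region gains depend(inout: ...) on the per-thread stream object plus nowait, turning it into a deferred task whose ordering relative to other kernels on the same stream is carried by the dependence. Below is a minimal standalone sketch of that pattern; it is not CoreNEURON code, and the names stream_token, d and rhs are illustrative only.

#include <vector>
#include <cstdio>

// Minimal sketch of the async-offload pattern used in this commit.
// `stream_token` plays the role of nt->streams[nt->stream_id]: a dummy
// variable whose only purpose is to carry task dependences between
// successive target regions launched on the same logical stream.
int main() {
    const int n = 1 << 20;
    std::vector<double> d(n, 1.0), rhs(n, 2.0);
    double* pd = d.data();
    double* prhs = rhs.data();
    int stream_token = 0;  // dependence object, never touched by the kernels

    #pragma omp target data map(tofrom: pd[0:n], prhs[0:n])
    {
        // First kernel: launched asynchronously (nowait), ordered on stream_token.
        #pragma omp target teams distribute parallel for simd \
            depend(inout: stream_token) nowait
        for (int i = 0; i < n; ++i)
            pd[i] += 0.5 * prhs[i];

        // Second kernel: the inout dependence on the same token serializes it
        // after the first kernel without blocking the host thread.
        #pragma omp target teams distribute parallel for simd \
            depend(inout: stream_token) nowait
        for (int i = 0; i < n; ++i)
            prhs[i] -= pd[i];

        // Host blocks here until both deferred target tasks have completed,
        // mirroring nrn_pragma_omp(taskwait) in this commit.
        #pragma omp taskwait
    }
    std::printf("d[0]=%g rhs[0]=%g\n", d[0], rhs[0]);
    return 0;
}

The host thread stays free between the two launches and only blocks at the taskwait, which is the role of the nrn_pragma_omp(taskwait) lines added next to nrn_pragma_acc(wait ...) further down in this diff.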

coreneuron/mechanism/eion.cpp (2 additions, 2 deletions)

@@ -268,7 +268,7 @@ void nrn_cur_ion(NrnThread* nt, Memb_list* ml, int type) {
                        nrn_ion_global_map
                        [0:nrn_ion_global_map_size] [0:ion_global_map_member_size]) if (nt->compute_gpu)
                        async(nt->streams[nt->stream_id]))
-    nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu))
+    nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu) depend(inout: nt->streams[nt->stream_id]) nowait)
     for (int _iml = 0; _iml < _cntml_actual; ++_iml) {
         dcurdv = 0.;
         cur = 0.;

@@ -341,7 +341,7 @@ void second_order_cur(NrnThread* _nt, int secondorder) {
                        ni [0:_cntml_actual],
                        _vec_rhs [0:_nt->end]) if (_nt->compute_gpu)
                        async(_nt->streams[_nt->stream_id]))
-    nrn_pragma_omp(target teams distribute parallel for simd if(_nt->compute_gpu))
+    nrn_pragma_omp(target teams distribute parallel for simd if(_nt->compute_gpu) depend(inout: _nt->streams[_nt->stream_id]) nowait)
     for (int _iml = 0; _iml < _cntml_actual; ++_iml) {
         cur += dcurdv * (_vec_rhs[ni[_iml]]);
     }

coreneuron/network/netcvode.cpp (2 additions, 1 deletion)

@@ -534,7 +534,7 @@ void NetCvode::check_thresh(NrnThread* nt) { // for default method
     nrn_pragma_acc(parallel loop present(
         nt [0:1], presyns_helper [0:nt->n_presyn], presyns [0:nt->n_presyn], actual_v [0:nt->end])
         copy(net_send_buf_count) if (nt->compute_gpu) async(nt->streams[nt->stream_id]))
-    nrn_pragma_omp(target teams distribute parallel for map(tofrom: net_send_buf_count) if(nt->compute_gpu))
+    nrn_pragma_omp(target teams distribute parallel for map(tofrom: net_send_buf_count) if(nt->compute_gpu) depend(inout: nt->streams[nt->stream_id]) nowait)
     for (int i = 0; i < nt->ncell; ++i) {
         PreSyn* ps = presyns + i;
         PreSynHelper* psh = presyns_helper + i;

@@ -562,6 +562,7 @@ void NetCvode::check_thresh(NrnThread* nt) { // for default method
         }
     }
     nrn_pragma_acc(wait async(nt->streams[nt->stream_id]))
+    nrn_pragma_omp(taskwait)
     nt->_net_send_buffer_cnt = net_send_buf_count;

     if (nt->compute_gpu && nt->_net_send_buffer_cnt) {
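One detail worth calling out in check_thresh: the deferred kernel maps a scalar counter with map(tofrom: net_send_buf_count), so the device-to-host copy-back is itself part of the deferred target task, and the new taskwait is what makes the value safe to read into nt->_net_send_buffer_cnt. The sketch below is illustrative only: count, token and the loop body are invented, and a reduction stands in for whatever accumulation the real kernel performs.

#include <cstdio>

// Sketch: a deferred target region that accumulates into a mapped scalar.
// `count` stands in for net_send_buf_count; the taskwait is what makes the
// copied-back value safe to read on the host, as added in check_thresh().
int main() {
    const int n = 1000;
    int count = 0;
    int token = 0;  // dependence object standing in for nt->streams[nt->stream_id]

    #pragma omp target teams distribute parallel for map(tofrom: count) \
        depend(inout: token) nowait reduction(+: count)
    for (int i = 0; i < n; ++i) {
        if (i % 7 == 0)
            count += 1;  // e.g. "this presynapse crossed threshold"
    }

    // Without this taskwait the host could read `count` before the deferred
    // target task (and its tofrom copy-back) has finished.
    #pragma omp taskwait

    std::printf("count = %d\n", count);
    return 0;
}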

coreneuron/network/partrans.cpp (1 addition, 1 deletion)

@@ -123,7 +123,7 @@ void nrnthread_v_transfer(NrnThread* _nt) {
                        tar_data [0:ndata],
                        insrc_buf_ [0:n_insrc_buf]) if (_nt->compute_gpu)
                        async(_nt->streams[_nt->stream_id]))
-    nrn_pragma_omp(target teams distribute parallel for simd map(to: tar_indices[0:ntar]) if(_nt->compute_gpu))
+    nrn_pragma_omp(target teams distribute parallel for simd map(to: tar_indices[0:ntar]) if(_nt->compute_gpu) depend(inout: _nt->streams[_nt->stream_id]) nowait)
     for (size_t i = 0; i < ntar; ++i) {
         tar_data[tar_indices[i]] = insrc_buf_[insrc_indices[i]];
     }

coreneuron/permute/cellorder.cpp (4 additions, 2 deletions)

@@ -607,7 +607,7 @@ void solve_interleaved2(int ith) {
                        stridedispl [0:nwarp + 1],
                        rootbegin [0:nwarp + 1],
                        nodebegin [0:nwarp + 1]) if (nt->compute_gpu) async(nt->streams[nt->stream_id]))
-    nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu))
+    nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu) depend(inout: nt->streams[nt->stream_id]) nowait)
     for (int icore = 0; icore < ncore; ++icore) {
         int iwarp = icore / warpsize;     // figure out the >> value
         int ic = icore & (warpsize - 1);  // figure out the & mask

@@ -627,6 +627,7 @@ void solve_interleaved2(int ith) {
 #endif
     }
     nrn_pragma_acc(wait async(nt->streams[nt->stream_id]))
+    nrn_pragma_omp(taskwait)
 #ifdef _OPENACC
 }
 #endif

@@ -661,13 +662,14 @@ void solve_interleaved1(int ith) {
                        lastnode [0:ncell],
                        cellsize [0:ncell]) if (nt->compute_gpu)
                        async(nt->streams[nt->stream_id]))
-    nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu))
+    nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu) depend(inout: nt->streams[nt->stream_id]) nowait)
     for (int icell = 0; icell < ncell; ++icell) {
         int icellsize = cellsize[icell];
         triang_interleaved(nt, icell, icellsize, nstride, stride, lastnode);
         bksub_interleaved(nt, icell, icellsize, nstride, stride, firstnode);
     }
     nrn_pragma_acc(wait async(nt->streams[nt->stream_id]))
+    nrn_pragma_omp(taskwait)
 }

 void solve_interleaved(int ith) {

coreneuron/sim/fadvance_core.cpp (6 additions, 4 deletions)

@@ -82,7 +82,7 @@ void dt2thread(double adt) { /* copied from nrnoc/fadvance.c */
                        async(nt->streams[nt->stream_id]) if (nt->compute_gpu))
         // clang-format off
         nrn_pragma_omp(target update to(nt->_t, nt->_dt, nt->cj)
-                       if(nt->compute_gpu))
+                       if(nt->compute_gpu))
         // clang-format on
     }
 }

@@ -207,14 +207,14 @@ void update(NrnThread* _nt) {
     if (secondorder) {
         nrn_pragma_acc(parallel loop present(vec_v [0:i2], vec_rhs [0:i2]) if (_nt->compute_gpu)
                        async(_nt->streams[_nt->stream_id]))
-        nrn_pragma_omp(target teams distribute parallel for simd if(_nt->compute_gpu))
+        nrn_pragma_omp(target teams distribute parallel for simd if(_nt->compute_gpu) depend(inout: _nt->streams[_nt->stream_id]) nowait)
         for (int i = 0; i < i2; ++i) {
            vec_v[i] += 2. * vec_rhs[i];
        }
     } else {
         nrn_pragma_acc(parallel loop present(vec_v [0:i2], vec_rhs [0:i2]) if (_nt->compute_gpu)
                        async(_nt->streams[_nt->stream_id]))
-        nrn_pragma_omp(target teams distribute parallel for simd if(_nt->compute_gpu))
+        nrn_pragma_omp(target teams distribute parallel for simd if(_nt->compute_gpu) depend(inout: _nt->streams[_nt->stream_id]) nowait)
         for (int i = 0; i < i2; ++i) {
             vec_v[i] += vec_rhs[i];
         }

@@ -296,7 +296,7 @@ void nrncore2nrn_send_values(NrnThread* nth) {

     nrn_pragma_acc(parallel loop present(tr [0:1]) if (nth->compute_gpu)
                    async(nth->streams[nth->stream_id]))
-    nrn_pragma_omp(target teams distribute parallel for simd if(nth->compute_gpu))
+    nrn_pragma_omp(target teams distribute parallel for simd if(nth->compute_gpu) depend(inout: nth->streams[nth->stream_id]) nowait)
     for (int i = 0; i < tr->n_trajec; ++i) {
         tr->varrays[i][vs] = *tr->gather[i];
     }

@@ -344,6 +344,7 @@ static void* nrn_fixed_step_thread(NrnThread* nth) {
     nrn_pragma_acc(update device(nth->_t) if (nth->compute_gpu) async(nth->streams[nth->stream_id]))
     nrn_pragma_acc(wait async(nth->streams[nth->stream_id]))
     nrn_pragma_omp(target update to(nth->_t) if (nth->compute_gpu))
+    nrn_pragma_omp(taskwait)
     fixed_play_continuous(nth);

     {

@@ -380,6 +381,7 @@ void* nrn_fixed_step_lastpart(NrnThread* nth) {
     nrn_pragma_acc(update device(nth->_t) if (nth->compute_gpu) async(nth->streams[nth->stream_id]))
     nrn_pragma_acc(wait async(nth->streams[nth->stream_id]))
     nrn_pragma_omp(target update to(nth->_t) if (nth->compute_gpu))
+    nrn_pragma_omp(taskwait)
     fixed_play_continuous(nth);
     nonvint(nth);
     nrncore2nrn_send_values(nth);

coreneuron/sim/fast_imem.cpp (2 additions, 2 deletions)

@@ -53,7 +53,7 @@ void nrn_calc_fast_imem(NrnThread* nt) {
     nrn_pragma_acc(
         parallel loop present(vec_rhs, vec_area, fast_imem_d, fast_imem_rhs) if (nt->compute_gpu)
             async(nt->streams[nt->stream_id]))
-    nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu))
+    nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu) depend(inout: nt->streams[nt->stream_id]) nowait)
     for (int i = i1; i < i3; ++i) {
         fast_imem_rhs[i] = (fast_imem_d[i] * vec_rhs[i] + fast_imem_rhs[i]) * vec_area[i] * 0.01;
     }

@@ -70,7 +70,7 @@ void nrn_calc_fast_imem_init(NrnThread* nt) {
     double* fast_imem_rhs = nt->nrn_fast_imem->nrn_sav_rhs;
     nrn_pragma_acc(parallel loop present(vec_rhs, vec_area, fast_imem_rhs) if (nt->compute_gpu)
                    async(nt->streams[nt->stream_id]))
-    nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu))
+    nrn_pragma_omp(target teams distribute parallel for simd if(nt->compute_gpu) depend(inout: nt->streams[nt->stream_id]) nowait)
     for (int i = i1; i < i3; ++i) {
         fast_imem_rhs[i] = (vec_rhs[i] + fast_imem_rhs[i]) * vec_area[i] * 0.01;
     }

coreneuron/sim/treeset_core.cpp (6 additions, 6 deletions)

@@ -34,7 +34,7 @@ static void nrn_rhs(NrnThread* _nt) {

     nrn_pragma_acc(parallel loop present(vec_rhs [0:i3], vec_d [0:i3]) if (_nt->compute_gpu)
                    async(_nt->streams[_nt->stream_id]))
-    nrn_pragma_omp(target teams distribute parallel for if(_nt->compute_gpu))
+    nrn_pragma_omp(target teams distribute parallel for if(_nt->compute_gpu) depend(inout: _nt->streams[_nt->stream_id]) nowait)
     for (int i = i1; i < i3; ++i) {
         vec_rhs[i] = 0.;
         vec_d[i] = 0.;

@@ -46,7 +46,7 @@ static void nrn_rhs(NrnThread* _nt) {
         nrn_pragma_acc(
             parallel loop present(fast_imem_d [i1:i3], fast_imem_rhs [i1:i3]) if (_nt->compute_gpu)
                 async(_nt->streams[_nt->stream_id]))
-        nrn_pragma_omp(target teams distribute parallel for if(_nt->compute_gpu))
+        nrn_pragma_omp(target teams distribute parallel for if(_nt->compute_gpu) depend(inout: _nt->streams[_nt->stream_id]) nowait)
         for (int i = i1; i < i3; ++i) {
             fast_imem_d[i] = 0.;
             fast_imem_rhs[i] = 0.;

@@ -76,7 +76,7 @@ static void nrn_rhs(NrnThread* _nt) {
         double* p = _nt->nrn_fast_imem->nrn_sav_rhs;
         nrn_pragma_acc(parallel loop present(p, vec_rhs) if (_nt->compute_gpu)
                        async(_nt->streams[_nt->stream_id]))
-        nrn_pragma_omp(target teams distribute parallel for if(_nt->compute_gpu))
+        nrn_pragma_omp(target teams distribute parallel for if(_nt->compute_gpu) depend(inout: _nt->streams[_nt->stream_id]) nowait)
         for (int i = i1; i < i3; ++i) {
             p[i] -= vec_rhs[i];
         }

@@ -93,7 +93,7 @@ static void nrn_rhs(NrnThread* _nt) {
                                         vec_v [0:i3],
                                         parent_index [0:i3]) if (_nt->compute_gpu)
                        async(_nt->streams[_nt->stream_id]))
-    nrn_pragma_omp(target teams distribute parallel for if(_nt->compute_gpu))
+    nrn_pragma_omp(target teams distribute parallel for if(_nt->compute_gpu) depend(inout: _nt->streams[_nt->stream_id]) nowait)
     for (int i = i2; i < i3; ++i) {
         double dv = vec_v[parent_index[i]] - vec_v[i];
         /* our connection coefficients are negative so */

@@ -153,7 +153,7 @@ static void nrn_lhs(NrnThread* _nt) {
         */
         double* p = _nt->nrn_fast_imem->nrn_sav_d;
         nrn_pragma_acc(parallel loop present(p, vec_d) if (_nt->compute_gpu) async(_nt->streams[_nt->stream_id]))
-        nrn_pragma_omp(target teams distribute parallel for if(_nt->compute_gpu))
+        nrn_pragma_omp(target teams distribute parallel for if(_nt->compute_gpu) depend(inout: _nt->streams[_nt->stream_id]) nowait)
         for (int i = i1; i < i3; ++i) {
             p[i] += vec_d[i];
         }

@@ -163,7 +163,7 @@ static void nrn_lhs(NrnThread* _nt) {
     nrn_pragma_acc(parallel loop present(
         vec_d [0:i3], vec_a [0:i3], vec_b [0:i3], parent_index [0:i3]) if (_nt->compute_gpu)
                    async(_nt->streams[_nt->stream_id]))
-    nrn_pragma_omp(target teams distribute parallel for if(_nt->compute_gpu))
+    nrn_pragma_omp(target teams distribute parallel for if(_nt->compute_gpu) depend(inout: _nt->streams[_nt->stream_id]) nowait)
     for (int i = i2; i < i3; ++i) {
         nrn_pragma_acc(atomic update)
         nrn_pragma_omp(atomic update)
