Skip to content

Scalar indexing in the callback integration #156

@huiyuxie

Description

@huiyuxie

The issue of scalar indexing on GPU arrays still exists in the callback integration. The errors reported from the examples are as below:

ERROR: Scalar indexing is disallowed.
Invocation of getindex resulted in scalar indexing of a GPU array.
This is typically caused by calling an iterating implementation of a method.
Such implementations *do not* execute on the GPU, but very slowly on the CPU,
and therefore should be avoided.

If you want to allow scalar iteration, use `allowscalar` or `@allowscalar`
to enable scalar iteration globally or for the operations in question.
Stacktrace:
  [1] error(s::String)
    @ Base .\error.jl:35
  [2] errorscalar(op::String)
    @ GPUArraysCore C:\Users\huiyu\.julia\packages\GPUArraysCore\aNaXo\src\GPUArraysCore.jl:151
  [3] _assertscalar(op::String, behavior::GPUArraysCore.ScalarIndexing)
    @ GPUArraysCore C:\Users\huiyu\.julia\packages\GPUArraysCore\aNaXo\src\GPUArraysCore.jl:124
  [4] assertscalar(op::String)
    @ GPUArraysCore C:\Users\huiyu\.julia\packages\GPUArraysCore\aNaXo\src\GPUArraysCore.jl:112
  [5] getindex
    @ C:\Users\huiyu\.julia\packages\GPUArrays\uiVyU\src\host\indexing.jl:50 [inlined]
  [6] scalar_getindex
    @ C:\Users\huiyu\.julia\packages\GPUArrays\uiVyU\src\host\indexing.jl:36 [inlined]
  [7] _getindex
    @ C:\Users\huiyu\.julia\packages\GPUArrays\uiVyU\src\host\indexing.jl:19 [inlined]
  [8] getindex
    @ C:\Users\huiyu\.julia\packages\GPUArrays\uiVyU\src\host\indexing.jl:17 [inlined]
  [9] #474
    @ C:\Users\huiyu\.julia\packages\Trixi\Mofyy\src\solvers\dg.jl:559 [inlined]
 [10] ntuple
    @ .\ntuple.jl:48 [inlined]
 [11] get_node_vars
    @ C:\Users\huiyu\.julia\packages\Trixi\Mofyy\src\solvers\dg.jl:559 [inlined]
 [12] (::Trixi.var"#1521#1527"{…})(u::CUDA.CuArray{…}, i::Int64, element::Int64, equations::LinearScalarAdvectionEquation1D{…}, dg::DG{…})
    @ Trixi C:\Users\huiyu\.julia\packages\Trixi\Mofyy\src\callbacks_step\analysis_dg1d.jl:179
 [13] integrate_via_indices(::Trixi.var"#1521#1527"{…}, ::CUDA.CuArray{…}, ::TreeMesh{…}, ::LinearScalarAdvectionEquation1D{…}, ::DG{…}, ::@NamedTuple{…}; normalize::Bool)
    @ TrixiCUDA C:\Users\huiyu\.julia\dev\TrixiCUDA.jl\src\callbacks_step\analysis_dg_1d.jl:7
 [14] integrate_via_indices
    @ C:\Users\huiyu\.julia\dev\TrixiCUDA.jl\src\callbacks_step\analysis_dg_1d.jl:1 [inlined]
 [15] #integrate#1520
    @ C:\Users\huiyu\.julia\packages\Trixi\Mofyy\src\callbacks_step\analysis_dg1d.jl:177 [inlined]
 [16] integrate
    @ C:\Users\huiyu\.julia\packages\Trixi\Mofyy\src\callbacks_step\analysis_dg1d.jl:174 [inlined]
 [17] #integrate#1347
    @ C:\Users\huiyu\.julia\packages\Trixi\Mofyy\src\semidiscretization\semidiscretization.jl:61 [inlined]
 [18] integrate
    @ C:\Users\huiyu\.julia\packages\Trixi\Mofyy\src\semidiscretization\semidiscretization.jl:56 [inlined]
 [19] #integrate#1348
    @ C:\Users\huiyu\.julia\packages\Trixi\Mofyy\src\semidiscretization\semidiscretization.jl:65 [inlined]
 [20] integrate
    @ C:\Users\huiyu\.julia\packages\Trixi\Mofyy\src\semidiscretization\semidiscretization.jl:64 [inlined]
 [21] initialize!(cb::DiscreteCallback{…}, u_ode::CUDA.CuArray{…}, du_ode::CUDA.CuArray{…}, t::Float64, integrator::OrdinaryDiffEqCore.ODEIntegrator{…}, semi::SemidiscretizationHyperbolicGPU{…})
    @ Trixi C:\Users\huiyu\.julia\packages\Trixi\Mofyy\src\callbacks_step\analysis.jl:154
 [22] initialize!
    @ C:\Users\huiyu\.julia\packages\Trixi\Mofyy\src\callbacks_step\analysis.jl:147 [inlined]
 [23] initialize!
    @ C:\Users\huiyu\.julia\packages\DiffEqBase\RKckX\src\callbacks.jl:13 [inlined]
 [24] initialize!
    @ C:\Users\huiyu\.julia\packages\DiffEqBase\RKckX\src\callbacks.jl:14 [inlined]
 [25] initialize!
    @ C:\Users\huiyu\.julia\packages\DiffEqBase\RKckX\src\callbacks.jl:7 [inlined]
 [26] initialize_callbacks!(integrator::OrdinaryDiffEqCore.ODEIntegrator{…}, initialize_save::Bool)
    @ OrdinaryDiffEqCore C:\Users\huiyu\.julia\packages\OrdinaryDiffEqCore\LG1u6\src\solve.jl:730
 [27] __init(prob::ODEProblem{…}, alg::CarpenterKennedy2N54{…}, timeseries_init::Tuple{}, ts_init::Tuple{}, ks_init::Tuple{}, recompile::Type{…}; saveat::Tuple{}, tstops::Tuple{}, d_discontinuities::Tuple{}, save_idxs::Nothing, save_everystep::Bool, save_on::Bool, save_start::Bool, save_end::Nothing, callback::CallbackSet{…}, dense::Bool, calck::Bool, dt::Float64, dtmin::Float64, dtmax::Float64, force_dtmin::Bool, adaptive::Bool, gamma::Int64, abstol::Nothing, reltol::Nothing, qmin::Int64, qmax::Int64, qsteady_min::Int64, qsteady_max::Int64, beta1::Nothing, beta2::Nothing, qoldinit::Int64, controller::Nothing, fullnormalize::Bool, failfactor::Int64, maxiters::Int64, internalnorm::typeof(DiffEqBase.ODE_DEFAULT_NORM), internalopnorm::typeof(LinearAlgebra.opnorm), isoutofdomain::typeof(DiffEqBase.ODE_DEFAULT_ISOUTOFDOMAIN), unstable_check::typeof(DiffEqBase.ODE_DEFAULT_UNSTABLE_CHECK), verbose::Bool, timeseries_errors::Bool, dense_errors::Bool, advance_to_tstop::Bool, stop_at_next_tstop::Bool, initialize_save::Bool, progress::Bool, progress_steps::Int64, progress_name::String, progress_message::typeof(DiffEqBase.ODE_DEFAULT_PROG_MESSAGE), progress_id::Symbol, userdata::Nothing, allow_extrapolation::Bool, initialize_integrator::Bool, alias::ODEAliasSpecifier, initializealg::OrdinaryDiffEqCore.DefaultInit, kwargs::@Kwargs{})
    @ OrdinaryDiffEqCore C:\Users\huiyu\.julia\packages\OrdinaryDiffEqCore\LG1u6\src\solve.jl:586
 [28] __init (repeats 5 times)
    @ C:\Users\huiyu\.julia\packages\OrdinaryDiffEqCore\LG1u6\src\solve.jl:11 [inlined]
 [29] #__solve#62
    @ C:\Users\huiyu\.julia\packages\OrdinaryDiffEqCore\LG1u6\src\solve.jl:6 [inlined]
 [30] __solve
    @ C:\Users\huiyu\.julia\packages\OrdinaryDiffEqCore\LG1u6\src\solve.jl:1 [inlined]
 [31] solve_call(_prob::ODEProblem{…}, args::CarpenterKennedy2N54{…}; merge_callbacks::Bool, kwargshandle::Nothing, kwargs::@Kwargs{…})
    @ DiffEqBase C:\Users\huiyu\.julia\packages\DiffEqBase\RKckX\src\solve.jl:667
 [32] solve_call
    @ C:\Users\huiyu\.julia\packages\DiffEqBase\RKckX\src\solve.jl:624 [inlined]
 [33] #solve_up#45
    @ C:\Users\huiyu\.julia\packages\DiffEqBase\RKckX\src\solve.jl:1199 [inlined]
 [34] solve_up
    @ C:\Users\huiyu\.julia\packages\DiffEqBase\RKckX\src\solve.jl:1177 [inlined]
 [35] #solve#43
    @ C:\Users\huiyu\.julia\packages\DiffEqBase\RKckX\src\solve.jl:1089 [inlined]
 [36] top-level scope
    @ c:\Users\huiyu\.julia\dev\TrixiCUDA.jl\examples\advection_basic_1d.jl:58
Some type information was truncated. Use `show(err)` to see complete types.

The root cause may be the function from Trixi.jl,

# Quoted verbatim from Trixi.jl (src/solvers/dg.jl:559): gathers the values of all
# `nvariables(equations)` solution variables at one node (addressed by `indices...`)
# from the solution array `u` into an `SVector`. Each tuple element performs a
# scalar `getindex` on `u` — on a `CuArray` outside a kernel this triggers the
# "Scalar indexing is disallowed" error shown in the stacktrace above.
@inline function get_node_vars(u, equations, solver::DG, indices...)
    # There is a cut-off at `n == 10` inside of the method
    # `ntuple(f::F, n::Integer) where F` in Base at ntuple.jl:17
    # in Julia `v1.5`, leading to type instabilities if
    # more than ten variables are used. That's why we use
    # `Val(...)` below.
    # We use `@inline` to make sure that the `getindex` calls are
    # really inlined, which might be the default choice of the Julia
    # compiler for standard `Array`s but not necessarily for more
    # advanced array types such as `PtrArray`s, cf.
    # https://github.com/JuliaSIMD/VectorizationBase.jl/issues/55
    SVector(ntuple(@inline(v->u[v, indices...]), Val(nvariables(equations))))
end

which leads to scalar indexing on GPU arrays (though I still have no idea why it works inside kernels but not in plain host-side calls — inside a kernel, indexing a device array is ordinary elementwise access, whereas on the host each `getindex` becomes a disallowed scalar read). So it is better to test this function outside the kernels first to verify the cause.

Metadata

Metadata

Assignees

Labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions