@@ -685,18 +685,67 @@ void CUDAChecker::Enter(const parser::CUFKernelDoConstruct &x) {
685685 std::get<std::list<parser::CUFReduction>>(directive.t )) {
686686 CheckReduce (context_, reduce);
687687 }
688- inCUFKernelDoConstruct_ = true ;
688+ ++deviceConstructDepth_;
689+ }
690+
691+ static bool IsOpenACCComputeConstruct (const parser::OpenACCBlockConstruct &x) {
692+ const auto &beginBlockDirective =
693+ std::get<Fortran::parser::AccBeginBlockDirective>(x.t );
694+ const auto &blockDirective =
695+ std::get<Fortran::parser::AccBlockDirective>(beginBlockDirective.t );
696+ if (blockDirective.v == llvm::acc::ACCD_parallel ||
697+ blockDirective.v == llvm::acc::ACCD_serial ||
698+ blockDirective.v == llvm::acc::ACCD_kernels) {
699+ return true ;
700+ }
701+ return false ;
689702}
690703
691704void CUDAChecker::Leave (const parser::CUFKernelDoConstruct &) {
692- inCUFKernelDoConstruct_ = false ;
705+ --deviceConstructDepth_;
706+ }
707+ void CUDAChecker::Enter (const parser::OpenACCBlockConstruct &x) {
708+ if (IsOpenACCComputeConstruct (x)) {
709+ ++deviceConstructDepth_;
710+ }
711+ }
712+ void CUDAChecker::Leave (const parser::OpenACCBlockConstruct &x) {
713+ if (IsOpenACCComputeConstruct (x)) {
714+ --deviceConstructDepth_;
715+ }
716+ }
717+ void CUDAChecker::Enter (const parser::OpenACCCombinedConstruct &) {
718+ ++deviceConstructDepth_;
719+ }
720+ void CUDAChecker::Leave (const parser::OpenACCCombinedConstruct &) {
721+ --deviceConstructDepth_;
722+ }
723+ void CUDAChecker::Enter (const parser::OpenACCLoopConstruct &) {
724+ ++deviceConstructDepth_;
725+ }
726+ void CUDAChecker::Leave (const parser::OpenACCLoopConstruct &) {
727+ --deviceConstructDepth_;
728+ }
729+ void CUDAChecker::Enter (const parser::DoConstruct &x) {
730+ if (x.IsDoConcurrent () &&
731+ context_.foldingContext ().languageFeatures ().IsEnabled (
732+ common::LanguageFeature::DoConcurrentOffload)) {
733+ ++deviceConstructDepth_;
734+ }
735+ }
736+ void CUDAChecker::Leave (const parser::DoConstruct &x) {
737+ if (x.IsDoConcurrent () &&
738+ context_.foldingContext ().languageFeatures ().IsEnabled (
739+ common::LanguageFeature::DoConcurrentOffload)) {
740+ --deviceConstructDepth_;
741+ }
693742}
694743
695744void CUDAChecker::Enter (const parser::AssignmentStmt &x) {
696745 auto lhsLoc{std::get<parser::Variable>(x.t ).GetSource ()};
697746 const auto &scope{context_.FindScope (lhsLoc)};
698747 const Scope &progUnit{GetProgramUnitContaining (scope)};
699- if (IsCUDADeviceContext (&progUnit) || inCUFKernelDoConstruct_ ) {
748+ if (IsCUDADeviceContext (&progUnit) || deviceConstructDepth_ > 0 ) {
700749 return ; // Data transfer with assignment is only performed on the host.
701750 }
702751
@@ -714,6 +763,16 @@ void CUDAChecker::Enter(const parser::AssignmentStmt &x) {
714763 context_.Say (lhsLoc,
715764 " More than one reference to a CUDA object on the right hand side of the assignment" _err_en_US);
716765 }
766+
767+ if (Fortran::evaluate::HasCUDADeviceAttrs (assign->lhs ) &&
768+ Fortran::evaluate::HasCUDAImplicitTransfer (assign->rhs )) {
769+ if (GetNbOfCUDAManagedOrUnifiedSymbols (assign->lhs ) == 1 &&
770+ GetNbOfCUDAManagedOrUnifiedSymbols (assign->rhs ) == 1 &&
771+ GetNbOfCUDADeviceSymbols (assign->rhs ) == 1 ) {
772+ return ; // This is a special case handled on the host.
773+ }
774+ context_.Say (lhsLoc, " Unsupported CUDA data transfer" _err_en_US);
775+ }
717776}
718777
719778} // namespace Fortran::semantics
0 commit comments