diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index 2b400b012d6ed..0275447e1090a 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -5235,6 +5235,12 @@ def HLSLGetSpirvSpecConstant : LangBuiltin<"HLSL_LANG">, HLSLScalarTemplate {
   let Prototype = "T(unsigned int, T)";
 }
 
+def HLSLF16ToF32 : LangBuiltin<"HLSL_LANG"> {
+  let Spellings = ["__builtin_hlsl_elementwise_f16tof32"];
+  let Attributes = [NoThrow, Const, CustomTypeChecking];
+  let Prototype = "void(...)";
+}
+
 // Builtins for XRay.
 def XRayCustomEvent : Builtin {
   let Spellings = ["__xray_customevent"];
diff --git a/clang/lib/CodeGen/CGHLSLBuiltins.cpp b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
index fbf4a5722caed..b6928ce7d9c44 100644
--- a/clang/lib/CodeGen/CGHLSLBuiltins.cpp
+++ b/clang/lib/CodeGen/CGHLSLBuiltins.cpp
@@ -160,6 +160,57 @@ static Value *handleHlslSplitdouble(const CallExpr *E, CodeGenFunction *CGF) {
   return LastInst;
 }
 
+static Value *handleElementwiseF16ToF32(CodeGenFunction &CGF,
+                                        const CallExpr *E) {
+  Value *Op0 = CGF.EmitScalarExpr(E->getArg(0));
+  QualType Op0Ty = E->getArg(0)->getType();
+  if (!Op0Ty->hasUnsignedIntegerRepresentation())
+    llvm_unreachable(
+        "f16tof32 operand must have an unsigned int representation");
+
+  // The result is a float, or a float vector with as many elements as the
+  // operand. NumElements stays 0 for a scalar operand.
+  llvm::Type *ResType = CGF.FloatTy;
+  uint64_t NumElements = 0;
+  if (Op0->getType()->isVectorTy()) {
+    NumElements = Op0Ty->castAs<clang::VectorType>()->getNumElements();
+    ResType =
+        llvm::VectorType::get(ResType, ElementCount::getFixed(NumElements));
+  }
+
+  if (CGF.CGM.getTriple().isDXIL())
+    return CGF.Builder.CreateIntrinsic(ResType, Intrinsic::dx_legacyf16tof32,
+                                       ArrayRef<Value *>{Op0}, nullptr,
+                                       "hlsl.f16tof32");
+
+  if (CGF.CGM.getTriple().isSPIRV()) {
+    // We use the SPIRV UnpackHalf2x16 operation to avoid the need for the
+    // Int16 and Float16 capabilities.
+    auto *UnpackType =
+        llvm::VectorType::get(CGF.FloatTy, ElementCount::getFixed(2));
+    // Unpacks both halves of a uint and returns element 0 of the result.
+    auto EmitUnpackLow = [&](Value *Packed) -> Value * {
+      Value *Unpack = CGF.Builder.CreateIntrinsic(
+          UnpackType, Intrinsic::spv_unpackhalf2x16, ArrayRef<Value *>{Packed});
+      return CGF.Builder.CreateExtractElement(Unpack, uint64_t{0});
+    };
+
+    if (NumElements == 0) // scalar operand: a single unpack suffices
+      return EmitUnpackLow(Op0);
+
+    // A vector input - build a congruent output vector by unpacking each
+    // element of the input vector in turn.
+    Value *Result = PoisonValue::get(ResType);
+    for (uint64_t I = 0; I < NumElements; ++I) {
+      Value *InVal = CGF.Builder.CreateExtractElement(Op0, I);
+      Result = CGF.Builder.CreateInsertElement(Result, EmitUnpackLow(InVal), I);
+    }
+    return Result;
+  }
+
+  llvm_unreachable("Intrinsic F16ToF32 not supported by target architecture");
+}
+
 static Value *emitBufferStride(CodeGenFunction *CGF, const Expr *HandleExpr,
                                LValue &Stride) {
   // Figure out the stride of the buffer elements from the handle type.
@@ -579,6 +630,9 @@ Value *CodeGenFunction::EmitHLSLBuiltinExpr(unsigned BuiltinID,
         /*ReturnType=*/X->getType(), CGM.getHLSLRuntime().getDegreesIntrinsic(),
         ArrayRef<Value *>{X}, nullptr, "hlsl.degrees");
   }
+  case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
+    return handleElementwiseF16ToF32(*this, E);
+  }
   case Builtin::BI__builtin_hlsl_elementwise_frac: {
     Value *Op0 = EmitScalarExpr(E->getArg(0));
     if (!E->getArg(0)->getType()->hasFloatingRepresentation())
diff --git a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
index a918af39e4074..4c5861c2c5f9d 100644
--- a/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
+++ b/clang/lib/Headers/hlsl/hlsl_alias_intrinsics.h
@@ -1052,6 +1052,27 @@ float3 exp2(float3);
 _HLSL_BUILTIN_ALIAS(__builtin_elementwise_exp2)
 float4 exp2(float4);
 
+//===----------------------------------------------------------------------===//
+// f16tof32 builtins
+//===----------------------------------------------------------------------===//
+
+/// \fn float f16tof32(uint x)
+/// \brief Returns the half value stored in the low 16 bits of the uint arg
+/// converted to a float.
+/// \param x The uint containing two half values.
+///
+/// The float value of the half value found in the low 16 bits of the \a x
+/// parameter.
+
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
+float f16tof32(uint);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
+float2 f16tof32(uint2);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
+float3 f16tof32(uint3);
+_HLSL_BUILTIN_ALIAS(__builtin_hlsl_elementwise_f16tof32)
+float4 f16tof32(uint4);
+
 //===----------------------------------------------------------------------===//
 // firstbithigh builtins
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/Sema/SemaHLSL.cpp b/clang/lib/Sema/SemaHLSL.cpp
index 94a490a8f68dc..b9707f0036765 100644
--- a/clang/lib/Sema/SemaHLSL.cpp
+++ b/clang/lib/Sema/SemaHLSL.cpp
@@ -2802,6 +2802,23 @@ static bool CheckUnsignedIntRepresentation(Sema *S, SourceLocation Loc,
   return false;
 }
 
+// Diagnoses argument ArgOrdinal (zero-based) of TheCall unless its scalar or
+// vector element type is exactly Width bits wide; returns true on error.
+static bool CheckExpectedBitWidth(Sema *S, CallExpr *TheCall,
+                                  unsigned ArgOrdinal, unsigned Width) {
+  const Expr *Arg = TheCall->getArg(ArgOrdinal);
+  QualType ArgTy = Arg->getType();
+  if (auto *VTy = ArgTy->getAs<VectorType>())
+    ArgTy = VTy->getElementType();
+  uint64_t ElementBitCount =
+      S->getASTContext().getTypeSizeInChars(ArgTy).getQuantity() * 8;
+  if (ElementBitCount == Width)
+    return false;
+  S->Diag(Arg->getBeginLoc(), diag::err_integer_incorrect_bit_count)
+      << Width << ElementBitCount;
+  return true;
+}
+
 static void SetElementTypeAsReturnType(Sema *S, CallExpr *TheCall,
                                        QualType ReturnType) {
   auto *VecTyA = TheCall->getArg(0)->getType()->getAs<VectorType>();
@@ -2961,24 +2978,16 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
                                    CheckUnsignedIntVecRepresentation))
       return true;
 
-    auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
     // ensure arg integers are 32-bits
-    uint64_t ElementBitCount = getASTContext()
-                                   .getTypeSizeInChars(VTy->getElementType())
-                                   .getQuantity() *
-                               8;
-    if (ElementBitCount != 32) {
-      SemaRef.Diag(TheCall->getBeginLoc(),
-                   diag::err_integer_incorrect_bit_count)
-          << 32 << ElementBitCount;
+    if (CheckExpectedBitWidth(&SemaRef, TheCall, 0, 32))
       return true;
-    }
 
     // ensure both args are vectors of total bit size of a multiple of 64
+    auto *VTy = TheCall->getArg(0)->getType()->getAs<VectorType>();
     int NumElementsArg = VTy->getNumElements();
     if (NumElementsArg != 2 && NumElementsArg != 4) {
       SemaRef.Diag(TheCall->getBeginLoc(), diag::err_vector_incorrect_bit_count)
-          << 1 /*a multiple of*/ << 64 << NumElementsArg * ElementBitCount;
+          << 1 /*a multiple of*/ << 64 << NumElementsArg * 32;
       return true;
     }
 
@@ -3295,7 +3304,7 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     break;
   }
   // Note these are llvm builtins that we want to catch invalid intrinsic
-  // generation. Normal handling of these builitns will occur elsewhere.
+  // generation. Normal handling of these builtins will occur elsewhere.
   case Builtin::BI__builtin_elementwise_bitreverse: {
     // does not include a check for number of arguments
     // because that is done previously
@@ -3405,6 +3414,30 @@ bool SemaHLSL::CheckBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) {
     }
     break;
   }
+  case Builtin::BI__builtin_hlsl_elementwise_f16tof32: {
+    if (SemaRef.checkArgCount(TheCall, 1))
+      return true;
+    if (CheckAllArgTypesAreCorrect(&SemaRef, TheCall,
+                                   CheckUnsignedIntRepresentation))
+      return true;
+    // ensure arg integers are 32 bits
+    if (CheckExpectedBitWidth(&SemaRef, TheCall, 0, 32))
+      return true;
+    // check it wasn't a bool type
+    QualType ArgTy = TheCall->getArg(0)->getType();
+    if (auto *VTy = ArgTy->getAs<VectorType>())
+      ArgTy = VTy->getElementType();
+    if (ArgTy->isBooleanType()) {
+      SemaRef.Diag(TheCall->getArg(0)->getBeginLoc(),
+                   diag::err_builtin_invalid_arg_type)
+          << 1 << /* scalar or vector of */ 5 << /* unsigned int */ 3
+          << /* no fp */ 0 << TheCall->getArg(0)->getType();
+      return true;
+    }
+
+    SetElementTypeAsReturnType(&SemaRef, TheCall, getASTContext().FloatTy);
+    break;
+  }
   }
   return false;
 }
diff --git
a/clang/test/CodeGenHLSL/builtins/f16tof32-builtin.hlsl b/clang/test/CodeGenHLSL/builtins/f16tof32-builtin.hlsl
new file mode 100644
index 0000000000000..65dba664bb5ea
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f16tof32-builtin.hlsl
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s
+
+// CHECK: define hidden noundef nofpclass(nan inf) float
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn float @llvm.dx.legacyf16tof32.i32(i32 %0)
+// CHECK: ret float %hlsl.f16tof32
+// CHECK: declare float @llvm.dx.legacyf16tof32.i32(i32)
+float test_scalar(uint p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <2 x float>
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32> %0)
+// CHECK: ret <2 x float> %hlsl.f16tof32
+// CHECK: declare <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32>)
+float2 test_uint2(uint2 p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) #0 {
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32> %0)
+// CHECK: ret <3 x float> %hlsl.f16tof32
+// CHECK: declare <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32>)
+float3 test_uint3(uint3 p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) #0 {
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32> %0)
+// CHECK: ret <4 x float> %hlsl.f16tof32
+// CHECK: declare <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32>)
+float4 test_uint4(uint4 p0) { return __builtin_hlsl_elementwise_f16tof32(p0); }
diff --git a/clang/test/CodeGenHLSL/builtins/f16tof32.hlsl b/clang/test/CodeGenHLSL/builtins/f16tof32.hlsl
new file mode 100644
index 0000000000000..b68bc197f16c5
--- /dev/null
+++ b/clang/test/CodeGenHLSL/builtins/f16tof32.hlsl
@@ -0,0 +1,27 @@
+// RUN: %clang_cc1 -std=hlsl202x -finclude-default-header -x hlsl -triple \
+// RUN:   dxil-pc-shadermodel6.3-library %s -emit-llvm -disable-llvm-passes \
+// RUN:   -o - | FileCheck %s
+
+// CHECK: define hidden noundef nofpclass(nan inf) float
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn float @llvm.dx.legacyf16tof32.i32(i32 %0)
+// CHECK: ret float %hlsl.f16tof32
+// CHECK: declare float @llvm.dx.legacyf16tof32.i32(i32)
+float test_scalar(uint p0) { return f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <2 x float>
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32> %0)
+// CHECK: ret <2 x float> %hlsl.f16tof32
+// CHECK: declare <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32>)
+float2 test_uint2(uint2 p0) { return f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) #0 {
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32> %0)
+// CHECK: ret <3 x float> %hlsl.f16tof32
+// CHECK: declare <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32>)
+float3 test_uint3(uint3 p0) { return f16tof32(p0); }
+
+// CHECK: define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) #0 {
+// CHECK: %hlsl.f16tof32 = call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32> %0)
+// CHECK: ret <4 x float> %hlsl.f16tof32
+// CHECK: declare <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32>)
+float4 test_uint4(uint4 p0) { return f16tof32(p0); }
diff --git
a/clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl b/clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl
new file mode 100644
index 0000000000000..8f2f9308ed966
--- /dev/null
+++ b/clang/test/SemaHLSL/BuiltIns/f16tof32-errors.hlsl
@@ -0,0 +1,134 @@
+// RUN: %clang_cc1 -finclude-default-header -x hlsl -triple dxil-pc-shadermodel6.6-library %s -fnative-int16-type -emit-llvm-only -disable-llvm-passes -verify
+
+float builtin_f16tof32_too_few_arg() {
+  return __builtin_hlsl_elementwise_f16tof32();
+  // expected-error@-1 {{too few arguments to function call, expected 1, have 0}}
+}
+
+float builtin_f16tof32_too_many_arg(uint p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0, p0);
+  // expected-error@-1 {{too many arguments to function call, expected 1, have 2}}
+}
+
+float builtin_f16tof32_bool(bool p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool')}}
+}
+
+float builtin_f16tof32_bool4(bool4 p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool4' (aka 'vector<bool, 4>'))}}
+}
+
+float builtin_f16tof32_short(short p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'short')}}
+}
+
+float builtin_f16tof32_unsigned_short(unsigned short p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{incorrect number of bits in integer (expected 32 bits, have 16)}}
+}
+
+float builtin_f16tof32_int(int p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int')}}
+}
+
+float builtin_f16tof32_int64_t(long p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'long')}}
+}
+
+float2 builtin_f16tof32_int2_to_float2_promotion(int2 p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int2' (aka 'vector<int, 2>'))}}
+}
+
+float builtin_f16tof32_half(half p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half')}}
+}
+
+float builtin_f16tof32_half4(half4 p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half4' (aka 'vector<half, 4>'))}}
+}
+
+float builtin_f16tof32_float(float p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'float')}}
+}
+
+float builtin_f16tof32_double(double p0) {
+  return __builtin_hlsl_elementwise_f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'double')}}
+}
+
+float f16tof32_too_few_arg() {
+  return f16tof32();
+  // expected-error@-1 {{no matching function for call to 'f16tof32'}}
+  // expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 0 were provided}}
+}
+
+float f16tof32_too_many_arg(uint p0) {
+  return f16tof32(p0, p0);
+  // expected-error@-1 {{no matching function for call to 'f16tof32'}}
+  // expected-note@hlsl/hlsl_alias_intrinsics.h:* 4 {{candidate function not viable: requires 1 argument, but 2 were provided}}
+}
+
+float f16tof32_bool(bool p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool')}}
+}
+
+float f16tof32_bool3(bool3 p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'bool3' (aka 'vector<bool, 3>'))}}
+}
+
+
+float f16tof32_int16_t(short p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'short')}}
+}
+
+float f16tof32_uint16_t(unsigned short p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{incorrect number of bits in integer (expected 32 bits, have 16)}}
+}
+
+float f16tof32_int(int p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int')}}
+}
+
+float f16tof32_int64_t(long p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'long')}}
+}
+
+float2 f16tof32_int2_to_float2_promotion(int3 p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'int3' (aka 'vector<int, 3>'))}}
+}
+
+float f16tof32_half(half p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half')}}
+}
+
+float f16tof32_half2(half2 p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'half2' (aka 'vector<half, 2>'))}}
+}
+
+float f16tof32_float(float p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'float')}}
+}
+
+float f16tof32_double(double p0) {
+  return f16tof32(p0);
+  // expected-error@-1 {{1st argument must be a scalar or vector of unsigned integer types (was 'double')}}
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsDirectX.td b/llvm/include/llvm/IR/IntrinsicsDirectX.td
index d6b85630eb979..9924b905aee63 100644
--- a/llvm/include/llvm/IR/IntrinsicsDirectX.td
+++ b/llvm/include/llvm/IR/IntrinsicsDirectX.td
@@ -140,6 +140,9 @@ def int_dx_isinf : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1
 def int_dx_isnan : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>],
     [llvm_anyfloat_ty], [IntrNoMem]>;
 
+def
int_dx_legacyf16tof32 : DefaultAttrsIntrinsic<[LLVMScalarOrSameVectorWidth<0, llvm_float_ty>],
+    [llvm_anyint_ty], [IntrNoMem]>;
+
 def int_dx_lerp : DefaultAttrsIntrinsic<[LLVMMatchType<0>], [llvm_anyfloat_ty, LLVMMatchType<0>,LLVMMatchType<0>],
     [IntrNoMem]>;
 
diff --git a/llvm/include/llvm/IR/IntrinsicsSPIRV.td b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
index bc51fb639fd75..f39c6cda2c579 100644
--- a/llvm/include/llvm/IR/IntrinsicsSPIRV.td
+++ b/llvm/include/llvm/IR/IntrinsicsSPIRV.td
@@ -200,4 +200,7 @@ def int_spv_resource_nonuniformindex
   def int_spv_generic_cast_to_ptr_explicit
     : DefaultAttrsIntrinsic<[llvm_anyptr_ty], [generic_ptr_ty],
                             [IntrNoMem, NoUndef]>;
+
+  def int_spv_unpackhalf2x16 : DefaultAttrsIntrinsic<[llvm_anyfloat_ty], [llvm_i32_ty], [IntrNoMem]>;
+
 }
diff --git a/llvm/lib/Target/DirectX/DXIL.td b/llvm/lib/Target/DirectX/DXIL.td
index 7ae500a55b92d..67437f6969b27 100644
--- a/llvm/lib/Target/DirectX/DXIL.td
+++ b/llvm/lib/Target/DirectX/DXIL.td
@@ -1079,6 +1079,15 @@ def WaveActiveOp : DXILOp<119, waveActiveOp> {
   let attributes = [Attributes<DXIL1_0, []>];
 }
 
+def LegacyF16ToF32 : DXILOp<131, legacyF16ToF32> {
+  let Doc = "returns the float16 stored in the low-half of the uint converted "
+            "to a float";
+  let intrinsics = [IntrinSelect<int_dx_legacyf16tof32>];
+  let arguments = [Int32Ty];
+  let result = FloatTy;
+  let stages = [Stages<DXIL1_0, [all_stages]>];
+}
+
 def WaveAllBitCount : DXILOp<135, waveAllOp> {
   let Doc = "returns the count of bits set to 1 across the wave";
   let intrinsics = [IntrinSelect<int_dx_wave_active_countbits>];
diff --git a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
index 60dfd9650937c..6cacbf6564db2 100644
--- a/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
+++ b/llvm/lib/Target/DirectX/DirectXTargetTransformInfo.cpp
@@ -29,11 +29,12 @@ bool DirectXTTIImpl::isTargetIntrinsicWithOverloadTypeAtArg(Intrinsic::ID ID,
                                                             int OpdIdx) const {
   switch (ID) {
   case Intrinsic::dx_asdouble:
-  case Intrinsic::dx_isinf:
-  case Intrinsic::dx_isnan:
   case Intrinsic::dx_firstbitlow:
-  case Intrinsic::dx_firstbituhigh:
   case Intrinsic::dx_firstbitshigh:
+  case Intrinsic::dx_firstbituhigh:
+  case Intrinsic::dx_isinf:
+  case Intrinsic::dx_isnan:
+  case Intrinsic::dx_legacyf16tof32:
     return OpdIdx == 0;
   default:
     return OpdIdx == -1;
@@ -50,6 +51,7 @@ bool DirectXTTIImpl::isTargetIntrinsicTriviallyScalarizable(
   case Intrinsic::dx_frac:
   case Intrinsic::dx_isinf:
   case Intrinsic::dx_isnan:
+  case Intrinsic::dx_legacyf16tof32:
   case Intrinsic::dx_rsqrt:
   case Intrinsic::dx_saturate:
   case Intrinsic::dx_splitdouble:
diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
index 3f0424f436c72..245e5a2894604 100644
--- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
+++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp
@@ -3516,6 +3516,10 @@ bool SPIRVInstructionSelector::selectIntrinsic(Register ResVReg,
   case Intrinsic::spv_resource_nonuniformindex: {
     return selectResourceNonUniformIndex(ResVReg, ResType, I);
   }
+  case Intrinsic::spv_unpackhalf2x16: {
+    return selectExtInst(ResVReg, ResType, I, GL::UnpackHalf2x16);
+  }
+
   default: {
     std::string DiagMsg;
     raw_string_ostream OS(DiagMsg);
diff --git a/llvm/test/CodeGen/DirectX/f16tof32.ll b/llvm/test/CodeGen/DirectX/f16tof32.ll
new file mode 100644
index 0000000000000..edc5c1942e8bd
--- /dev/null
+++ b/llvm/test/CodeGen/DirectX/f16tof32.ll
@@ -0,0 +1,57 @@
+; RUN: opt -S -dxil-intrinsic-expansion -scalarizer -dxil-op-lower -mtriple=dxil-pc-shadermodel6.9-library %s | FileCheck %s
+
+define hidden noundef nofpclass(nan inf) float @_Z11test_scalarj(i32 noundef %p0) local_unnamed_addr #0 {
+entry:
+  ; CHECK: [[UINT:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 %p0)
+  ; CHECK: ret float [[UINT]]
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn float @llvm.dx.legacyf16tof32.i32(i32 %p0)
+  ret float %hlsl.f16tof32
+}
+
+define hidden noundef nofpclass(nan inf) <2 x float> @_Z10test_uint2Dv2_j(<2 x i32> noundef %p0) local_unnamed_addr #0 {
+entry:
+  ; CHECK: [[UINT2_0:%.*]] = extractelement <2 x i32> %p0, i64 0
+  ; CHECK: [[FLOAT_0:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT2_0]])
+  ; CHECK: [[UINT2_1:%.*]] = extractelement <2 x i32> %p0, i64 1
+  ; CHECK: [[FLOAT_1:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT2_1]])
+  ; CHECK: [[FLOAT2_0:%.*]] = insertelement <2 x float> poison, float [[FLOAT_0]], i64 0
+  ; CHECK: [[FLOAT2_1:%.*]] = insertelement <2 x float> [[FLOAT2_0]], float [[FLOAT_1]], i64 1
+  ; CHECK: ret <2 x float> [[FLOAT2_1]]
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.dx.legacyf16tof32.v2i32(<2 x i32> %p0)
+  ret <2 x float> %hlsl.f16tof32
+}
+
+define hidden noundef nofpclass(nan inf) <3 x float> @_Z10test_uint3Dv3_j(<3 x i32> noundef %p0) local_unnamed_addr #0 {
+entry:
+  ; CHECK: [[UINT3_0:%.*]] = extractelement <3 x i32> %p0, i64 0
+  ; CHECK: [[FLOAT_0:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT3_0]])
+  ; CHECK: [[UINT3_1:%.*]] = extractelement <3 x i32> %p0, i64 1
+  ; CHECK: [[FLOAT_1:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT3_1]])
+  ; CHECK: [[UINT3_2:%.*]] = extractelement <3 x i32> %p0, i64 2
+  ; CHECK: [[FLOAT_2:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT3_2]])
+  ; CHECK: [[FLOAT3_0:%.*]] = insertelement <3 x float> poison, float [[FLOAT_0]], i64 0
+  ; CHECK: [[FLOAT3_1:%.*]] = insertelement <3 x float> [[FLOAT3_0]], float [[FLOAT_1]], i64 1
+  ; CHECK: [[FLOAT3_2:%.*]] = insertelement <3 x float> [[FLOAT3_1]], float [[FLOAT_2]], i64 2
+  ; CHECK: ret <3 x float> [[FLOAT3_2]]
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <3 x float> @llvm.dx.legacyf16tof32.v3i32(<3 x i32> %p0)
+  ret <3 x float> %hlsl.f16tof32
+}
+
+define hidden noundef nofpclass(nan inf) <4 x float> @_Z10test_uint4Dv4_j(<4 x i32> noundef %p0) local_unnamed_addr #0 {
+entry:
+  ; CHECK: [[UINT4_0:%.*]] = extractelement <4 x i32> %p0, i64 0
+  ; CHECK: [[FLOAT_0:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT4_0]])
+  ; CHECK: [[UINT4_1:%.*]] = extractelement <4 x i32> %p0, i64 1
+  ; CHECK: [[FLOAT_1:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT4_1]])
+  ; CHECK: [[UINT4_2:%.*]] = extractelement <4 x i32> %p0, i64 2
+  ; CHECK: [[FLOAT_2:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT4_2]])
+  ; CHECK: [[UINT4_3:%.*]] = extractelement <4 x i32> %p0, i64 3
+  ; CHECK: [[FLOAT_3:%.*]] = call float @dx.op.legacyF16ToF32(i32 131, i32 [[UINT4_3]])
+  ; CHECK: [[FLOAT4_0:%.*]] = insertelement <4 x float> poison, float [[FLOAT_0]], i64 0
+  ; CHECK: [[FLOAT4_1:%.*]] = insertelement <4 x float> [[FLOAT4_0]], float [[FLOAT_1]], i64 1
+  ; CHECK: [[FLOAT4_2:%.*]] = insertelement <4 x float> [[FLOAT4_1]], float [[FLOAT_2]], i64 2
+  ; CHECK: [[FLOAT4_3:%.*]] = insertelement <4 x float> [[FLOAT4_2]], float [[FLOAT_3]], i64 3
+  ; CHECK: ret <4 x float> [[FLOAT4_3]]
+  %hlsl.f16tof32 = tail call reassoc nnan ninf nsz arcp afn <4 x float> @llvm.dx.legacyf16tof32.v4i32(<4 x i32> %p0)
+  ret <4 x float> %hlsl.f16tof32
+}
diff --git a/llvm/test/CodeGen/SPIRV/unpackfloat2x16.ll b/llvm/test/CodeGen/SPIRV/unpackfloat2x16.ll
new file mode 100644
index 0000000000000..6a9ce4515f5c0
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/unpackfloat2x16.ll
@@ -0,0 +1,17 @@
+; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s
+; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %}
+
+; CHECK-DAG: [[SET:%.*]] = OpExtInstImport "GLSL.std.450"
+; CHECK-DAG: [[UINT:%.*]] = OpTypeInt 32 0
+; CHECK-DAG: [[FLOAT:%.*]] = OpTypeFloat 32
+; CHECK-DAG: [[FLOAT2:%.*]] = OpTypeVector [[FLOAT]] 2
+
+; CHECK: [[P0:%.*]] = OpFunctionParameter [[UINT]]
+; CHECK: [[UNPACK2:%.*]] = OpExtInst [[FLOAT2]] [[SET]] UnpackHalf2x16 [[P0]]
+; CHECK: [[UNPACK:%.*]] = OpCompositeExtract [[FLOAT]] [[UNPACK2]] 0
+; CHECK: OpReturnValue [[UNPACK]]
+define hidden spir_func noundef nofpclass(nan inf) float @_Z9test_funcj(i32 noundef %0) local_unnamed_addr #0 {
+  %2 = tail call reassoc nnan ninf nsz arcp afn <2 x float> @llvm.spv.unpackhalf2x16.v2f32(i32 %0)
+  %3 = extractelement <2 x float> %2, i64 0
+  ret float %3
+}