diff --git a/include/hlslcc.h b/include/hlslcc.h index e3132a2..9b3f48d 100644 --- a/include/hlslcc.h +++ b/include/hlslcc.h @@ -39,6 +39,7 @@ typedef struct { uint32_t ARB_explicit_attrib_location : 1; uint32_t ARB_explicit_uniform_location : 1; uint32_t ARB_shading_language_420pack : 1; + uint32_t GL_KHR_vulkan_glsl : 1; }GlExtensions; enum {MAX_SHADER_VEC4_OUTPUT = 512}; @@ -159,6 +160,11 @@ typedef enum REFLECT_RESOURCE_DIMENSION REFLECT_RESOURCE_DIMENSION_BUFFEREX = 11, } REFLECT_RESOURCE_DIMENSION; +typedef enum REFLECT_RESOURCE_FLAGS +{ + REFLECT_RESOURCE_FLAGS_COMPARISON_SAMPLER = 0x2, +} REFLECT_RESOURCE_FLAGS; + typedef struct ResourceBinding_TAG { char Name[MAX_REFLECT_STRING_LENGTH]; @@ -324,6 +330,25 @@ typedef struct TextureSamplerInfo_TAG TextureSamplerPair aTextureSamplerPair[MAX_RESOURCE_BINDINGS]; } TextureSamplerInfo; +typedef enum GLSL_BINDING_FLAGS +{ + GLSL_BINDING_TYPE_PUSHCONSTANTS = 1<<0, +} GLSL_BINDING_FLAGS; + +typedef struct GLSLResourceBinding_TAG { + uint32_t _locationIndex; + uint32_t _bindingIndex; + uint32_t _setIndex; + uint32_t _flags; // GLSL_BINDING_FLAGS +} GLSLResourceBinding; + +typedef uint32_t (*EvaluateBindingFn)( + void* userData, + GLSLResourceBinding* dstBinding, + ResourceBinding* srcResBinding, + ConstantBuffer* srcCBBinding, + uint32_t bindPoint, uint32_t shaderStage); + typedef struct ShaderInfo_TAG { uint32_t ui32MajorVersion; @@ -446,6 +471,9 @@ static const unsigned int HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS = 0x400; //If set, global uniforms are not stored in a struct. 
static const unsigned int HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT = 0x800; +//If set, no dummy materials will be generated for Vulkan +static const unsigned int HLSLCC_FLAG_DISABLE_VULKAN_DUMMIES = 0x1000; + #ifdef __cplusplus extern "C" { #endif @@ -460,14 +488,17 @@ HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromFile(const char* filename, GLLang language, const GlExtensions *extensions, GLSLCrossDependencyData* dependencies, - GLSLShader* result - ); + EvaluateBindingFn evaluateBindingFn, + void* evaluateBindingData, + GLSLShader* result); HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader, unsigned int flags, GLLang language, const GlExtensions *extensions, GLSLCrossDependencyData* dependencies, + EvaluateBindingFn evaluateBindingFn, + void* evaluateBindingData, GLSLShader* result); HLSLCC_API void HLSLCC_APIENTRY FreeGLSLShader(GLSLShader*); diff --git a/mk/CMakeLists.txt b/mk/CMakeLists.txt index bceb17b..68f4641 100644 --- a/mk/CMakeLists.txt +++ b/mk/CMakeLists.txt @@ -17,8 +17,15 @@ IF( BUILD_SHARED_LIBS) ADD_DEFINITIONS(-DHLSLCC_DYNLIB) ENDIF( BUILD_SHARED_LIBS ) -SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY "../bin" ) -SET( CMAKE_ARCHIVE_OUTPUT_DIRECTORY "../lib" ) +OPTION(TARGET_32BIT "Target 32 bit architexture" OFF) + +IF(TARGET_32BIT) + SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY "../bin32" ) + SET( CMAKE_ARCHIVE_OUTPUT_DIRECTORY "../lib32" ) +ELSE() + SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY "../bin64" ) + SET( CMAKE_ARCHIVE_OUTPUT_DIRECTORY "../lib64" ) +ENDIF() #INCLUDE(TestBigEndian) diff --git a/offline/toGLSLStandalone.cpp b/offline/toGLSLStandalone.cpp index 0013971..5a71a1f 100644 --- a/offline/toGLSLStandalone.cpp +++ b/offline/toGLSLStandalone.cpp @@ -16,7 +16,7 @@ #include "timer.h" #if defined(_WIN32) -#define VALIDATE_OUTPUT +//#define VALIDATE_OUTPUT #endif #if defined(VALIDATE_OUTPUT) @@ -605,7 +605,8 @@ int Run(const char* srcPath, const char* destPath, GLLang language, int flags, c ext.ARB_explicit_attrib_location = 0; 
ext.ARB_explicit_uniform_location = 0; ext.ARB_shading_language_420pack = 0; - compiledOK = TranslateHLSLFromFile(srcPath, flags, language, &ext , dependencies, &result); + ext.GL_KHR_vulkan_glsl = 0; + compiledOK = TranslateHLSLFromFile(srcPath, flags, language, &ext , dependencies, NULL, NULL, &result); crossCompileTime = ReadTimer(&timer); if(compiledOK) @@ -639,6 +640,10 @@ int Run(const char* srcPath, const char* destPath, GLLang language, int flags, c FreeGLSLShader(&result); } + else + { + printf("HLSLcc failed\n"); + } return compiledOK; } diff --git a/src/decode.c b/src/decode.c index c292673..c61aef7 100644 --- a/src/decode.c +++ b/src/decode.c @@ -10,18 +10,18 @@ #include "internal_includes/toGLSLOperand.h" #define FOURCC(a, b, c, d) ((uint32_t)(uint8_t)(a) | ((uint32_t)(uint8_t)(b) << 8) | ((uint32_t)(uint8_t)(c) << 16) | ((uint32_t)(uint8_t)(d) << 24 )) -static enum {FOURCC_DXBC = FOURCC('D', 'X', 'B', 'C')}; //DirectX byte code -static enum {FOURCC_SHDR = FOURCC('S', 'H', 'D', 'R')}; //Shader model 4 code -static enum {FOURCC_SHEX = FOURCC('S', 'H', 'E', 'X')}; //Shader model 5 code -static enum {FOURCC_RDEF = FOURCC('R', 'D', 'E', 'F')}; //Resource definition (e.g. 
constant buffers) -static enum {FOURCC_ISGN = FOURCC('I', 'S', 'G', 'N')}; //Input signature -static enum {FOURCC_IFCE = FOURCC('I', 'F', 'C', 'E')}; //Interface (for dynamic linking) -static enum {FOURCC_OSGN = FOURCC('O', 'S', 'G', 'N')}; //Output signature -static enum {FOURCC_PSGN = FOURCC('P', 'C', 'S', 'G')}; //Patch-constant signature - -static enum {FOURCC_ISG1 = FOURCC('I', 'S', 'G', '1')}; //Input signature with Stream and MinPrecision -static enum {FOURCC_OSG1 = FOURCC('O', 'S', 'G', '1')}; //Output signature with Stream and MinPrecision -static enum {FOURCC_OSG5 = FOURCC('O', 'S', 'G', '5')}; //Output signature with Stream +enum {FOURCC_DXBC = FOURCC('D', 'X', 'B', 'C')}; //DirectX byte code +enum {FOURCC_SHDR = FOURCC('S', 'H', 'D', 'R')}; //Shader model 4 code +enum {FOURCC_SHEX = FOURCC('S', 'H', 'E', 'X')}; //Shader model 5 code +enum {FOURCC_RDEF = FOURCC('R', 'D', 'E', 'F')}; //Resource definition (e.g. constant buffers) +enum {FOURCC_ISGN = FOURCC('I', 'S', 'G', 'N')}; //Input signature +enum {FOURCC_IFCE = FOURCC('I', 'F', 'C', 'E')}; //Interface (for dynamic linking) +enum {FOURCC_OSGN = FOURCC('O', 'S', 'G', 'N')}; //Output signature +enum {FOURCC_PSGN = FOURCC('P', 'C', 'S', 'G')}; //Patch-constant signature + +enum {FOURCC_ISG1 = FOURCC('I', 'S', 'G', '1')}; //Input signature with Stream and MinPrecision +enum {FOURCC_OSG1 = FOURCC('O', 'S', 'G', '1')}; //Output signature with Stream and MinPrecision +enum {FOURCC_OSG5 = FOURCC('O', 'S', 'G', '5')}; //Output signature with Stream typedef struct DXBCContainerHeaderTAG { @@ -269,6 +269,12 @@ uint32_t DecodeOperand (const uint32_t *pui32Tokens, Operand* psOperand) psOperand->aeDataType[0] = SVT_UINT; } + if (psOperand->eType == OPERAND_TYPE_INPUT_FORK_INSTANCE_ID) + { + eNumComponents = OPERAND_1_COMPONENT; + psOperand->aeDataType[0] = SVT_INT; + } + switch(eNumComponents) { case OPERAND_1_COMPONENT: @@ -444,6 +450,8 @@ const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* 
pui32Token, } case OPCODE_DCL_SAMPLER: { + psDecl->ui32NumOperands = 1; + DecodeOperand(pui32Token+ui32OperandOffset, &psDecl->asOperands[0]); break; } case OPCODE_DCL_INDEX_RANGE: @@ -1023,8 +1031,10 @@ const uint32_t* DeocdeInstruction(const uint32_t* pui32Token, Instruction* psIns //Intentional fall-through } case OPCODE_IMIN: + case OPCODE_UMIN: case OPCODE_MIN: case OPCODE_IMAX: + case OPCODE_UMAX: case OPCODE_MAX: case OPCODE_MUL: case OPCODE_DIV: @@ -1078,6 +1088,7 @@ const uint32_t* DeocdeInstruction(const uint32_t* pui32Token, Instruction* psIns case OPCODE_MAD: case OPCODE_MOVC: case OPCODE_IMAD: + case OPCODE_UMAD: case OPCODE_UDIV: case OPCODE_LOD: case OPCODE_SAMPLE: @@ -1098,10 +1109,11 @@ const uint32_t* DeocdeInstruction(const uint32_t* pui32Token, Instruction* psIns case OPCODE_DMOVC: case OPCODE_DFMA: case OPCODE_IMUL: + case OPCODE_UMUL: { psInst->ui32NumOperands = 4; - if(eOpcode == OPCODE_IMUL) + if(eOpcode == OPCODE_IMUL || eOpcode == OPCODE_UMUL) { psInst->ui32FirstSrc = 2; } diff --git a/src/decodeDX9.c b/src/decodeDX9.c index 2495529..cf7833d 100644 --- a/src/decodeDX9.c +++ b/src/decodeDX9.c @@ -9,7 +9,7 @@ #include "internal_includes/hlslcc_malloc.h" #define FOURCC(a, b, c, d) ((uint32_t)(uint8_t)(a) | ((uint32_t)(uint8_t)(b) << 8) | ((uint32_t)(uint8_t)(c) << 16) | ((uint32_t)(uint8_t)(d) << 24 )) -static enum {FOURCC_CTAB = FOURCC('C', 'T', 'A', 'B')}; //Constant table +enum {FOURCC_CTAB = FOURCC('C', 'T', 'A', 'B')}; //Constant table #ifdef _DEBUG static uint64_t operandID = 0; diff --git a/src/internal_includes/languages.h b/src/internal_includes/languages.h index 1f8dbd2..ed400a0 100644 --- a/src/internal_includes/languages.h +++ b/src/internal_includes/languages.h @@ -86,6 +86,25 @@ static int HaveUniformBindingsAndLocations(const GLLang eLang,const struct GlExt return 0; } +static int HaveBindingQualifier(const GLLang eLang, const struct GlExtensions *extensions, unsigned int flags) +{ + return + (eLang >= LANG_420) + || 
(extensions && ((GlExtensions*)extensions)->ARB_shading_language_420pack); +} + +static int HaveSeparateTexturesAndSamplers(const GLLang eLang, const struct GlExtensions *extensions) +{ + return extensions && ((GlExtensions*)extensions)->GL_KHR_vulkan_glsl; +} + +static int HaveScalarSwizzle(const GLLang eLang, const struct GlExtensions *extensions) +{ + return + (eLang >= LANG_420) + || (extensions && ((GlExtensions*)extensions)->ARB_shading_language_420pack); +} + static int DualSourceBlendSupported(const GLLang eLang) { if(eLang >= LANG_330) @@ -104,6 +123,35 @@ static int SubroutinesSupported(const GLLang eLang) return 0; } +static int HasImageSizeFunction(const GLLang eLang) +{ + if(eLang >= LANG_430) + { + return 1; + } + return 0; +} + +static int HasInterfaceComponentQualifier(const GLLang eLang) +{ + // Allows for the use of "component" layout qualifier attached + // to interface components. + if (eLang >= LANG_440) + { + return 1; + } + return 0; +} + +static int UseSPIRVNames(const GLLang eLang,const struct GlExtensions *extensions) +{ + if(extensions && ((GlExtensions*)extensions)->GL_KHR_vulkan_glsl) + { + return 1; + } + return 0; +} + //Before 430, flat/smooth/centroid/noperspective must match //between fragment and its previous stage. //HLSL bytecode only tells us the interpolation in pixel shader. 
diff --git a/src/internal_includes/reflect.h b/src/internal_includes/reflect.h index d1c0c1b..cca4753 100644 --- a/src/internal_includes/reflect.h +++ b/src/internal_includes/reflect.h @@ -11,7 +11,7 @@ void GetConstantBufferFromBindingPoint(const ResourceGroup eGroup, const uint32_ int GetInterfaceVarFromOffset(uint32_t ui32Offset, ShaderInfo* psShaderInfo, ShaderVar** ppsShaderVar); -int GetInputSignatureFromRegister(const uint32_t ui32Register, const ShaderInfo* psShaderInfo, InOutSignature** ppsOut); +int GetInputSignatureFromRegister(const uint32_t ui32Register, const int eSelMode, uint32_t ui32CompMask, const ShaderInfo* psShaderInfo, InOutSignature** ppsOut); int GetOutputSignatureFromRegister(const uint32_t currentPhase, const uint32_t ui32Register, const uint32_t ui32Stream, diff --git a/src/internal_includes/structs.h b/src/internal_includes/structs.h index 241c851..83f8b2d 100644 --- a/src/internal_includes/structs.h +++ b/src/internal_includes/structs.h @@ -157,14 +157,14 @@ enum {MAX_TEMP_VEC4 = 512}; enum {MAX_GROUPSHARED = 8}; -static enum {MAX_DX9_IMMCONST = 256}; +enum {MAX_DX9_IMMCONST = 256}; static const uint32_t MAIN_PHASE = 0; static const uint32_t HS_GLOBAL_DECL = 1; static const uint32_t HS_CTRL_POINT_PHASE = 2; static const uint32_t HS_FORK_PHASE = 3; static const uint32_t HS_JOIN_PHASE = 4; -static enum{ NUM_PHASES = 5}; +enum{ NUM_PHASES = 5}; typedef struct ShaderPhase_TAG { @@ -222,6 +222,7 @@ typedef struct Shader_TAG int aIndexedInputParents[MAX_SHADER_VEC4_INPUT]; RESOURCE_DIMENSION aeResourceDims[MAX_TEXTURES]; + RESOURCE_DIMENSION aeUAVResourceDims[MAX_TEXTURES]; int aiInputDeclaredSize[MAX_SHADER_VEC4_INPUT]; @@ -257,6 +258,8 @@ typedef struct HLSLCrossCompilerContext_TAG unsigned int flags; Shader* psShader; GLSLCrossDependencyData* psDependencies; + EvaluateBindingFn pEvaluateBindingFn; + void* pEvaluateBindingData; } HLSLCrossCompilerContext; #endif diff --git a/src/internal_includes/toGLSLDeclaration.h 
b/src/internal_includes/toGLSLDeclaration.h index bf6c6bc..6dcc982 100644 --- a/src/internal_includes/toGLSLDeclaration.h +++ b/src/internal_includes/toGLSLDeclaration.h @@ -12,5 +12,6 @@ const char* GetDeclaredOutputName(const HLSLCrossCompilerContext* psContext, con //Each phase has its own temps. //Convert to global temps for GLSL. void ConsolidateHullTempVars(Shader* psShader); +void TranslateDeclaration_HS_NoControlPointStage(HLSLCrossCompilerContext* psContext); #endif diff --git a/src/internal_includes/toGLSLOperand.h b/src/internal_includes/toGLSLOperand.h index 5116aef..2d3139c 100644 --- a/src/internal_includes/toGLSLOperand.h +++ b/src/internal_includes/toGLSLOperand.h @@ -35,9 +35,13 @@ void TranslateOperandIndexMAD(HLSLCrossCompilerContext* psContext, const Operand void TranslateOperandSwizzle(HLSLCrossCompilerContext* psContext, const Operand* psOperand); void TranslateOperandSwizzleWithMask(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32ComponentMask); +uint32_t GetNumberBitsSet(uint32_t a); uint32_t GetNumSwizzleElements(const Operand* psOperand); uint32_t GetNumSwizzleElementsWithMask(const Operand *psOperand, uint32_t ui32CompMask); +uint32_t GetOrderedSwizzleElements(const Operand *psOperand, uint32_t ui32CompMask, uint32_t result[4]); void AddSwizzleUsingElementCount(HLSLCrossCompilerContext* psContext, uint32_t count); +void AddSwizzleUsingOrderedElements(HLSLCrossCompilerContext* psContext, const Operand *psOperand, uint32_t ui32CompMask); +void AddSwizzleUsingOrderedElementsDstMask(HLSLCrossCompilerContext* psContext, const Operand *psSrcOperand, const Operand *psMaskingOperand); int GetFirstOperandSwizzle(HLSLCrossCompilerContext* psContext, const Operand* psOperand); uint32_t IsSwizzleReplicated(const Operand* psOperand); diff --git a/src/reflect.c b/src/reflect.c index e770248..0c3bbb0 100644 --- a/src/reflect.c +++ b/src/reflect.c @@ -585,20 +585,25 @@ int GetInterfaceVarFromOffset(uint32_t ui32Offset, 
ShaderInfo* psShaderInfo, Sha return 0; } -int GetInputSignatureFromRegister(const uint32_t ui32Register, const ShaderInfo* psShaderInfo, InOutSignature** ppsOut) +int GetInputSignatureFromRegister(const uint32_t ui32Register, const int eSelMode, uint32_t ui32CompMask, const ShaderInfo* psShaderInfo, InOutSignature** ppsOut) { uint32_t i; const uint32_t ui32NumVars = psShaderInfo->ui32NumInputSignatures; + if (eSelMode != OPERAND_4_COMPONENT_MASK_MODE) + ui32CompMask = 0; + for(i=0; i<ui32NumVars; ++i) { InOutSignature* psInputSignatures = psShaderInfo->psInputSignatures; - if(ui32Register == psInputSignatures[i].ui32Register) + if(ui32Register == psInputSignatures[i].ui32Register && + ((ui32CompMask == 0)||(ui32CompMask & psInputSignatures[i].ui32Mask))) { *ppsOut = psInputSignatures+i; return 1; } } + *ppsOut = NULL; return 0; } @@ -742,25 +747,23 @@ static int IsOffsetInType(ShaderVarType* psType, { pi32Index[0] = (offsetToFind - thisOffset) / 16; } - else if(psType->Class == SVC_VECTOR && psType->Columns > 1) - { - //Check for vector starting at a non-vec4 offset. 
- // cbuffer $Globals - // { - // - // float angle; // Offset: 0 Size: 4 - // float2 angle2; // Offset: 4 Size: 8 - // - // } + // cbuffer $Globals + // { + // + // float angle; // Offset: 0 Size: 4 + // float2 angle2; // Offset: 4 Size: 8 + // + // } - //cb0[0].x = angle - //cb0[0].yzyy = angle2.xyxx + //cb0[0].x = angle + //cb0[0].yzyy = angle2.xyxx - //Rebase angle2 so that .y maps to .x, .z maps to .y + //Rebase angle2 so that .y maps to .x, .z maps to .y - pi32Rebase[0] = thisOffset % 16; - } + pi32Rebase[0] = thisOffset % 16; return 1; } diff --git a/src/toGLSL.c b/src/toGLSL.c index fad6707..4f83d7e 100644 --- a/src/toGLSL.c +++ b/src/toGLSL.c @@ -77,6 +77,14 @@ void AddIndentation(HLSLCrossCompilerContext* psContext) } } +void WriteUniformLayout( + HLSLCrossCompilerContext* psContext, + ResourceBinding* srcResBinding, + ConstantBuffer* srcCBBinding, + unsigned ui32BindingPoint, unsigned shaderStage, + const char* extraLayoutQualifiers, + bstring glsl); + void AddVersionDependentCode(HLSLCrossCompilerContext* psContext) { bstring glsl = *psContext->currentGLSLString; @@ -207,11 +215,13 @@ void AddVersionDependentCode(HLSLCrossCompilerContext* psContext) } bcatcstr(glsl, "\n"); } - - if (SubroutinesSupported(psContext->psShader->eTargetLanguage)) - { - bcatcstr(glsl, "subroutine void SubroutineType();\n"); - } + + // DavidJ -- Removed this line. It causes a compile error with the SPIR-V compiler (which doesn't support subroutines) + // It's not clear to me what the intention of this line is. 
+ // if (SubroutinesSupported(psContext->psShader->eTargetLanguage)) + // { + // bcatcstr(glsl, "subroutine void SubroutineType();\n"); + // } if (psContext->psShader->ui32MajorVersion <= 3) { @@ -321,20 +331,71 @@ void AddVersionDependentCode(HLSLCrossCompilerContext* psContext) } /* For versions which do not support a vec1 (currently all versions) */ - bcatcstr(glsl,"struct vec1 {\n"); - bcatcstr(glsl,"\tfloat x;\n"); - bcatcstr(glsl,"};\n"); - - if(HaveUVec(psContext->psShader->eTargetLanguage)) - { - bcatcstr(glsl,"struct uvec1 {\n"); - bcatcstr(glsl,"\tuint x;\n"); - bcatcstr(glsl,"};\n"); - } + // DavidJ -- Replacing this with a define. + // Using a struct was causing problems with code like: + // vec1 value; + // value = 0.5f; + // Here, the ".x" could be omitted after "value" because + // both sides are determined to have the same components. + // That's fine with true vectors; but does not produce the + // correct result when using a "struct" to stand in for vec1. + // + // Note -- this requires GLSL 4.20 or ARB_shading_language_420pack to work + // correctly. When targeting older versions of OpenGL, some input + // HLSL code will generate incorrect results. 
+ if (HaveScalarSwizzle(psContext->psShader->eTargetLanguage, psContext->psShader->extensions)) + { + bcatcstr(glsl, "#define vec1 float\n"); + if(HaveUVec(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, "#define uvec1 uint\n"); + bcatcstr(glsl, "#define ivec1 int\n"); + } + else + { + bcatcstr(glsl,"struct vec1 {\n"); + bcatcstr(glsl,"\tfloat x;\n"); + bcatcstr(glsl,"};\n"); + + if(HaveUVec(psContext->psShader->eTargetLanguage)) + { + bcatcstr(glsl,"struct uvec1 {\n"); + bcatcstr(glsl,"\tuint x;\n"); + bcatcstr(glsl,"};\n"); + } + + bcatcstr(glsl,"struct ivec1 {\n"); + bcatcstr(glsl,"\tint x;\n"); + bcatcstr(glsl,"};\n"); + } - bcatcstr(glsl,"struct ivec1 {\n"); - bcatcstr(glsl,"\tint x;\n"); - bcatcstr(glsl,"};\n"); + // In HLSL, we can use "Load" on a texture object and access it without a sampler. + // For GLSL, we have 2 options: + // * use a gtexture2d/gsampler2d and access with texelFetch + // * use an image2d and access with imageLoad + // + // If we use imageLoad, we don't need a sampler. This is a closer analogue to HLSL. + // If we use gtexture2d, we need to use a sampler. But we can also call other texture + // access functions (like texture, textureGather). + // + // It seems like "images" will be necessary if we need to perform atomic operations on + // the data (ie, like RWTextures in HLSL). + // But for HLSL "texture" objects, perhaps it's best to just stick with gtexture2d/gsampler2d + // objects for consistency. But, we will need to declare a dummy sampler to use with texelFetch. 
+ if (HaveSeparateTexturesAndSamplers(psContext->psShader->eTargetLanguage, psContext->psShader->extensions)) { + ResourceBinding resBinding; + strcpy(resBinding.Name, "hlslcc_DummySampler"); + resBinding.eType = RTYPE_SAMPLER; + resBinding.ui32BindPoint = 16; + resBinding.ui32BindCount = 1; + resBinding.ui32Flags = 0; + resBinding.eDimension = REFLECT_RESOURCE_DIMENSION_UNKNOWN; + resBinding.ui32ReturnType = 0; + resBinding.ui32NumSamples = 0; + WriteUniformLayout( + psContext, &resBinding, NULL, resBinding.ui32BindPoint, + psContext->psShader->eShaderType, NULL, glsl); + bcatcstr(glsl, "uniform sampler hlslcc_DummySampler;\n"); + } /* OpenGL 4.1 API spec: @@ -492,6 +553,7 @@ void TranslateToGLSL(HLSLCrossCompilerContext* psContext, GLLang* planguage,cons bcatcstr(glsl,"#extension GL_ARB_explicit_uniform_location : require\n"); if(extensions->ARB_shading_language_420pack) bcatcstr(glsl,"#extension GL_ARB_shading_language_420pack : require\n"); + // extension GL_KHR_vulkan_glsl doesn't need to be declared } ClearDependencyData(psShader->eShaderType, psContext->psDependencies); @@ -522,15 +584,25 @@ void TranslateToGLSL(HLSLCrossCompilerContext* psContext, GLLang* planguage,cons asPhaseFuncNames[HS_JOIN_PHASE] = "join_phase"; ConsolidateHullTempVars(psShader); + int ControlPointCountValue = 1; for(i=0; i < psShader->asPhase[HS_GLOBAL_DECL].pui32DeclCount[0]; ++i) { - TranslateDeclaration(psContext, psShader->asPhase[HS_GLOBAL_DECL].ppsDecl[0]+i); + Declaration* ppsDecl = psShader->asPhase[HS_GLOBAL_DECL].ppsDecl[0] + i; + TranslateDeclaration(psContext, ppsDecl); + if (ppsDecl->eOpcode == OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT) ControlPointCountValue = ppsDecl->value.ui32MaxOutputVertexCount; } for(ui32Phase=HS_CTRL_POINT_PHASE; ui32Phase<NUM_PHASES; ui32Phase++) { psContext->currentPhase = ui32Phase; + int DummyControlPointPhase = (ui32Phase == HS_CTRL_POINT_PHASE) && (psShader->asPhase[HS_CTRL_POINT_PHASE].ui32InstanceCount == 0); + + if (DummyControlPointPhase) + { + 
TranslateDeclaration_HS_NoControlPointStage(psContext); + } + for(ui32Instance = 0; ui32Instance < psShader->asPhase[ui32Phase].ui32InstanceCount; ++ui32Instance) { isCurrentForkPhasedInstanced = 0; //reset for each fork phase for cases we don't have a fork phase instance count opcode. @@ -565,7 +637,7 @@ void TranslateToGLSL(HLSLCrossCompilerContext* psContext, GLLang* planguage,cons TranslateInstruction(psContext, psShader->asPhase[ui32Phase].ppsInst[ui32Instance]+i, NULL); } - if(haveInstancedForkPhase) + if(haveInstancedForkPhase||(ui32Phase==HS_CTRL_POINT_PHASE)) { psContext->indent--; AddIndentation(psContext); @@ -607,30 +679,62 @@ void TranslateToGLSL(HLSLCrossCompilerContext* psContext, GLLang* planguage,cons AddIndentation(psContext); bcatcstr(glsl, "//--- End Early Main ---\n"); #endif - - ui32PhaseFuncCallOrder[0] = HS_CTRL_POINT_PHASE; - ui32PhaseFuncCallOrder[1] = HS_FORK_PHASE; - ui32PhaseFuncCallOrder[2] = HS_JOIN_PHASE; - - for(ui32PhaseCallIndex=0; ui32PhaseCallIndex<3; ui32PhaseCallIndex++) + if (psShader->asPhase[HS_CTRL_POINT_PHASE].ui32InstanceCount == 0) + { //copying control points data for empty control point phase + bcatcstr(glsl, " gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"); + bcatcstr(glsl, " gl_out[gl_InvocationID].gl_PointSize = gl_in[gl_InvocationID].gl_PointSize;\n"); + //bcatcstr(glsl, " gl_out[gl_InvocationID].gl_ClipDistance[] = gl_in[gl_InvocationID].gl_ClipDistance[];\n"); todoooo copy all gl_ClipDistance + for (uint32_t outI = 0; outI < psShader->sInfo.ui32NumOutputSignatures; outI++) + { + InOutSignature outSeg = psShader->sInfo.psOutputSignatures[outI]; + if (outSeg.eSystemValueType != NAME_UNDEFINED) continue; + InOutSignature inSeg; + int inIndex = -1; + for (uint32_t inI = 0; inI < psShader->sInfo.ui32NumInputSignatures; inI++) + { + inSeg = psShader->sInfo.psInputSignatures[inI]; + if (inSeg.eSystemValueType != NAME_UNDEFINED) continue; + if (inSeg.ui32Register == outSeg.ui32Register) + 
{ + inIndex = inI; + break; + } + } + if (inIndex < 0) continue; + bformata(glsl, " %s%d[gl_InvocationID] = %s%d[gl_InvocationID];\n", outSeg.SemanticName, outSeg.ui32SemanticIndex, inSeg.SemanticName, inSeg.ui32SemanticIndex); + } + } + else + { + for (ui32Instance = 0; ui32Instance < psShader->asPhase[HS_CTRL_POINT_PHASE].ui32InstanceCount; ++ui32Instance) + { + AddIndentation(psContext); + bformata(glsl, " %s%d();\n", asPhaseFuncNames[HS_CTRL_POINT_PHASE], ui32Instance); + } + } + bcatcstr(glsl, " barrier();\n"); + bformata(glsl, " if (gl_InvocationID == %d) {\n", ControlPointCountValue-1); //all other phases should be called once + ui32PhaseFuncCallOrder[0] = HS_FORK_PHASE; + ui32PhaseFuncCallOrder[1] = HS_JOIN_PHASE; + for (ui32PhaseCallIndex = 0; ui32PhaseCallIndex < 2; ui32PhaseCallIndex++) { ui32Phase = ui32PhaseFuncCallOrder[ui32PhaseCallIndex]; - for(ui32Instance = 0; ui32Instance < psShader->asPhase[ui32Phase].ui32InstanceCount; ++ui32Instance) + for (ui32Instance = 0; ui32Instance < psShader->asPhase[ui32Phase].ui32InstanceCount; ++ui32Instance) { AddIndentation(psContext); - bformata(glsl, "%s%d();\n", asPhaseFuncNames[ui32Phase], ui32Instance); + bformata(glsl, " %s%d();\n", asPhaseFuncNames[ui32Phase], ui32Instance); - if(ui32Phase == HS_FORK_PHASE) + if (ui32Phase == HS_FORK_PHASE) { - if(psShader->asPhase[HS_JOIN_PHASE].ui32InstanceCount || - (ui32Instance+1 < psShader->asPhase[HS_FORK_PHASE].ui32InstanceCount)) + if (psShader->asPhase[HS_JOIN_PHASE].ui32InstanceCount || + (ui32Instance + 1 < psShader->asPhase[HS_FORK_PHASE].ui32InstanceCount)) { AddIndentation(psContext); - bcatcstr(glsl, "barrier();\n"); } } } } + if (ui32Phase>HS_CTRL_POINT_PHASE) bcatcstr(glsl, " }\n"); psContext->indent--; @@ -757,6 +861,8 @@ HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader, GLLang language, const GlExtensions *extensions, GLSLCrossDependencyData* dependencies, + EvaluateBindingFn evaluateBindingFn, + void* evaluateBindingData, 
GLSLShader* result) { uint32_t* tokens; @@ -782,6 +888,8 @@ HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader, sContext.psShader = psShader; sContext.flags = flags; sContext.psDependencies = dependencies; + sContext.pEvaluateBindingFn = evaluateBindingFn; + sContext.pEvaluateBindingData = evaluateBindingData; for(i=0; i> 27]; +} + +static int MSBBit(uint32_t x) +{ + // Returns the most significant bit set, zero based + // Like LSBBit, just using a general method (even though there might be a hardware instruction) + int r = 0; + while (x >>= 1) r++; + return r; +} + void AddToDx9ImmConstIndexableArray(HLSLCrossCompilerContext* psContext, const Operand* psOperand) { bstring* savedStringPtr = psContext->currentGLSLString; @@ -101,7 +126,14 @@ void DeclareConstBufferShaderVariable(bstring glsl, const char* Name, const stru { case SVT_FLOAT: { - bformata(glsl, "\tmat4 %s", Name); + if (psType->Class == SVC_MATRIX_COLUMNS) + { + bformata(glsl, "\tmat%ix%i %s", psType->Columns, psType->Rows, Name); + } + else + { + bformata(glsl, "\tmat%ix%i %s", psType->Rows, psType->Columns, Name); + } break; } default: @@ -248,14 +280,14 @@ const char* GetDeclaredInputName(const HLSLCrossCompilerContext* psContext, cons bstring inputName; char* cstr; InOutSignature* psIn; - int found = GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, &psContext->psShader->sInfo, &psIn); + int found = GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->eSelMode, psOperand->ui32CompMask, &psContext->psShader->sInfo, &psIn); if((psContext->flags & HLSLCC_FLAG_INOUT_SEMANTIC_NAMES) && found) { if (eShaderType == VERTEX_SHADER) /* We cannot have input and output names conflict, but vs output must match ps input. Prefix vs input. 
*/ inputName = bformat("in_%s%d", psIn->SemanticName, psIn->ui32SemanticIndex); else - inputName = bformat("%s%d", psIn->SemanticName, psIn->ui32SemanticIndex); + inputName = bformat("%s%d", psIn->SemanticName, psIn->ui32SemanticIndex); } else if(eShaderType == GEOMETRY_SHADER) { @@ -408,7 +440,7 @@ const char* GetInterpolationString(INTERPOLATION_MODE eMode) static void DeclareInput( HLSLCrossCompilerContext* psContext, const Declaration* psDecl, - const char* Interpolation, const char* StorageQualifier, const char* Precision, int iNumComponents, OPERAND_INDEX_DIMENSION eIndexDim, const char* InputName) + const char* Interpolation, const char* StorageQualifier, const char* Precision, OPERAND_INDEX_DIMENSION eIndexDim, const char* InputName) { Shader* psShader = psContext->psShader; bstring glsl = *psContext->currentGLSLString; @@ -417,146 +449,167 @@ static void DeclareInput( if(psShader->aIndexedInput[psDecl->asOperands[0].ui32RegisterNumber] == -1) return; - if(psShader->aiInputDeclaredSize[psDecl->asOperands[0].ui32RegisterNumber] == 0) - { - const char* vecType = "vec"; - const char* scalarType = "float"; - InOutSignature* psSignature = NULL; + int registerNotDeclared = psShader->aiInputDeclaredSize[psDecl->asOperands[0].ui32RegisterNumber] == 0; + + const char* vecType = "vec"; + const char* scalarType = "float"; + InOutSignature* psSignature = NULL; - if( GetInputSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, &psShader->sInfo, &psSignature) ) + if (GetInputSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, psDecl->asOperands[0].eSelMode, psDecl->asOperands[0].ui32CompMask, &psShader->sInfo, &psSignature)) + { + switch(psSignature->eComponentType) { - switch(psSignature->eComponentType) + case INOUT_COMPONENT_UINT32: { - case INOUT_COMPONENT_UINT32: - { - vecType = "uvec"; - scalarType = "uint"; - break; - } - case INOUT_COMPONENT_SINT32: - { - vecType = "ivec"; - scalarType = "int"; - break; - } - case INOUT_COMPONENT_FLOAT32: - 
{ - break; - } + vecType = "uvec"; + scalarType = "uint"; + break; + } + case INOUT_COMPONENT_SINT32: + { + vecType = "ivec"; + scalarType = "int"; + break; + } + case INOUT_COMPONENT_FLOAT32: + { + break; } } + } + else psSignature = NULL; - if(psContext->psDependencies) + if(psContext->psDependencies) + { + if(psShader->eShaderType == PIXEL_SHADER) { - if(psShader->eShaderType == PIXEL_SHADER) - { - psContext->psDependencies->aePixelInputInterpolation[psDecl->asOperands[0].ui32RegisterNumber] = psDecl->value.eInterpolation; - } + psContext->psDependencies->aePixelInputInterpolation[psDecl->asOperands[0].ui32RegisterNumber] = psDecl->value.eInterpolation; } + } + + if (!psDecl->asOperands[0].iNumComponents) + return; + + int lowestComponent = 0; + int iNumComponents = 0; + if (psSignature) { + int highestComponent = MSBBit(psSignature->ui32Mask); + lowestComponent = LSBBit(psSignature->ui32Mask); + iNumComponents = highestComponent - lowestComponent + 1; + } else { + int highestComponent = MSBBit(psDecl->asOperands[0].ui32CompMask); // (zero based bit indexes) + lowestComponent = LSBBit(psDecl->asOperands[0].ui32CompMask); + iNumComponents = highestComponent - lowestComponent + 1; + } + //int iNumComponents = highestComponent - lowestComponent + 1; - if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags) || - (psShader->eShaderType == VERTEX_SHADER && HaveLimitedInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->flags))) + if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags) || + (psShader->eShaderType == VERTEX_SHADER && HaveLimitedInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->flags))) + { + // Skip location if requested by the flags. + if (!(psContext->flags & HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS)) { - // Skip location if requested by the flags. 
- if (!(psContext->flags & HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS)) - bformata(glsl, "layout(location = %d) ", psDecl->asOperands[0].ui32RegisterNumber); + if (HasInterfaceComponentQualifier(psContext->psShader->eTargetLanguage)) + { + bformata(glsl, "layout(location = %d, component = %d) ", psDecl->asOperands[0].ui32RegisterNumber, lowestComponent); + } + else + { + bformata(glsl, "layout(location = %d) ", psDecl->asOperands[0].ui32RegisterNumber); + } } + } - switch(eIndexDim) + switch(eIndexDim) + { + case INDEX_2D: { - case INDEX_2D: - { - if(psShader->eShaderType == HULL_SHADER) + if ((psShader->eShaderType == HULL_SHADER) || (psShader->eShaderType == DOMAIN_SHADER)) + { + if(iNumComponents == 1) { - if(iNumComponents == 1) - { - const uint32_t regNum = psDecl->asOperands[0].ui32RegisterNumber; - const uint32_t arraySize = psDecl->asOperands[0].aui32ArraySizes[0]; - - psContext->psShader->abScalarInput[psDecl->asOperands[0].ui32RegisterNumber] = -1; + const uint32_t regNum = psDecl->asOperands[0].ui32RegisterNumber; + const uint32_t arraySize = psDecl->asOperands[0].aui32ArraySizes[0]; - bformata(glsl, "%s %s %s %s [gl_MaxPatchVertices];\n", StorageQualifier, Precision, scalarType, InputName); + psContext->psShader->abScalarInput[psDecl->asOperands[0].ui32RegisterNumber] = -1; - bformata(glsl, "%s1 Input%d;\n", vecType, psDecl->asOperands[0].ui32RegisterNumber); + if (registerNotDeclared) + bformata(glsl, "%s1 Input%d;\n", vecType, psDecl->asOperands[0].ui32RegisterNumber); - psShader->aiInputDeclaredSize[psDecl->asOperands[0].ui32RegisterNumber] = arraySize; - } - else - { - bformata(glsl, "%s %s %s%d %s [gl_MaxPatchVertices];\n", StorageQualifier, Precision, vecType, iNumComponents, InputName); + bformata(glsl, "%s %s %s %s [gl_MaxPatchVertices];\n", StorageQualifier, Precision, scalarType, InputName); - bformata(glsl, "%s%d Input%d[gl_MaxPatchVertices];\n", vecType, iNumComponents, psDecl->asOperands[0].ui32RegisterNumber); - - 
psShader->aiInputDeclaredSize[psDecl->asOperands[0].ui32RegisterNumber] = psDecl->asOperands[0].aui32ArraySizes[0]; - } + psShader->aiInputDeclaredSize[psDecl->asOperands[0].ui32RegisterNumber] = arraySize; } else - if(iNumComponents == 1) - { - const uint32_t regNum = psDecl->asOperands[0].ui32RegisterNumber; - const uint32_t arraySize = psDecl->asOperands[0].aui32ArraySizes[0]; + { + if (registerNotDeclared) + bformata(glsl, "%s%d Input%d[gl_MaxPatchVertices];\n", vecType, iNumComponents, psDecl->asOperands[0].ui32RegisterNumber); - psContext->psShader->abScalarInput[psDecl->asOperands[0].ui32RegisterNumber] = -1; + bformata(glsl, "%s %s %s%d %s [gl_MaxPatchVertices];\n", StorageQualifier, Precision, vecType, iNumComponents, InputName); - bformata(glsl, "%s %s %s %s [%d];\n", StorageQualifier, Precision, scalarType, InputName, - arraySize); + psShader->aiInputDeclaredSize[psDecl->asOperands[0].ui32RegisterNumber] = psDecl->asOperands[0].aui32ArraySizes[0]; + } + } + else + if(iNumComponents == 1) + { + const uint32_t regNum = psDecl->asOperands[0].ui32RegisterNumber; + const uint32_t arraySize = psDecl->asOperands[0].aui32ArraySizes[0]; - bformata(glsl, "%s1 Input%d;\n", vecType, psDecl->asOperands[0].ui32RegisterNumber); + psContext->psShader->abScalarInput[psDecl->asOperands[0].ui32RegisterNumber] = -1; - psShader->aiInputDeclaredSize[psDecl->asOperands[0].ui32RegisterNumber] = arraySize; - } - else - { - bformata(glsl, "%s %s %s%d %s [%d];\n", StorageQualifier, Precision, vecType, iNumComponents, InputName, - psDecl->asOperands[0].aui32ArraySizes[0]); + if (registerNotDeclared) + bformata(glsl, "%s1 Input%d[%d];\n", vecType, psDecl->asOperands[0].ui32RegisterNumber, arraySize); - bformata(glsl, "%s%d Input%d[%d];\n", vecType, iNumComponents, psDecl->asOperands[0].ui32RegisterNumber, - psDecl->asOperands[0].aui32ArraySizes[0]); + bformata(glsl, "%s %s %s %s [%d];\n", StorageQualifier, Precision, scalarType, InputName, + arraySize); - 
psShader->aiInputDeclaredSize[psDecl->asOperands[0].ui32RegisterNumber] = psDecl->asOperands[0].aui32ArraySizes[0]; - } - break; + psShader->aiInputDeclaredSize[psDecl->asOperands[0].ui32RegisterNumber] = arraySize; } - default: + else { + if (registerNotDeclared) + bformata(glsl, "%s%d Input%d[%d];\n", vecType, iNumComponents, psDecl->asOperands[0].ui32RegisterNumber, + psDecl->asOperands[0].aui32ArraySizes[0]); - if(psDecl->asOperands[0].eType == OPERAND_TYPE_SPECIAL_TEXCOORD) - { - InputName = "TexCoord"; - } - - if(iNumComponents == 1) - { - psContext->psShader->abScalarInput[psDecl->asOperands[0].ui32RegisterNumber] = 1; - - bformata(glsl, "%s %s %s %s %s;\n", Interpolation, StorageQualifier, Precision, scalarType, InputName); - bformata(glsl, "%s1 Input%d;\n", vecType, psDecl->asOperands[0].ui32RegisterNumber); + bformata(glsl, "%s %s %s%d %s [%d];\n", StorageQualifier, Precision, vecType, iNumComponents, InputName, + psDecl->asOperands[0].aui32ArraySizes[0]); - psShader->aiInputDeclaredSize[psDecl->asOperands[0].ui32RegisterNumber] = -1; - } - else - { - if(psShader->aIndexedInput[psDecl->asOperands[0].ui32RegisterNumber] > 0) - { - bformata(glsl, "%s %s %s %s%d %s", Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); - bformata(glsl, "[%d];\n", psShader->aIndexedInput[psDecl->asOperands[0].ui32RegisterNumber]); + psShader->aiInputDeclaredSize[psDecl->asOperands[0].ui32RegisterNumber] = psDecl->asOperands[0].aui32ArraySizes[0]; + } + break; + } + default: + { - bformata(glsl, "%s%d Input%d[%d];\n", vecType, iNumComponents, psDecl->asOperands[0].ui32RegisterNumber, - psShader->aIndexedInput[psDecl->asOperands[0].ui32RegisterNumber]); + if(psDecl->asOperands[0].eType == OPERAND_TYPE_SPECIAL_TEXCOORD) + { + InputName = "TexCoord"; + } + + if(psShader->aIndexedInput[psDecl->asOperands[0].ui32RegisterNumber] > 0) + { + if (registerNotDeclared) + bformata(glsl, "%s%d Input%d[%d];\n", vecType, iNumComponents, 
psDecl->asOperands[0].ui32RegisterNumber, + psShader->aIndexedInput[psDecl->asOperands[0].ui32RegisterNumber]); + bformata(glsl, "%s %s %s %s%d %s", Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); + bformata(glsl, "[%d];\n", psShader->aIndexedInput[psDecl->asOperands[0].ui32RegisterNumber]); - psShader->aiInputDeclaredSize[psDecl->asOperands[0].ui32RegisterNumber] = psShader->aIndexedInput[psDecl->asOperands[0].ui32RegisterNumber]; - } - else - { - bformata(glsl, "%s %s %s %s%d %s;\n", Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); - bformata(glsl, "%s%d Input%d;\n", vecType, iNumComponents, psDecl->asOperands[0].ui32RegisterNumber); + psShader->aiInputDeclaredSize[psDecl->asOperands[0].ui32RegisterNumber] = psShader->aIndexedInput[psDecl->asOperands[0].ui32RegisterNumber]; + } + else + { + if (registerNotDeclared) + bformata(glsl, "%s4 Input%d;\n", vecType, psDecl->asOperands[0].ui32RegisterNumber); + if ((lowestComponent == 0) && (iNumComponents == 1) && (psShader->eShaderType == VERTEX_SHADER)) + bformata(glsl, "%s %s %s %s %s;\n", Interpolation, StorageQualifier, Precision, scalarType, InputName); + else + bformata(glsl, "%s %s %s %s%d %s;\n", Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); - psShader->aiInputDeclaredSize[psDecl->asOperands[0].ui32RegisterNumber] = -1; - } - } - break; + psShader->aiInputDeclaredSize[psDecl->asOperands[0].ui32RegisterNumber] = -1; } + break; } } @@ -567,8 +620,27 @@ static void DeclareInput( if(psShader->aiInputDeclaredSize[psDecl->asOperands[0].ui32RegisterNumber] == -1) //Not an array { + uint32_t mask32; + if (psSignature) + mask32 = psSignature->ui32Mask; + else + mask32 = psDecl->asOperands[0].ui32CompMask; + char maskDest[6] = { 0 }; + int maskpos = 0; + maskDest[maskpos++] = '.'; + if (mask32 & OPERAND_4_COMPONENT_MASK_X) maskDest[maskpos++] = 'x'; + if (mask32 & OPERAND_4_COMPONENT_MASK_Y) maskDest[maskpos++] = 'y'; + if 
(mask32 & OPERAND_4_COMPONENT_MASK_Z) maskDest[maskpos++] = 'z'; + if (mask32 & OPERAND_4_COMPONENT_MASK_W) maskDest[maskpos++] = 'w'; + + char maskSrc[6] = { '.', 'x', 'y', 'z', 'w', 0 }; + if ((lowestComponent == 0) && (iNumComponents == 1) && (psShader->eShaderType == VERTEX_SHADER)) + maskSrc[0] = 0; + else + maskSrc[iNumComponents+1] = 0; + AddIndentation(psContext); - bformata(psContext->earlyMain, "Input%d = %s;\n", psDecl->asOperands[0].ui32RegisterNumber, InputName); + bformata(psContext->earlyMain, "Input%d%s = %s%s;\n", psDecl->asOperands[0].ui32RegisterNumber, &maskDest[0], InputName, &maskSrc[0]); } else { @@ -829,79 +901,99 @@ void AddBuiltinOutput(HLSLCrossCompilerContext* psContext, const Declaration* ps } } +static void WriteOutputFixup(HLSLCrossCompilerContext* psContext, const Operand* psOperand, const char* OutputName) +{ + // Here, we copy from a temporary buffer (called OutputXX) to + // the actual output variable "OutputName". This allows us to deal with + // cases where multiple outputs exist on the same register "location" + psContext->havePostShaderCode[psContext->currentPhase] = 1; + + psContext->currentGLSLString = &psContext->postShaderCode[psContext->currentPhase]; + bstring glsl = *psContext->currentGLSLString; + + bcatcstr(glsl, OutputName); + AddSwizzleUsingElementCount(psContext, GetNumSwizzleElements(psOperand)); + bformata(glsl, " = Output%d", psOperand->ui32RegisterNumber); + TranslateOperandSwizzle(psContext, psOperand); + bcatcstr(glsl, ";\n"); + + psContext->currentGLSLString = &psContext->glsl; + glsl = *psContext->currentGLSLString; +} + void AddUserOutput(HLSLCrossCompilerContext* psContext, const Declaration* psDecl) { bstring glsl = *psContext->currentGLSLString; Shader* psShader = psContext->psShader; - if(OutputNeedsDeclaring(psContext, &psDecl->asOperands[0], 1)) - { - const Operand* psOperand = &psDecl->asOperands[0]; - const char* Precision = ""; - const char* type = "vec"; + const char* Precision = ""; + const 
char* type = "vec"; - InOutSignature* psSignature = NULL; + const Operand* psOperand = &psDecl->asOperands[0]; + InOutSignature* psSignature = NULL; - GetOutputSignatureFromRegister( - psContext->currentPhase, - psDecl->asOperands[0].ui32RegisterNumber, - psDecl->asOperands[0].ui32CompMask, - psShader->ui32CurrentVertexOutputStream, - &psShader->sInfo, - &psSignature); + GetOutputSignatureFromRegister( + psContext->currentPhase, + psDecl->asOperands[0].ui32RegisterNumber, + psDecl->asOperands[0].ui32CompMask, + psShader->ui32CurrentVertexOutputStream, + &psShader->sInfo, + &psSignature); - switch(psSignature->eComponentType) + switch(psSignature->eComponentType) + { + case INOUT_COMPONENT_UINT32: { - case INOUT_COMPONENT_UINT32: + type = "uvec"; + break; + } + case INOUT_COMPONENT_SINT32: + { + type = "ivec"; + break; + } + case INOUT_COMPONENT_FLOAT32: + { + break; + } + } + + if(HavePrecisionQualifers(psShader->eTargetLanguage)) + { + switch(psOperand->eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: { - type = "uvec"; + Precision = "highp"; break; } - case INOUT_COMPONENT_SINT32: + case OPERAND_MIN_PRECISION_FLOAT_16: { - type = "ivec"; + Precision = "mediump"; break; } - case INOUT_COMPONENT_FLOAT32: + case OPERAND_MIN_PRECISION_FLOAT_2_8: { + Precision = "lowp"; break; } - } - - if(HavePrecisionQualifers(psShader->eTargetLanguage)) - { - switch(psOperand->eMinPrecision) + case OPERAND_MIN_PRECISION_SINT_16: { - case OPERAND_MIN_PRECISION_DEFAULT: - { - Precision = "highp"; - break; - } - case OPERAND_MIN_PRECISION_FLOAT_16: - { - Precision = "mediump"; - break; - } - case OPERAND_MIN_PRECISION_FLOAT_2_8: - { - Precision = "lowp"; - break; - } - case OPERAND_MIN_PRECISION_SINT_16: - { - Precision = "mediump"; - //type = "ivec"; - break; - } - case OPERAND_MIN_PRECISION_UINT_16: - { - Precision = "mediump"; - //type = "uvec"; - break; - } + Precision = "mediump"; + //type = "ivec"; + break; + } + case OPERAND_MIN_PRECISION_UINT_16: + { + Precision = 
"mediump"; + //type = "uvec"; + break; } } + } + if(OutputNeedsDeclaring(psContext, &psDecl->asOperands[0], 1)) + { switch(psShader->eShaderType) { case PIXEL_SHADER: @@ -978,7 +1070,13 @@ void AddUserOutput(HLSLCrossCompilerContext* psContext, const Declaration* psDec } case VERTEX_SHADER: { - int iNumComponents = 4;//GetMaxComponentFromComponentMask(&psDecl->asOperands[0]); + // int iNumComponents = 4;//GetMaxComponentFromComponentMask(&psDecl->asOperands[0]); + ASSERT(psDecl->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); + ASSERT(psDecl->asOperands[0].iWriteMaskEnabled && (psDecl->asOperands[0].iNumComponents == 4)); + int highestComponent = MSBBit(psDecl->asOperands[0].ui32CompMask); // (zero based bit indexes) + int lowestComponent = LSBBit(psDecl->asOperands[0].ui32CompMask); + int iNumComponents = highestComponent - lowestComponent + 1; + const char* Interpolation = ""; int stream = 0; const char* OutputName = GetDeclaredOutputName(psContext, VERTEX_SHADER, psOperand, &stream); @@ -993,8 +1091,21 @@ void AddUserOutput(HLSLCrossCompilerContext* psContext, const Declaration* psDec if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) { - if (!(psContext->flags & HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS)) - bformata(glsl, "layout(location = %d) ", psDecl->asOperands[0].ui32RegisterNumber); + if (!(psContext->flags & HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS)) + { + // note that we have to write the "component" qualifier for every variable that shares the + // same location (otherwise the glsl compiler will consider the variable to use all 4 components). + // Since we don't know if there will be future variables sharing this location, + // that means we have to write "component" for all variables... 
+ if (HasInterfaceComponentQualifier(psContext->psShader->eTargetLanguage)) + { + bformata(glsl, "layout(location = %d, component = %d) ", psDecl->asOperands[0].ui32RegisterNumber, lowestComponent); + } + else + { + bformata(glsl, "layout(location = %d) ", psDecl->asOperands[0].ui32RegisterNumber); + } + } } if(InOutSupported(psContext->psShader->eTargetLanguage)) @@ -1005,8 +1116,14 @@ void AddUserOutput(HLSLCrossCompilerContext* psContext, const Declaration* psDec { bformata(glsl, "%s varying %s %s%d %s;\n", Interpolation, Precision, type, iNumComponents, OutputName); } - bformata(glsl, "#define Output%d %s\n", psDecl->asOperands[0].ui32RegisterNumber, OutputName); + // We're going to define a special temporary vec4 for this output index. + // The values in this temporary will be redirected to the true outputs in a post + // shader fixup section. + // Note that there may be issues if the types of the overlapping outputs are not the same + // (ie, some are float, some are int) + bformata(glsl, "%s4 Output%d;\n", type, psDecl->asOperands[0].ui32RegisterNumber); + WriteOutputFixup(psContext, psOperand, OutputName); break; } case GEOMETRY_SHADER: @@ -1037,6 +1154,10 @@ void AddUserOutput(HLSLCrossCompilerContext* psContext, const Declaration* psDec ASSERT(psDecl->asOperands[0].ui32RegisterNumber!=0);//Reg 0 should be gl_out[gl_InvocationID].gl_Position. 
+ int highestComponent = MSBBit(psDecl->asOperands[0].ui32CompMask); // (zero based bit indexes) + int lowestComponent = LSBBit(psDecl->asOperands[0].ui32CompMask); + int iNumComponents = highestComponent - lowestComponent + 1; + if(psContext->currentPhase == HS_JOIN_PHASE) { bformata(glsl, "out patch %s4 %s[];\n", type, OutputName); @@ -1048,20 +1169,24 @@ void AddUserOutput(HLSLCrossCompilerContext* psContext, const Declaration* psDec bformata(glsl, "layout(location = %d) ", psDecl->asOperands[0].ui32RegisterNumber); } - bformata(glsl, "out %s4 %s[];\n", type, OutputName); + bformata(glsl, "out %s%d %s[];\n", type, iNumComponents, OutputName); } bformata(glsl, "#define Output%d %s[gl_InvocationID]\n", psDecl->asOperands[0].ui32RegisterNumber, OutputName); break; } case DOMAIN_SHADER: { + int highestComponent = MSBBit(psDecl->asOperands[0].ui32CompMask); // (zero based bit indexes) + int lowestComponent = LSBBit(psDecl->asOperands[0].ui32CompMask); + int iNumComponents = highestComponent - lowestComponent + 1; + int stream = 0; const char* OutputName = GetDeclaredOutputName(psContext, DOMAIN_SHADER, psOperand, &stream); if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) { bformata(glsl, "layout(location = %d) ", psDecl->asOperands[0].ui32RegisterNumber); } - bformata(glsl, "out %s4 %s;\n", type, OutputName); + bformata(glsl, "out %s%d %s;\n", type, iNumComponents, OutputName); bformata(glsl, "#define Output%d %s\n", psDecl->asOperands[0].ui32RegisterNumber, OutputName); break; } @@ -1095,60 +1220,180 @@ void AddUserOutput(HLSLCrossCompilerContext* psContext, const Declaration* psDec if((psContext->flags & (HLSLCC_FLAG_INOUT_SEMANTIC_NAMES|HLSLCC_FLAG_INOUT_APPEND_SEMANTIC_NAMES)) && (psDecl->asOperands[0].eType == OPERAND_TYPE_OUTPUT)) { - const Operand* psOperand = &psDecl->asOperands[0]; - InOutSignature* psSignature = NULL; - const char* type = "vec"; + const char* Interpolation = ""; int 
stream = 0; - const char* OutputName = GetDeclaredOutputName(psContext, psShader->eShaderType, psOperand, &stream); + const char* OutputName = GetDeclaredOutputName(psContext, VERTEX_SHADER, psOperand, &stream); - GetOutputSignatureFromRegister( - psContext->currentPhase, - psOperand->ui32RegisterNumber, - psOperand->ui32CompMask, - 0, - &psShader->sInfo, - &psSignature); + if(psContext->psDependencies) + { + if(psShader->eShaderType == VERTEX_SHADER) + { + Interpolation = GetInterpolationString(psContext->psDependencies->aePixelInputInterpolation[psDecl->asOperands[0].ui32RegisterNumber]); + } + } + + int highestComponent = MSBBit(psDecl->asOperands[0].ui32CompMask); // (zero based bit indexes) + int lowestComponent = LSBBit(psDecl->asOperands[0].ui32CompMask); + int iNumComponents = highestComponent - lowestComponent + 1; if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) { if (!((psShader->eShaderType == VERTEX_SHADER) && (psContext->flags & HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS))) - bformata(glsl, "layout(location = %d) ", psDecl->asOperands[0].ui32RegisterNumber); + { + if (HasInterfaceComponentQualifier(psContext->psShader->eTargetLanguage)) + { + bformata(glsl, "layout(location = %d, component = %d) ", psDecl->asOperands[0].ui32RegisterNumber, lowestComponent); + } + else + { + bformata(glsl, "layout(location = %d) ", psDecl->asOperands[0].ui32RegisterNumber); + } + } } - switch(psSignature->eComponentType) - { - case INOUT_COMPONENT_UINT32: - { - type = "uvec"; - break; - } - case INOUT_COMPONENT_SINT32: - { - type = "ivec"; - break; - } - case INOUT_COMPONENT_FLOAT32: - { - break; - } - } - bformata(glsl, "out %s4 %s;\n", type, OutputName); + bformata(glsl, "%s out %s %s%d %s;\n", Interpolation, Precision, type, iNumComponents, OutputName); + + WriteOutputFixup(psContext, psOperand, OutputName); + } + } +} + +void WriteUniformLayout( + HLSLCrossCompilerContext* psContext, + 
ResourceBinding* srcResBinding, + ConstantBuffer* srcCBBinding, + unsigned ui32BindingPoint, unsigned shaderStage, + const char* extraLayoutQualifiers, + bstring glsl) +{ + // Write the "layout(...)" decoration that should precede uniform objects + // We have 3 qualifiers we're going to consider: + // "location" + // "binding" + // "set" + // + // This implementation was intended to support a path from HLSL to SPIR-V (via GLSL). + // Since HLSL 5 has no "descriptor set" concept, we're going to assign one based on + // the resource type. + // For example, in HLSL we can assign a resource to "register(t8)" or "register(b8)" + // 8 is the binding point, but we need to use different descriptor sets to separate + // the texture resources from the buffer resources (etc) + // + // If the caller has provided a function for evaluating correct bindings, then we + // should use that. Otherwise, we have a few built-in rules... + // + // Note that to maintain backwards compatibility, we follow these rules: + // * when HLSLCC_FLAG_PREFER_BINDINGS is set, we always write a binding, and never a location + // * otherwise, we write bindings for UBO and unordered access buffers, but write a location for resources + // The second case is the previous behaviour of this library.
+ + if (psContext->pEvaluateBindingFn) { + GLSLResourceBinding binding; + binding._locationIndex = ~0u; + binding._bindingIndex = ~0u; + binding._setIndex = ~0u; + binding._flags = 0; + uint32_t bindingAttempt = + (*psContext->pEvaluateBindingFn)( + psContext->pEvaluateBindingData, + &binding, + srcResBinding, srcCBBinding, + ui32BindingPoint, shaderStage); + if (bindingAttempt) { + bcatcstr(glsl, "layout("); + int needComma = 0; + + if (binding._flags & GLSL_BINDING_TYPE_PUSHCONSTANTS) { + bcatcstr(glsl, "push_constant"); + needComma = 1; + } - psContext->havePostShaderCode[psContext->currentPhase] = 1; + if (binding._locationIndex != ~0u) { + if (needComma) bcatcstr(glsl, ", "); + bformata(glsl, "location = %d", binding._locationIndex); + needComma = 1; + } + + if (binding._bindingIndex != ~0u) { + if (needComma) bcatcstr(glsl, ", "); + bformata(glsl, "binding = %d", binding._bindingIndex); + needComma = 1; + } + + if (binding._setIndex != ~0u) { + if (needComma) bcatcstr(glsl, ", "); + bformata(glsl, "set = %d", binding._setIndex); + needComma = 1; + } + + if (extraLayoutQualifiers != NULL && extraLayoutQualifiers[0]) { + if (needComma) bcatcstr(glsl, ", "); + bcatcstr(glsl, extraLayoutQualifiers); + needComma = 1; + } + + bcatcstr(glsl, ") "); + return; + } + } - psContext->currentGLSLString = &psContext->postShaderCode[psContext->currentPhase]; - glsl = *psContext->currentGLSLString; + ResourceGroup resGroup = RGROUP_CBUFFER; + if (srcResBinding) + resGroup = ResourceTypeToResourceGroup(srcResBinding->eType); - bcatcstr(glsl, OutputName); - AddSwizzleUsingElementCount(psContext, GetNumSwizzleElements(psOperand)); - bformata(glsl, " = Output%d", psOperand->ui32RegisterNumber); - TranslateOperandSwizzle(psContext, psOperand); - bcatcstr(glsl, ";\n"); + unsigned preferBindings = (resGroup == RGROUP_CBUFFER) || (resGroup == RGROUP_UAV); - psContext->currentGLSLString = &psContext->glsl; - glsl = *psContext->currentGLSLString; + if (preferBindings && 
HaveBindingQualifier(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) + { + const unsigned assignSet = 0; + if (!assignSet) + { + // when not assigning a set, we need to shift the binding point to after all constant buffers + // (to avoid overlapping) + if (resGroup == RGROUP_TEXTURE) + ui32BindingPoint += psContext->psShader->sInfo.ui32NumConstantBuffers; } + + bformata(glsl, "layout(binding = %d", ui32BindingPoint); + + if (extraLayoutQualifiers != NULL && extraLayoutQualifiers[0]) { + bcatcstr(glsl, ", "); + bcatcstr(glsl, extraLayoutQualifiers); + } + bcatcstr(glsl, ") "); + + return; } + + // If we haven't selected to write a binding, let's write a location. This maintains backward compatibility + if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) + { + if (resGroup == RGROUP_TEXTURE) + ui32BindingPoint += psContext->psShader->sInfo.ui32NumConstantBuffers; + + bformata(glsl, "layout(location = %d", ui32BindingPoint); + + if (extraLayoutQualifiers != NULL && extraLayoutQualifiers[0]) { + bcatcstr(glsl, ", "); + bcatcstr(glsl, extraLayoutQualifiers); + } + bcatcstr(glsl, ") "); + } +} + +static void InsertUBOInstName(ShaderVarType* dst, const char uboInstName[]) +{ + uint32_t i; + size_t len = strlen(uboInstName); + size_t dstLen = strlen(dst->FullName); + char* insert = dst->FullName; + + memmove_s(insert+len+1, MAX_REFLECT_STRING_LENGTH, insert, dstLen+1); + memcpy(insert, uboInstName, len); + insert[len] = '.'; + + for (i=0; i<dst->MemberCount; ++i) + InsertUBOInstName(&dst->Members[i], uboInstName); } void DeclareUBOConstants(HLSLCrossCompilerContext* psContext, const uint32_t ui32BindingPoint, @@ -1170,9 +1415,7 @@ void DeclareUBOConstants(HLSLCrossCompilerContext* psContext, const uint32_t ui3 } /* [layout (location = X)] uniform vec4 HLSLConstantBufferName[numConsts]; */ - if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage,
psContext->psShader->extensions, psContext->flags)) - bformata(glsl, "layout(binding = %d) ", ui32BindingPoint); - + WriteUniformLayout(psContext, NULL, psCBuf, ui32BindingPoint, psContext->psShader->eShaderType, NULL, glsl); bformata(glsl, "uniform %s {\n ", Name); for(i=0; i < psCBuf->ui32NumVars; ++i) @@ -1181,12 +1424,30 @@ void DeclareUBOConstants(HLSLCrossCompilerContext* psContext, const uint32_t ui3 psCBuf->asVars[i].Name, &psCBuf->asVars[i].sType, 0); } - - bcatcstr(glsl, "};\n"); + + // When GL_KHR_vulkan_glsl is enabled, always use an "instance" name for cbuffers + // This is required when using the layout(push_constant) qualifier. But at this point, + // we can't easily see what binding was used. So we just use an instance name for all + // cbuffers. + uint32_t useUBOInstName = + psContext->psShader->extensions && ((GlExtensions*)psContext->psShader->extensions)->GL_KHR_vulkan_glsl; + + if (useUBOInstName) { + char instName[MAX_REFLECT_STRING_LENGTH]; + strcpy_s(instName, MAX_REFLECT_STRING_LENGTH, Name); + strcat_s(instName, MAX_REFLECT_STRING_LENGTH, "_inst"); + for(i=0; i < psCBuf->ui32NumVars; ++i) + { + InsertUBOInstName(&psCBuf->asVars[i].sType, instName); + } + bformata(glsl, "} %s;\n", instName); + } else { + bcatcstr(glsl, "};\n"); + } } void DeclareBufferVariable(HLSLCrossCompilerContext* psContext, const uint32_t ui32BindingPoint, - ConstantBuffer* psCBuf, const Operand* psOperand, + ConstantBuffer* psCBuf, ResourceBinding* pResourceBinding, const Operand* psOperand, const uint32_t ui32GloballyCoherentAccess, const ResourceType eResourceType, bstring glsl) @@ -1210,7 +1471,7 @@ void DeclareBufferVariable(HLSLCrossCompilerContext* psContext, const uint32_t u } else { - ResourceName(StructName, psContext, RGROUP_UAV, psOperand->ui32RegisterNumber, 0); + bformata(StructName, "StorageBuffer%d", psOperand->ui32RegisterNumber); } PreDeclareStructType(glsl, @@ -1218,8 +1479,7 @@ void DeclareBufferVariable(HLSLCrossCompilerContext* psContext, const 
uint32_t u &psCBuf->asVars[0].sType); /* [layout (location = X)] uniform vec4 HLSLConstantBufferName[numConsts]; */ - if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) - bformata(glsl, "layout(binding = %d) ", ui32BindingPoint); + WriteUniformLayout(psContext, pResourceBinding, psCBuf, ui32BindingPoint, psContext->psShader->eShaderType, NULL, glsl); if(ui32GloballyCoherentAccess & GLOBALLY_COHERENT_ACCESS) { @@ -1231,7 +1491,7 @@ void DeclareBufferVariable(HLSLCrossCompilerContext* psContext, const uint32_t u bcatcstr(glsl, "readonly "); } - bformata(glsl, "buffer Block%d {\n", psOperand->ui32RegisterNumber); + bformata(glsl, "buffer %s {\n", psCBuf->Name); DeclareConstBufferShaderVariable(glsl, bstr2cstr(StructName, '\0'), @@ -1453,6 +1713,169 @@ char* GetSamplerType(HLSLCrossCompilerContext* psContext, return "sampler2D"; } +char* GetTextureType(HLSLCrossCompilerContext* psContext, + const RESOURCE_DIMENSION eDimension, + const uint32_t ui32RegisterNumber) +{ + // This is used when GL_KHR_vulkan_glsl is enabled, and allows us to specify + // a texture object independently of any sampling function (much like the DirectX + // method of separating textures and samplers) + // Just following the pattern from GetSamplerType very closely + + ResourceBinding* psBinding = 0; + RESOURCE_RETURN_TYPE eType = RETURN_TYPE_UNORM; + int found; + found = GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32RegisterNumber, &psContext->psShader->sInfo, &psBinding); + if(found) + { + eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; + } + switch(eDimension) + { + case RESOURCE_DIMENSION_BUFFER: + { + switch(eType) + { + case RETURN_TYPE_SINT: + return "itextureBuffer"; + case RETURN_TYPE_UINT: + return "utextureBuffer"; + default: + return "samplerBuffer"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE1D: + { + switch(eType) + { + case RETURN_TYPE_SINT: + return "itexture1D"; + case RETURN_TYPE_UINT: + return "utexture1D"; + default: + return "texture1D"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2D: + { + switch(eType) + { + case RETURN_TYPE_SINT: + return "itexture2D"; + case RETURN_TYPE_UINT: + return "utexture2D"; + default: + return "texture2D"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DMS: + { + switch(eType) + { + case RETURN_TYPE_SINT: + return "itexture2DMS"; + case RETURN_TYPE_UINT: + return "utexture2DMS"; + default: + return "texture2DMS"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE3D: + { + switch(eType) + { + case RETURN_TYPE_SINT: + return "itexture3D"; + case
RETURN_TYPE_UINT: + return "utexture1D"; + default: + return "texture1D"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2D: + { + switch(eType) + { + case RETURN_TYPE_SINT: + return "itexture2D"; + case RETURN_TYPE_UINT: + return "utexture2D"; + default: + return "texture2D"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DMS: + { + switch(eType) + { + case RETURN_TYPE_SINT: + return "itexture2DMS"; + case RETURN_TYPE_UINT: + return "utexture2DMS"; + default: + return "texture2DMS"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE3D: + { + switch(eType) + { + case RETURN_TYPE_SINT: + return "itexture3D"; + case RETURN_TYPE_UINT: + return "utexture3D"; + default: + return "texture3D"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURECUBE: + { + switch(eType) + { + case RETURN_TYPE_SINT: + return "itextureCube"; + case RETURN_TYPE_UINT: + return "utextureCube"; + default: + return "textureCube"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + switch(eType) + { + case RETURN_TYPE_SINT: + return "itexture1DArray"; + case RETURN_TYPE_UINT: + return "utexture1DArray"; + default: + return "texture1DArray"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + switch(eType) + { + case RETURN_TYPE_SINT: + return "itexture2DArray"; + case RETURN_TYPE_UINT: + return "utexture2DArray"; + default: + return "texture2DArray"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + switch(eType) + { + case RETURN_TYPE_SINT: + return "itexture2DMSArray"; + case RETURN_TYPE_UINT: + return "utexture2DMSArray"; + default: + return "texture2DMSArray"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + switch(eType) + { + case RETURN_TYPE_SINT: + return "itextureCubeArray"; + case RETURN_TYPE_UINT: + return "utextureCubeArray"; + default: + return "textureCubeArray"; + } + break; + } + } + + return "texture2D"; +} + static void TranslateResourceTexture(HLSLCrossCompilerContext* psContext, const 
Declaration* psDecl, uint32_t samplerCanDoShadowCmp) { bstring glsl = *psContext->currentGLSLString; @@ -1484,6 +1907,13 @@ static void TranslateResourceTexture(HLSLCrossCompilerContext* psContext, const ConcatTextureSamplerName(glsl, &psShader->sInfo, psDecl->asOperands[0].ui32RegisterNumber, psDecl->ui32SamplerUsed[i], 0); bcatcstr(glsl, ";\n"); } + } + else if (HaveSeparateTexturesAndSamplers(psContext->psShader->eTargetLanguage, psContext->psShader->extensions)) + { + samplerTypeName = GetTextureType( + psContext, + psDecl->value.eResourceDimension, + psDecl->asOperands[0].ui32RegisterNumber); } if(samplerCanDoShadowCmp && psDecl->ui32IsShadowTex) @@ -1540,7 +1970,14 @@ void TranslateDeclaration(HLSLCrossCompilerContext* psContext, const Declaration } case NAME_INSTANCE_ID: { - AddBuiltinInput(psContext, psDecl, "gl_InstanceID"); + if (UseSPIRVNames(psContext->psShader->eTargetLanguage, psContext->psShader->extensions)) + { + AddBuiltinInput(psContext, psDecl, "gl_InstanceIndex"); + } + else + { + AddBuiltinInput(psContext, psDecl, "gl_InstanceID"); + } break; } case NAME_IS_FRONT_FACE: @@ -1551,7 +1988,22 @@ void TranslateDeclaration(HLSLCrossCompilerContext* psContext, const Declaration Suggests no implicit conversion for bool<->int. 
*/ - AddBuiltinInput(psContext, psDecl, "int(gl_FrontFacing)"); + SHADER_VARIABLE_TYPE eType = GetOperandDataType(psContext, &psDecl->asOperands[0]); + switch (eType) + { + case SVT_INT: + AddBuiltinInput(psContext, psDecl, "int(gl_FrontFacing)"); + break; + case SVT_UINT: + AddBuiltinInput(psContext, psDecl, "uint(gl_FrontFacing)"); + break; + case SVT_BOOL: + AddBuiltinInput(psContext, psDecl, "bool(gl_FrontFacing)"); + break; + default: + AddBuiltinInput(psContext, psDecl, "float(gl_FrontFacing)"); + break; + } break; } case NAME_SAMPLE_INDEX: @@ -1561,7 +2013,14 @@ void TranslateDeclaration(HLSLCrossCompilerContext* psContext, const Declaration } case NAME_VERTEX_ID: { - AddBuiltinInput(psContext, psDecl, "gl_VertexID"); + if (UseSPIRVNames(psContext->psShader->eTargetLanguage, psContext->psShader->extensions)) + { + AddBuiltinInput(psContext, psDecl, "gl_VertexIndex"); + } + else + { + AddBuiltinInput(psContext, psDecl, "gl_VertexID"); + } break; } case NAME_PRIMITIVE_ID: @@ -1732,7 +2191,6 @@ void TranslateDeclaration(HLSLCrossCompilerContext* psContext, const Declaration Would generate a vec2 and a vec3. We discard the second one making .z invalid! */ - int iNumComponents = 4;//GetMaxComponentFromComponentMask(psOperand); const char* StorageQualifier = "attribute"; const char* InputName; const char* Precision = ""; @@ -1795,7 +2253,7 @@ Would generate a vec2 and a vec3. We discard the second one making .z invalid! } DeclareInput(psContext, psDecl, - "", StorageQualifier, Precision, iNumComponents, (OPERAND_INDEX_DIMENSION)psOperand->iIndexDims, InputName); + "", StorageQualifier, Precision, (OPERAND_INDEX_DIMENSION)psOperand->iIndexDims, InputName); break; } @@ -1822,7 +2280,6 @@ Would generate a vec2 and a vec3. We discard the second one making .z invalid! 
case OPCODE_DCL_INPUT_PS: { const Operand* psOperand = &psDecl->asOperands[0]; - int iNumComponents = 4;//GetMaxComponentFromComponentMask(psOperand); const char* StorageQualifier = "varying"; const char* Precision = ""; const char* InputName = GetDeclaredInputName(psContext, PIXEL_SHADER, psOperand); @@ -1904,7 +2361,7 @@ Would generate a vec2 and a vec3. We discard the second one making .z invalid! } DeclareInput(psContext, psDecl, - Interpolation, StorageQualifier, Precision, iNumComponents, INDEX_1D, InputName); + Interpolation, StorageQualifier, Precision, INDEX_1D, InputName); break; } @@ -2022,37 +2479,47 @@ Would generate a vec2 and a vec3. We discard the second one making .z invalid! } case OPCODE_DCL_RESOURCE: { - if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) - { - // Explicit layout bindings are not currently compatible with combined texture samplers. The layout below assumes there is exactly one GLSL sampler - // for each HLSL texture declaration, but when combining textures+samplers, there can be multiple OGL samplers for each HLSL texture declaration. - if((psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) != HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) - { - //Constant buffer locations start at 0. Resource locations start at ui32NumConstantBuffers. - bformata(glsl, "layout(location = %d) ", - psContext->psShader->sInfo.ui32NumConstantBuffers + psDecl->asOperands[0].ui32RegisterNumber); - } + // Explicit layout bindings are not currently compatible with combined texture samplers. The layout below assumes there is exactly one GLSL sampler + // for each HLSL texture declaration, but when combining textures+samplers, there can be multiple OGL samplers for each HLSL texture declaration. 
+ if ((psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) != HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) + { + ResourceBinding* psBinding = 0; + int found = GetResourceFromBindingPoint(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, &psContext->psShader->sInfo, &psBinding); + WriteUniformLayout(psContext, psBinding, NULL, psDecl->asOperands[0].ui32RegisterNumber, psContext->psShader->eShaderType, NULL, glsl); } + int canDoShadowCmp = 1; + if (HaveSeparateTexturesAndSamplers(psContext->psShader->eTargetLanguage, psContext->psShader->extensions)) + canDoShadowCmp = 0; + switch(psDecl->value.eResourceDimension) { case RESOURCE_DIMENSION_BUFFER: { - bformata(glsl, "uniform %s ", GetSamplerType(psContext, + const char* samplerTypeName = GetSamplerType( + psContext, RESOURCE_DIMENSION_BUFFER, - psDecl->asOperands[0].ui32RegisterNumber)); + psDecl->asOperands[0].ui32RegisterNumber); + if (HaveSeparateTexturesAndSamplers(psContext->psShader->eTargetLanguage, psContext->psShader->extensions)) + { + samplerTypeName = GetTextureType( + psContext, + RESOURCE_DIMENSION_BUFFER, + psDecl->asOperands[0].ui32RegisterNumber); + } + bformata(glsl, "uniform %s ", samplerTypeName); TranslateOperand(psContext, &psDecl->asOperands[0], TO_FLAG_NONE); bcatcstr(glsl, ";\n"); break; } case RESOURCE_DIMENSION_TEXTURE1D: { - TranslateResourceTexture(psContext, psDecl, 1); + TranslateResourceTexture(psContext, psDecl, canDoShadowCmp); break; } case RESOURCE_DIMENSION_TEXTURE2D: { - TranslateResourceTexture(psContext, psDecl, 1); + TranslateResourceTexture(psContext, psDecl, canDoShadowCmp); break; } case RESOURCE_DIMENSION_TEXTURE2DMS: @@ -2067,17 +2534,17 @@ Would generate a vec2 and a vec3. We discard the second one making .z invalid! 
} case RESOURCE_DIMENSION_TEXTURECUBE: { - TranslateResourceTexture(psContext, psDecl, 1); + TranslateResourceTexture(psContext, psDecl, canDoShadowCmp); break; } case RESOURCE_DIMENSION_TEXTURE1DARRAY: { - TranslateResourceTexture(psContext, psDecl, 1); + TranslateResourceTexture(psContext, psDecl, canDoShadowCmp); break; } case RESOURCE_DIMENSION_TEXTURE2DARRAY: { - TranslateResourceTexture(psContext, psDecl, 1); + TranslateResourceTexture(psContext, psDecl, canDoShadowCmp); break; } case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: @@ -2087,7 +2554,7 @@ Would generate a vec2 and a vec3. We discard the second one making .z invalid! } case RESOURCE_DIMENSION_TEXTURECUBEARRAY: { - TranslateResourceTexture(psContext, psDecl, 1); + TranslateResourceTexture(psContext, psDecl, canDoShadowCmp); break; } } @@ -2437,6 +2904,24 @@ Would generate a vec2 and a vec3. We discard the second one making .z invalid! } case OPCODE_DCL_SAMPLER: { + if ((psContext->flags & HLSLCC_FLAG_DISABLE_VULKAN_DUMMIES) != HLSLCC_FLAG_DISABLE_VULKAN_DUMMIES) + { + if ((psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) != HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) + { + ResourceBinding* psBinding = 0; + int found = GetResourceFromBindingPoint(RGROUP_SAMPLER, psDecl->asOperands[0].ui32RegisterNumber, &psContext->psShader->sInfo, &psBinding); + WriteUniformLayout(psContext, psBinding, NULL, psDecl->asOperands[0].ui32RegisterNumber, psContext->psShader->eShaderType, NULL, glsl); + + if (psBinding != NULL && (psBinding->ui32Flags & REFLECT_RESOURCE_FLAGS_COMPARISON_SAMPLER)) { /* psBinding stays NULL when the sampler has no reflection entry; declare a plain sampler then */ + bformata(glsl, "uniform samplerShadow "); + } + else { + bformata(glsl, "uniform sampler "); + } + TranslateOperand(psContext, &psDecl->asOperands[0], TO_FLAG_NONE); + bcatcstr(glsl, ";\n"); + } + } break; } case OPCODE_DCL_HS_MAX_TESSFACTOR: @@ -2448,6 +2933,7 @@ Would generate a vec2 and a vec3. We discard the second one making .z invalid! { // non-float images need either 'i' or 'u' prefix.
char imageTypePrefix[2] = { 0, 0 }; + char* extraLayoutQualifiers = ""; if(psDecl->sUAV.ui32GloballyCoherentAccess & GLOBALLY_COHERENT_ACCESS) { bcatcstr(glsl, "coherent "); @@ -2467,20 +2953,20 @@ Would generate a vec2 and a vec3. We discard the second one making .z invalid! switch(psDecl->sUAV.Type) { case RETURN_TYPE_FLOAT: - bcatcstr(glsl, "layout(rgba32f) "); + extraLayoutQualifiers = "rgba32f"; break; case RETURN_TYPE_UNORM: - bcatcstr(glsl, "layout(rgba8) "); + extraLayoutQualifiers = "rgba8"; break; case RETURN_TYPE_SNORM: - bcatcstr(glsl, "layout(rgba8_snorm) "); + extraLayoutQualifiers = "rgba8_snorm"; break; case RETURN_TYPE_UINT: - bcatcstr(glsl, "layout(rgba32ui) "); + extraLayoutQualifiers = "rgba32ui"; imageTypePrefix[0] = 'u'; break; case RETURN_TYPE_SINT: - bcatcstr(glsl, "layout(rgba32i) "); + extraLayoutQualifiers = "rgba32i"; imageTypePrefix[0] = 'i'; break; default: @@ -2488,6 +2974,12 @@ Would generate a vec2 and a vec3. We discard the second one making .z invalid! } } + { + ResourceBinding* psBinding = 0; + int found = GetResourceFromBindingPoint(RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, &psContext->psShader->sInfo, &psBinding); + WriteUniformLayout(psContext, psBinding, NULL, psDecl->asOperands[0].ui32RegisterNumber, psContext->psShader->eShaderType, extraLayoutQualifiers, glsl); + } + switch(psDecl->value.eResourceDimension) { case RESOURCE_DIMENSION_BUFFER: @@ -2543,12 +3035,16 @@ Would generate a vec2 and a vec3. We discard the second one making .z invalid! 
} TranslateOperand(psContext, &psDecl->asOperands[0], TO_FLAG_NONE); bcatcstr(glsl, ";\n"); + + ASSERT(psDecl->asOperands[0].ui32RegisterNumber < MAX_TEXTURES); + psShader->aeUAVResourceDims[psDecl->asOperands[0].ui32RegisterNumber] = psDecl->value.eResourceDimension; break; } case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: { const uint32_t ui32BindingPoint = psDecl->asOperands[0].aui32ArraySizes[0]; ConstantBuffer* psCBuf = NULL; + ResourceBinding* pResBinding = NULL; if(psDecl->sUAV.bCounter) { @@ -2559,8 +3055,9 @@ Would generate a vec2 and a vec3. We discard the second one making .z invalid! GetConstantBufferFromBindingPoint(RGROUP_UAV, ui32BindingPoint, &psContext->psShader->sInfo, &psCBuf); psCBuf->iUnsized = 1; + GetResourceFromBindingPoint(RGROUP_UAV, ui32BindingPoint, &psContext->psShader->sInfo, &pResBinding); - DeclareBufferVariable(psContext, ui32BindingPoint, psCBuf, &psDecl->asOperands[0], + DeclareBufferVariable(psContext, ui32BindingPoint, psCBuf, pResBinding, &psDecl->asOperands[0], psDecl->sUAV.ui32GloballyCoherentAccess, RTYPE_UAV_RWSTRUCTURED, glsl); break; } @@ -2582,11 +3079,13 @@ Would generate a vec2 and a vec3. We discard the second one making .z invalid! case OPCODE_DCL_RESOURCE_STRUCTURED: { ConstantBuffer* psCBuf = NULL; + ResourceBinding* pResBinding = NULL; GetConstantBufferFromBindingPoint(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, &psContext->psShader->sInfo, &psCBuf); psCBuf->iUnsized = 1; + GetResourceFromBindingPoint(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, &psContext->psShader->sInfo, &pResBinding); - DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, psCBuf, &psDecl->asOperands[0], + DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, psCBuf, pResBinding, &psDecl->asOperands[0], 0, RTYPE_STRUCTURED, glsl); break; } @@ -2638,6 +3137,41 @@ Would generate a vec2 and a vec3. We discard the second one making .z invalid! 
} } +void TranslateDeclSignatureFmt(HLSLCrossCompilerContext* psContext, char* fmt, InOutSignature sig) +{ + bstring glsl = psContext->glsl; + char* chVec = "vec"; /* fallback: a component type not handled below must not reach bformata uninitialized */ + switch (sig.eComponentType) { + case INOUT_COMPONENT_FLOAT32:{ + chVec = "vec"; + break; + } + case INOUT_COMPONENT_SINT32: { + chVec = "ivec"; + break; + } + case INOUT_COMPONENT_UINT32: { + chVec = "uvec"; + break; + } + } + bformata(glsl, fmt, chVec, GetNumberBitsSet(sig.ui32Mask), sig.SemanticName, sig.ui32SemanticIndex); +} + +void TranslateDeclaration_HS_NoControlPointStage(HLSLCrossCompilerContext* psContext) +{ + Shader* sh = psContext->psShader; + bstring glsl = psContext->glsl; + for (uint32_t i = 0; i < sh->sInfo.ui32NumInputSignatures; i++){ + if (sh->sInfo.psInputSignatures[i].eSystemValueType != NAME_UNDEFINED) continue; + TranslateDeclSignatureFmt(psContext, "in %s%d %s%d[];\n", sh->sInfo.psInputSignatures[i]); + } + for (uint32_t i = 0; i < sh->sInfo.ui32NumOutputSignatures; i++){ + if (sh->sInfo.psOutputSignatures[i].eSystemValueType != NAME_UNDEFINED) continue; + TranslateDeclSignatureFmt(psContext, "out %s%d %s%d[];\n", sh->sInfo.psOutputSignatures[i]); + } +} + //Convert from per-phase temps to global temps for GLSL.
void ConsolidateHullTempVars(Shader* psShader) { diff --git a/src/toGLSLInstruction.c b/src/toGLSLInstruction.c index eee67af..fa8b9ba 100644 --- a/src/toGLSLInstruction.c +++ b/src/toGLSLInstruction.c @@ -217,7 +217,9 @@ static void AddComparision(HLSLCrossCompilerContext* psContext, Instruction* psI TranslateOperand(psContext, &psInst->asOperands[1], typeFlag); bcatcstr(glsl, ", "); TranslateOperand(psContext, &psInst->asOperands[2], typeFlag); - bcatcstr(glsl, "))"); + bcatcstr(glsl, ")"); + TranslateOperandSwizzle(psContext, &psInst->asOperands[0]); + bcatcstr(glsl, ")"); if (!floatResult) { bcatcstr(glsl, " * 0xFFFFFFFFu"); @@ -371,7 +373,6 @@ static void AddMOVCBinaryOp(HLSLCrossCompilerContext* psContext, const Operand * else { // TODO: We can actually do this in one op using mix(). - int srcElem = 0; for (destElem = 0; destElem < 4; ++destElem) { int numParenthesis = 0; @@ -381,7 +382,7 @@ static void AddMOVCBinaryOp(HLSLCrossCompilerContext* psContext, const Operand * AddIndentation(psContext); AddOpAssignToDestWithMask(psContext, pDest, eDestType, 1, "=", &numParenthesis, 1 << destElem); bcatcstr(glsl, "("); - TranslateOperandWithMask(psContext, src0, TO_AUTO_BITCAST_TO_INT, 1 << srcElem); + TranslateOperandWithMask(psContext, src0, TO_AUTO_BITCAST_TO_INT, 1 << destElem); if (psContext->psShader->ui32MajorVersion < 4) { //cmp opcode uses >= 0 @@ -392,13 +393,11 @@ static void AddMOVCBinaryOp(HLSLCrossCompilerContext* psContext, const Operand * bcatcstr(glsl, " != 0) ? 
"); } - TranslateOperandWithMask(psContext, src1, SVTTypeToFlag(eDestType), 1 << srcElem); + TranslateOperandWithMask(psContext, src1, SVTTypeToFlag(eDestType), 1 << destElem); bcatcstr(glsl, " : "); - TranslateOperandWithMask(psContext, src2, SVTTypeToFlag(eDestType), 1 << srcElem); + TranslateOperandWithMask(psContext, src2, SVTTypeToFlag(eDestType), 1 << destElem); AddAssignPrologue(psContext, numParenthesis); - - srcElem++; } } } @@ -451,6 +450,24 @@ static int IsOperationCommutative(OPCODE_TYPE eOpCode) }; } +static void CallUnaryOp(HLSLCrossCompilerContext* psContext, const char* name, Instruction* psInst, + int dest, int src0, SHADER_VARIABLE_TYPE eDataType) +{ + bstring glsl = *psContext->currentGLSLString; + uint32_t src0SwizCount = GetNumSwizzleElements(&psInst->asOperands[src0]); + uint32_t dstSwizCount = GetNumSwizzleElements(&psInst->asOperands[dest]); + uint32_t destMask = GetOperandWriteMask(&psInst->asOperands[dest]); + int needsParenthesis = 0; + + AddIndentation(psContext); + + AddAssignToDest(psContext, &psInst->asOperands[dest], eDataType, dstSwizCount, &needsParenthesis); + + bformata(glsl, " %s ", name); + TranslateOperandWithMask(psContext, &psInst->asOperands[src0], SVTTypeToFlag(eDataType), destMask); + AddAssignPrologue(psContext, needsParenthesis); +} + static void CallBinaryOp(HLSLCrossCompilerContext* psContext, const char* name, Instruction* psInst, int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType) { @@ -659,6 +676,42 @@ static void CallHelper1Int(HLSLCrossCompilerContext* psContext, AddAssignPrologue(psContext, numParenthesis); } +char* GetSamplerType(HLSLCrossCompilerContext* psContext, + const RESOURCE_DIMENSION eDimension, + const uint32_t ui32RegisterNumber); + +static void WriteSamplerExpression(HLSLCrossCompilerContext* psContext, uint32_t textureRegisterNumber, uint32_t samplerRegisterNumber, const int bZCompare) +{ + bstring glsl = *psContext->currentGLSLString; + const int useCombinedTextureSamplers = 
(psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) ? 1 : 0; + if (useCombinedTextureSamplers) { + if (samplerRegisterNumber != ~0) { + bconcat(glsl, TextureSamplerName(&psContext->psShader->sInfo, textureRegisterNumber, samplerRegisterNumber, bZCompare)); + } else { + ResourceName(glsl, psContext, RGROUP_TEXTURE, textureRegisterNumber, bZCompare); + } + } else if (HaveSeparateTexturesAndSamplers(psContext->psShader->eTargetLanguage, psContext->psShader->extensions)) { + char* combinedSamplerType = GetSamplerType(psContext, psContext->psShader->aeResourceDims[textureRegisterNumber], textureRegisterNumber); + bcatcstr(glsl, combinedSamplerType); + if (bZCompare) + bcatcstr(glsl, "Shadow"); + bcatcstr(glsl, "("); + ResourceName(glsl, psContext, RGROUP_TEXTURE, textureRegisterNumber, 0); + bcatcstr(glsl, ", "); + if (samplerRegisterNumber != ~0) { + ResourceName(glsl, psContext, RGROUP_SAMPLER, samplerRegisterNumber, 0); + } else { + // These cases are generated by methods such as "Load" in HLSL. Load takes no + // sampler in HLSL, but GLSL requires some sampler object (see the description of "GL_KHR_vulkan_glsl") + // We must provide a dummy sampler to get valid GLSL output. 
+ bcatcstr(glsl, "hlslcc_DummySampler"); + } + bcatcstr(glsl, ")"); + } else { + ResourceName(glsl, psContext, RGROUP_TEXTURE, textureRegisterNumber, bZCompare); + } +} + static void TranslateTexelFetch(HLSLCrossCompilerContext* psContext, Instruction* psInst, ResourceBinding* psBinding, @@ -675,7 +728,7 @@ static void TranslateTexelFetch(HLSLCrossCompilerContext* psContext, case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: case REFLECT_RESOURCE_DIMENSION_BUFFER: { - TranslateOperand(psContext, &psInst->asOperands[2], TO_FLAG_NONE); + WriteSamplerExpression(psContext, psInst->asOperands[2].ui32RegisterNumber, ~0, 0); bcatcstr(glsl, ", "); TranslateOperandWithMask(psContext, &psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); if (psBinding->eDimension != REFLECT_RESOURCE_DIMENSION_BUFFER) @@ -686,7 +739,7 @@ static void TranslateTexelFetch(HLSLCrossCompilerContext* psContext, case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: { - TranslateOperand(psContext, &psInst->asOperands[2], TO_FLAG_NONE); + WriteSamplerExpression(psContext, psInst->asOperands[2].ui32RegisterNumber, ~0, 0); bcatcstr(glsl, ", "); TranslateOperandWithMask(psContext, &psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); bcatcstr(glsl, ", 0)"); @@ -695,7 +748,7 @@ static void TranslateTexelFetch(HLSLCrossCompilerContext* psContext, case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: { - TranslateOperand(psContext, &psInst->asOperands[2], TO_FLAG_NONE); + WriteSamplerExpression(psContext, psInst->asOperands[2].ui32RegisterNumber, ~0, 0); bcatcstr(glsl, ", "); TranslateOperandWithMask(psContext, &psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); bcatcstr(glsl, ", 0)"); @@ -704,7 +757,7 @@ static void TranslateTexelFetch(HLSLCrossCompilerContext* psContext, case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: // TODO does this make any sense at all? 
{ ASSERT(psInst->eOpcode == OPCODE_LD_MS); - TranslateOperand(psContext, &psInst->asOperands[2], TO_FLAG_NONE); + WriteSamplerExpression(psContext, psInst->asOperands[2].ui32RegisterNumber, ~0, 0); bcatcstr(glsl, ", "); TranslateOperandWithMask(psContext, &psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); bcatcstr(glsl, ", "); @@ -715,7 +768,7 @@ static void TranslateTexelFetch(HLSLCrossCompilerContext* psContext, case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: { ASSERT(psInst->eOpcode == OPCODE_LD_MS); - TranslateOperand(psContext, &psInst->asOperands[2], TO_FLAG_NONE); + WriteSamplerExpression(psContext, psInst->asOperands[2].ui32RegisterNumber, ~0, 0); bcatcstr(glsl, ", "); TranslateOperandWithMask(psContext, &psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); bcatcstr(glsl, ", "); @@ -753,7 +806,7 @@ static void TranslateTexelFetchOffset(HLSLCrossCompilerContext* psContext, { case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: { - TranslateOperand(psContext, &psInst->asOperands[2], TO_FLAG_NONE); + WriteSamplerExpression(psContext, psInst->asOperands[2].ui32RegisterNumber, ~0, 0); bcatcstr(glsl, ", "); TranslateOperandWithMask(psContext, &psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); bformata(glsl, ", 0, %d)", psInst->iUAddrOffset); @@ -761,7 +814,7 @@ static void TranslateTexelFetchOffset(HLSLCrossCompilerContext* psContext, } case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: { - TranslateOperand(psContext, &psInst->asOperands[2], TO_FLAG_NONE); + WriteSamplerExpression(psContext, psInst->asOperands[2].ui32RegisterNumber, ~0, 0); bcatcstr(glsl, ", "); TranslateOperandWithMask(psContext, &psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); bformata(glsl, ", 0, ivec2(%d, %d))", @@ -771,7 +824,7 @@ static void TranslateTexelFetchOffset(HLSLCrossCompilerContext* psContext, } case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: { - TranslateOperand(psContext, 
&psInst->asOperands[2], TO_FLAG_NONE); + WriteSamplerExpression(psContext, psInst->asOperands[2].ui32RegisterNumber, ~0, 0); bcatcstr(glsl, ", "); TranslateOperandWithMask(psContext, &psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); bformata(glsl, ", 0, ivec3(%d, %d, %d))", @@ -782,7 +835,7 @@ static void TranslateTexelFetchOffset(HLSLCrossCompilerContext* psContext, } case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: { - TranslateOperand(psContext, &psInst->asOperands[2], TO_FLAG_NONE); + WriteSamplerExpression(psContext, psInst->asOperands[2].ui32RegisterNumber, ~0, 0); bcatcstr(glsl, ", "); TranslateOperandWithMask(psContext, &psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); bformata(glsl, ", 0, ivec2(%d, %d))", psInst->iUAddrOffset, psInst->iVAddrOffset); @@ -790,7 +843,7 @@ static void TranslateTexelFetchOffset(HLSLCrossCompilerContext* psContext, } case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: { - TranslateOperand(psContext, &psInst->asOperands[2], TO_FLAG_NONE); + WriteSamplerExpression(psContext, psInst->asOperands[2].ui32RegisterNumber, ~0, 0); bcatcstr(glsl, ", "); TranslateOperandWithMask(psContext, &psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); bformata(glsl, ", 0, int(%d))", psInst->iUAddrOffset); @@ -906,7 +959,6 @@ void GetResInfoData(HLSLCrossCompilerContext* psContext, Instruction* psInst, in bstring glsl = *psContext->currentGLSLString; int numParenthesis = 0; const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; - const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber]; AddIndentation(psContext); AddOpAssignToDestWithMask(psContext, &psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? 
SVT_UINT : SVT_FLOAT, 1, "=", &numParenthesis, 1 << destElem); @@ -914,6 +966,29 @@ void GetResInfoData(HLSLCrossCompilerContext* psContext, Instruction* psInst, in //[width, height, depth or array size, total-mip-count] if (index < 3) { + RESOURCE_DIMENSION eResDim; + const char* queryFunction = "textureSize"; + int includeLODParameter = 1; + int isUAV = 0; + + // "UAV" types in HLSL become "image" types in GLSL. + // In these cases, we must use "imageSize" rather than "textureSize" + // However, this function is only available in GLSL v4.3 or greater + // If we are compiling to an earlier version of GLSL, and we hit this instruction, + // we will probably generate uncompilable GLSL code + if (HasImageSizeFunction(psContext->psShader->eTargetLanguage) + && psInst->asOperands[2].eType == OPERAND_TYPE_UNORDERED_ACCESS_VIEW) { + ASSERT(psInst->asOperands[2].ui32RegisterNumber < MAX_TEXTURES); + eResDim = psContext->psShader->aeUAVResourceDims[psInst->asOperands[2].ui32RegisterNumber]; + queryFunction = "imageSize"; + includeLODParameter = 0; + isUAV = 1; + } else { + ASSERT(psInst->asOperands[2].eType == OPERAND_TYPE_RESOURCE); + ASSERT(psInst->asOperands[2].ui32RegisterNumber < MAX_TEXTURES); + eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber]; + } + int dim = GetNumTextureDimensions(psContext, eResDim); bcatcstr(glsl, "("); if (dim < (index + 1)) @@ -924,19 +999,30 @@ void GetResInfoData(HLSLCrossCompilerContext* psContext, Instruction* psInst, in { if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT) { - bformata(glsl, "uvec%d(textureSize(", dim); + bformata(glsl, "uvec%d(%s(", dim, queryFunction); } else if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_RCPFLOAT) { - bformata(glsl, "vec%d(1.0) / vec%d(textureSize(", dim, dim); + bformata(glsl, "vec%d(1.0) / vec%d(%s(", dim, dim, queryFunction); } else { - bformata(glsl, "vec%d(textureSize(", dim); + bformata(glsl, "vec%d(%s(", dim, queryFunction); } -
TranslateOperand(psContext, &psInst->asOperands[2], TO_FLAG_NONE); - bcatcstr(glsl, ", "); - TranslateOperand(psContext, &psInst->asOperands[1], TO_FLAG_INTEGER); + + // For "texture" types, we still need to write the sampler expression + // This will expand to something like sampler2d(textureName, hlslcc_DummySampler) when using + // GL_KHR_vulkan_glsl. + if (!isUAV) { + WriteSamplerExpression(psContext, psInst->asOperands[2].ui32RegisterNumber, ~0u, 0); + } else { + TranslateOperand(psContext, &psInst->asOperands[2], TO_FLAG_NONE); + } + + if (includeLODParameter) { + bcatcstr(glsl, ", "); + TranslateOperand(psContext, &psInst->asOperands[1], TO_FLAG_INTEGER); + } bcatcstr(glsl, "))"); switch (index) @@ -957,12 +1043,13 @@ void GetResInfoData(HLSLCrossCompilerContext* psContext, Instruction* psInst, in } else { + ASSERT(psInst->asOperands[2].eType == OPERAND_TYPE_RESOURCE); if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT) bcatcstr(glsl, "uint("); else bcatcstr(glsl, "float("); bcatcstr(glsl, "textureQueryLevels("); - TranslateOperand(psContext, &psInst->asOperands[2], TO_FLAG_NONE); + WriteSamplerExpression(psContext, psInst->asOperands[2].ui32RegisterNumber, ~0u, 0); bcatcstr(glsl, "))"); } AddAssignPrologue(psContext, numParenthesis); @@ -989,12 +1076,11 @@ static void TranslateTextureSample(HLSLCrossCompilerContext* psContext, Instruct uint32_t ui32NumOffsets = 0; + ASSERT(psInst->asOperands[2].eType == OPERAND_TYPE_RESOURCE); const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber]; const int iHaveOverloadedTexFuncs = HaveOverloadedTextureFuncs(psContext->psShader->eTargetLanguage); - const int useCombinedTextureSamplers = (psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) ? 
1 : 0; - ASSERT(psInst->asOperands[2].ui32RegisterNumber < MAX_TEXTURES); if (psInst->bAddressOffset) @@ -1089,10 +1175,7 @@ static void TranslateTextureSample(HLSLCrossCompilerContext* psContext, Instruct { bcatcstr(glsl, "texture("); } - if (!useCombinedTextureSamplers) - ResourceName(glsl, psContext, RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) ? 1 : 0); - else - bconcat(glsl, TextureSamplerName(&psContext->psShader->sInfo, psInst->asOperands[2].ui32RegisterNumber, psInst->asOperands[3].ui32RegisterNumber, (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) ? 1 : 0)); + WriteSamplerExpression(psContext, psInst->asOperands[2].ui32RegisterNumber, psInst->asOperands[3].ui32RegisterNumber, (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) ? 1 : 0); bcatcstr(glsl, ","); TranslateTexCoord(psContext, eResDim, &psInst->asOperands[1]); bcatcstr(glsl, ","); @@ -1136,10 +1219,7 @@ static void TranslateTextureSample(HLSLCrossCompilerContext* psContext, Instruct { bformata(glsl, "%s%s(", funcName, offset); } - if (!useCombinedTextureSamplers) - ResourceName(glsl, psContext, RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, 1); - else - bconcat(glsl, TextureSamplerName(&psContext->psShader->sInfo, psInst->asOperands[2].ui32RegisterNumber, psInst->asOperands[3].ui32RegisterNumber, 1)); + WriteSamplerExpression(psContext, psInst->asOperands[2].ui32RegisterNumber, psInst->asOperands[3].ui32RegisterNumber, 1); bformata(glsl, ", %s(", depthCmpCoordType); TranslateTexCoord(psContext, eResDim, &psInst->asOperands[1]); bcatcstr(glsl, ","); @@ -1173,10 +1253,7 @@ static void TranslateTextureSample(HLSLCrossCompilerContext* psContext, Instruct { bformata(glsl, "%s%s(", funcName, offset); } - if (!useCombinedTextureSamplers) - TranslateOperand(psContext, &psInst->asOperands[2], TO_FLAG_NONE);//resource - else - bconcat(glsl, TextureSamplerName(&psContext->psShader->sInfo, psInst->asOperands[2].ui32RegisterNumber, 
psInst->asOperands[3].ui32RegisterNumber, 0)); + WriteSamplerExpression(psContext, psInst->asOperands[2].ui32RegisterNumber, psInst->asOperands[3].ui32RegisterNumber, 0); bcatcstr(glsl, ", "); TranslateTexCoord(psContext, eResDim, &psInst->asOperands[1]); @@ -1364,7 +1441,7 @@ static void TranslateShaderStorageStore(HLSLCrossCompilerContext* psContext, Ins } else { - TranslateOperand(psContext, psDest, TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY); + bformata(glsl, "StorageBuffer%d", psDest->ui32RegisterNumber); } bformata(glsl, "["); if (structured) //Dest address and dest byte offset @@ -1570,7 +1647,7 @@ static void TranslateShaderStorageLoad(HLSLCrossCompilerContext* psContext, Inst } if (psSrc->eType == OPERAND_TYPE_UNORDERED_ACCESS_VIEW) { - bformata(glsl, "%s[", psCBuf->Name); + bformata(glsl, "StorageBuffer%d[", psSrc->ui32RegisterNumber); TranslateOperand(psContext, psSrcAddr, TO_FLAG_INTEGER); bcatcstr(glsl, "]"); if (strcmp(psVar->Name, "$Element") != 0) @@ -2286,8 +2363,13 @@ static void SetCBOperandComponents(HLSLCrossCompilerContext *psContext, Operand GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psContext->psShader->sInfo, &psCBuf); GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &index, &rebase); - if (psVarType->Class == SVC_SCALAR) - psOperand->iNumComponents = 1; + // DavidJ -- Disabled this. It's causing problems with swizzling sometimes because + // we can't distinguish between operands with swizzle settings and those + // without. + // It's not clear what the intended reason for it was... So might be best + // just to get rid of it.
+ // if (psVarType->Class == SVC_SCALAR) + // psOperand->iNumComponents = 1; } @@ -2299,7 +2381,7 @@ void SetDataTypes(HLSLCrossCompilerContext* psContext, Instruction* psInst, cons SHADER_VARIABLE_TYPE aeTempVecType[MAX_TEMP_VEC4 * 4]; - if (psContext->psShader->ui32MajorVersion <= 3) + if ((psContext->psShader->ui32MajorVersion <= 3) || (psContext->psShader->eShaderType==HULL_SHADER)) { for (i = 0; i < MAX_TEMP_VEC4 * 4; ++i) { @@ -2699,7 +2781,6 @@ void TranslateInstruction(HLSLCrossCompilerContext* psContext, Instruction* psIn case OPCODE_FTOI: case OPCODE_FTOU: { - uint32_t dstCount = GetNumSwizzleElements(&psInst->asOperands[0]); uint32_t srcCount = GetNumSwizzleElements(&psInst->asOperands[1]); uint32_t ui32DstFlags = TO_FLAG_DESTINATION; const SHADER_VARIABLE_TYPE eSrcType = GetOperandDataType(psContext, &psInst->asOperands[1]); @@ -2715,15 +2796,20 @@ void TranslateInstruction(HLSLCrossCompilerContext* psContext, Instruction* psIn AddIndentation(psContext); - AddAssignToDest(psContext, &psInst->asOperands[0], psInst->eOpcode == OPCODE_FTOU ? SVT_UINT : SVT_INT, srcCount, &numParenthesis); - bcatcstr(glsl, GetConstructorForType(psInst->eOpcode == OPCODE_FTOU ? SVT_UINT : SVT_INT, srcCount == dstCount ? dstCount : 4)); + AddAssignToDest(psContext, &psInst->asOperands[0], psInst->eOpcode == OPCODE_FTOU ? SVT_UINT : SVT_INT, srcCount, &numParenthesis); + bcatcstr(glsl, GetConstructorForType(psInst->eOpcode == OPCODE_FTOU ? SVT_UINT : SVT_INT, 4)); bcatcstr(glsl, "("); // 1 TranslateOperand(psContext, &psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT); + // we have to smear out the scalars here... 
+ //if (srcCount == 1) bcatcstr(glsl, ".xxxx"); bcatcstr(glsl, ")"); // 1 - // Add destination writemask if the component counts do not match - if (srcCount != dstCount) - AddSwizzleUsingElementCount(psContext, dstCount); + + // this seems like the only reliable way to catch every swizzling case --- + // (note that this is using the swizzle on the write element, because we took care of the swizzle + // on operands[1] in the translate operand call) + AddSwizzleUsingOrderedElements(psContext, &psInst->asOperands[0], OPERAND_4_COMPONENT_MASK_ALL); AddAssignPrologue(psContext, numParenthesis); + //*/ break; } @@ -2742,7 +2828,6 @@ void TranslateInstruction(HLSLCrossCompilerContext* psContext, Instruction* psIn { const SHADER_VARIABLE_TYPE eDestType = GetOperandDataType(psContext, &psInst->asOperands[0]); const SHADER_VARIABLE_TYPE eSrcType = GetOperandDataType(psContext, &psInst->asOperands[1]); - uint32_t dstCount = GetNumSwizzleElements(&psInst->asOperands[0]); uint32_t srcCount = GetNumSwizzleElements(&psInst->asOperands[1]); #ifdef _DEBUG @@ -2758,13 +2843,17 @@ void TranslateInstruction(HLSLCrossCompilerContext* psContext, Instruction* psIn #endif AddIndentation(psContext); AddAssignToDest(psContext, &psInst->asOperands[0], SVT_FLOAT, srcCount, &numParenthesis); - bcatcstr(glsl, GetConstructorForType(SVT_FLOAT, srcCount == dstCount ? dstCount : 4)); + bcatcstr(glsl, GetConstructorForType(SVT_FLOAT, 4)); bcatcstr(glsl, "("); // 1 TranslateOperand(psContext, &psInst->asOperands[1], psInst->eOpcode == OPCODE_UTOF ? TO_AUTO_BITCAST_TO_UINT : TO_AUTO_BITCAST_TO_INT); + // we have to smear out the scalars here... 
+ //if (srcCount == 1) bcatcstr(glsl, ".xxxx"); bcatcstr(glsl, ")"); // 1 - // Add destination writemask if the component counts do not match - if (srcCount != dstCount) - AddSwizzleUsingElementCount(psContext, dstCount); + + // this seems like the only reliable way to catch every swizzling case --- + // (note that this is using the swizzle on the write element, because we took care of the swizzle + // on operands[1] in the translate operand call) + AddSwizzleUsingOrderedElements(psContext, &psInst->asOperands[0], OPERAND_4_COMPONENT_MASK_ALL); AddAssignPrologue(psContext, numParenthesis); break; } @@ -2793,6 +2882,16 @@ void TranslateInstruction(HLSLCrossCompilerContext* psContext, Instruction* psIn CallTernaryOp(psContext, "*", "+", psInst, 0, 1, 2, 3, ui32Flags); break; } + case OPCODE_UMAD: + { + uint32_t ui32Flags = TO_FLAG_UNSIGNED_INTEGER; +#ifdef _DEBUG + AddIndentation(psContext); + bcatcstr(glsl, "//UMAD\n"); +#endif + CallTernaryOp(psContext, "*", "+", psInst, 0, 1, 2, 3, ui32Flags); + break; + } case OPCODE_DADD: { #ifdef _DEBUG @@ -2885,6 +2984,18 @@ void TranslateInstruction(HLSLCrossCompilerContext* psContext, Instruction* psIn CallBinaryOp(psContext, "*", psInst, 1, 2, 3, eType); break; } + case OPCODE_UMUL: + { + SHADER_VARIABLE_TYPE eType = SVT_UINT; +#ifdef _DEBUG + AddIndentation(psContext); + bcatcstr(glsl, "//UMUL\n"); +#endif + ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_NULL); + + CallBinaryOp(psContext, "*", psInst, 1, 2, 3, eType); + break; + } case OPCODE_UDIV: { #ifdef _DEBUG @@ -3169,6 +3280,15 @@ void TranslateInstruction(HLSLCrossCompilerContext* psContext, Instruction* psIn CallHelper2Int(psContext, "max", psInst, 0, 1, 2, 1); break; } + case OPCODE_UMAX: + { +#ifdef _DEBUG + AddIndentation(psContext); + bcatcstr(glsl, "//UMAX\n"); +#endif + CallHelper2UInt(psContext, "max", psInst, 0, 1, 2, 1); + break; + } case OPCODE_MAX: { #ifdef _DEBUG @@ -3187,6 +3307,15 @@ void TranslateInstruction(HLSLCrossCompilerContext* psContext, 
Instruction* psIn CallHelper2Int(psContext, "min", psInst, 0, 1, 2, 1); break; } + case OPCODE_UMIN: + { +#ifdef _DEBUG + AddIndentation(psContext); + bcatcstr(glsl, "//UMIN\n"); +#endif + CallHelper2UInt(psContext, "min", psInst, 0, 1, 2, 1); + break; + } case OPCODE_MIN: { #ifdef _DEBUG @@ -3199,8 +3328,8 @@ void TranslateInstruction(HLSLCrossCompilerContext* psContext, Instruction* psIn case OPCODE_GATHER4: { //dest, coords, tex, sampler + ASSERT(psInst->asOperands[2].eType == OPERAND_TYPE_RESOURCE); const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber]; - const int useCombinedTextureSamplers = (psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) ? 1 : 0; #ifdef _DEBUG AddIndentation(psContext); bcatcstr(glsl, "//GATHER4\n"); @@ -3209,12 +3338,7 @@ void TranslateInstruction(HLSLCrossCompilerContext* psContext, Instruction* psIn AddIndentation(psContext); // TODO FIXME integer samplers AddAssignToDest(psContext, &psInst->asOperands[0], SVT_FLOAT, GetNumSwizzleElements(&psInst->asOperands[2]), &numParenthesis); bcatcstr(glsl, "textureGather("); - - if (!useCombinedTextureSamplers) - ResourceName(glsl, psContext, RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, 0); - else - bconcat(glsl, TextureSamplerName(&psContext->psShader->sInfo, psInst->asOperands[2].ui32RegisterNumber, psInst->asOperands[3].ui32RegisterNumber, 0)); - + WriteSamplerExpression(psContext, psInst->asOperands[2].ui32RegisterNumber, psInst->asOperands[3].ui32RegisterNumber, 0); bcatcstr(glsl, ", "); TranslateTexCoord(psContext, eResDim, &psInst->asOperands[1]); bcatcstr(glsl, ")"); @@ -3230,8 +3354,8 @@ void TranslateInstruction(HLSLCrossCompilerContext* psContext, Instruction* psIn case OPCODE_GATHER4_PO_C: { //dest, coords, offset, tex, sampler, srcReferenceValue - const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[3].ui32RegisterNumber]; - const int useCombinedTextureSamplers = 
(psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) ? 1 : 0; + ASSERT(psInst->asOperands[2].eType == OPERAND_TYPE_RESOURCE); + const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[3].ui32RegisterNumber]; #ifdef _DEBUG AddIndentation(psContext); bcatcstr(glsl, "//GATHER4_PO_C\n"); @@ -3240,12 +3364,7 @@ void TranslateInstruction(HLSLCrossCompilerContext* psContext, Instruction* psIn AddIndentation(psContext); // TODO FIXME integer samplers AddAssignToDest(psContext, &psInst->asOperands[0], SVT_FLOAT, GetNumSwizzleElements(&psInst->asOperands[2]), &numParenthesis); bcatcstr(glsl, "textureGatherOffset("); - - if (!useCombinedTextureSamplers) - ResourceName(glsl, psContext, RGROUP_TEXTURE, psInst->asOperands[3].ui32RegisterNumber, 1); - else - bconcat(glsl, TextureSamplerName(&psContext->psShader->sInfo, psInst->asOperands[3].ui32RegisterNumber, psInst->asOperands[3].ui32RegisterNumber, 1)); - + WriteSamplerExpression(psContext, psInst->asOperands[3].ui32RegisterNumber, psInst->asOperands[4].ui32RegisterNumber, 1); bcatcstr(glsl, ", "); TranslateTexCoord(psContext, eResDim, &psInst->asOperands[1]); @@ -3270,7 +3389,8 @@ void TranslateInstruction(HLSLCrossCompilerContext* psContext, Instruction* psIn case OPCODE_GATHER4_PO: { //dest, coords, offset, tex, sampler - const int useCombinedTextureSamplers = (psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) ? 
1 : 0; + ASSERT(psInst->asOperands[2].eType == OPERAND_TYPE_RESOURCE); + const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[3].ui32RegisterNumber]; #ifdef _DEBUG AddIndentation(psContext); bcatcstr(glsl, "//GATHER4_PO\n"); @@ -3279,18 +3399,9 @@ void TranslateInstruction(HLSLCrossCompilerContext* psContext, Instruction* psIn AddIndentation(psContext); // TODO FIXME integer samplers AddAssignToDest(psContext, &psInst->asOperands[0], SVT_FLOAT, GetNumSwizzleElements(&psInst->asOperands[2]), &numParenthesis); bcatcstr(glsl, "textureGatherOffset("); - - if (!useCombinedTextureSamplers) - ResourceName(glsl, psContext, RGROUP_TEXTURE, psInst->asOperands[3].ui32RegisterNumber, 0); - else - bconcat(glsl, TextureSamplerName(&psContext->psShader->sInfo, psInst->asOperands[3].ui32RegisterNumber, psInst->asOperands[4].ui32RegisterNumber, 0)); - + WriteSamplerExpression(psContext, psInst->asOperands[3].ui32RegisterNumber, psInst->asOperands[4].ui32RegisterNumber, 0); bcatcstr(glsl, ", "); - //Texture coord cannot be vec4 - //Determining if it is a vec3 for vec2 yet to be done. - psInst->asOperands[1].aui32Swizzle[2] = 0xFFFFFFFF; - psInst->asOperands[1].aui32Swizzle[3] = 0xFFFFFFFF; - TranslateOperand(psContext, &psInst->asOperands[1], TO_FLAG_NONE); + TranslateTexCoord(psContext, eResDim, &psInst->asOperands[1]); bcatcstr(glsl, ", ivec2("); //ivec2 offset @@ -3309,7 +3420,8 @@ void TranslateInstruction(HLSLCrossCompilerContext* psContext, Instruction* psIn case OPCODE_GATHER4_C: { //dest, coords, tex, sampler srcReferenceValue - const int useCombinedTextureSamplers = (psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) ? 
1 : 0; + ASSERT(psInst->asOperands[2].eType == OPERAND_TYPE_RESOURCE); + const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber]; #ifdef _DEBUG AddIndentation(psContext); bcatcstr(glsl, "//GATHER4_C\n"); @@ -3318,18 +3430,9 @@ void TranslateInstruction(HLSLCrossCompilerContext* psContext, Instruction* psIn AddIndentation(psContext); // TODO FIXME integer samplers AddAssignToDest(psContext, &psInst->asOperands[0], SVT_FLOAT, GetNumSwizzleElements(&psInst->asOperands[2]), &numParenthesis); bcatcstr(glsl, "textureGather("); - - if (!useCombinedTextureSamplers) - ResourceName(glsl, psContext, RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, 1); - else - bconcat(glsl, TextureSamplerName(&psContext->psShader->sInfo, psInst->asOperands[2].ui32RegisterNumber, psInst->asOperands[3].ui32RegisterNumber, 1)); - + WriteSamplerExpression(psContext, psInst->asOperands[2].ui32RegisterNumber, psInst->asOperands[3].ui32RegisterNumber, 1); bcatcstr(glsl, ", "); - //Texture coord cannot be vec4 - //Determining if it is a vec3 for vec2 yet to be done. 
- psInst->asOperands[1].aui32Swizzle[2] = 0xFFFFFFFF; - psInst->asOperands[1].aui32Swizzle[3] = 0xFFFFFFFF; - TranslateOperand(psContext, &psInst->asOperands[1], TO_FLAG_NONE); + TranslateTexCoord(psContext, eResDim, &psInst->asOperands[1]); bcatcstr(glsl, ", "); TranslateOperand(psContext, &psInst->asOperands[4], TO_FLAG_NONE); @@ -4055,7 +4158,8 @@ void TranslateInstruction(HLSLCrossCompilerContext* psContext, Instruction* psIn TranslateOperand(psContext, &psInst->asOperands[2], TO_FLAG_NONE); bcatcstr(glsl, ","); - TranslateTexCoord(psContext, + ASSERT(psInst->asOperands[2].eType == OPERAND_TYPE_RESOURCE); + TranslateTexCoord(psContext, psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber], &psInst->asOperands[1]); bcatcstr(glsl, ")"); @@ -4396,11 +4500,7 @@ void TranslateInstruction(HLSLCrossCompilerContext* psContext, Instruction* psIn bcatcstr(glsl, "//INEG\n"); #endif //dest = 0 - src0 - AddIndentation(psContext); - TranslateOperand(psContext, &psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_INTEGER); - bcatcstr(glsl, " = 0 - "); - TranslateOperand(psContext, &psInst->asOperands[1], TO_FLAG_NONE | TO_FLAG_INTEGER); - bcatcstr(glsl, ";\n"); + CallUnaryOp(psContext, "0 - ", psInst, 0, 1, SVT_INT); break; } case OPCODE_DERIV_RTX_COARSE: @@ -4524,21 +4624,32 @@ void TranslateInstruction(HLSLCrossCompilerContext* psContext, Instruction* psIn } case OPCODE_RESINFO: { + uint32_t destSwizzle[4]; + uint32_t destElemCount = GetOrderedSwizzleElements(&psInst->asOperands[0], OPERAND_4_COMPONENT_MASK_ALL, destSwizzle); - const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber]; - const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; - uint32_t destElemCount = GetNumSwizzleElements(&psInst->asOperands[0]); - uint32_t destElem; + uint32_t srcSwizzle[4]; + uint32_t srcElemCount = GetOrderedSwizzleElements(&psInst->asOperands[2], OPERAND_4_COMPONENT_MASK_ALL, 
srcSwizzle); + + uint32_t eleIndex; #ifdef _DEBUG AddIndentation(psContext); bcatcstr(glsl, "//RESINFO\n"); #endif - for (destElem = 0; destElem < destElemCount; ++destElem) + for (eleIndex = 0; eleIndex < min(srcElemCount, destElemCount); ++eleIndex) { - const char* swizzle[] = { ".x", ".y", ".z", ".w" }; - - GetResInfoData(psContext, psInst, psInst->asOperands[2].aui32Swizzle[destElem], destElem); + // note -- This doesn't follow the normal pattern for dealing with + // swizzles. We need to take into account the swizzle values + // attached to both and input and output operands, and we need + // to support the different possible modes. The code uses a + // different pattern for other operations... We could refactor + // this path to better match the other code... But it could take + // some work, because single RESINFO instructions can generate multiple + // GLSL expressions. + GetResInfoData( + psContext, psInst, + srcSwizzle[min(destSwizzle[eleIndex], srcElemCount-1)], // (min causes a smear for scalar types) + destSwizzle[eleIndex]); } break; diff --git a/src/toGLSLOperand.c b/src/toGLSLOperand.c index 0f00f2d..c7a8e78 100644 --- a/src/toGLSLOperand.c +++ b/src/toGLSLOperand.c @@ -2,6 +2,7 @@ #include "internal_includes/toGLSLDeclaration.h" #include "bstrlib.h" #include "hlslcc.h" +#include "internal_includes/languages.h" #include "internal_includes/debug.h" #include @@ -160,7 +161,7 @@ uint32_t IsSwizzleReplicated(const Operand* psOperand) return 0; } -static uint32_t GetNumberBitsSet(uint32_t a) +uint32_t GetNumberBitsSet(uint32_t a) { // Calculate number of bits in a // Taken from https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSet64 @@ -319,6 +320,109 @@ uint32_t GetNumSwizzleElementsWithMask(const Operand *psOperand, uint32_t ui32Co return count; } +static uint32_t FindBitsInMask(uint32_t mask, uint32_t result[4]) +{ + uint32_t outputCount = 0; + for (uint32_t c=0; c<4; ++c) + if (mask & (1<eType) + { + case 
OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED: + return 1; // TODO: does mask make any sense here? + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: + case OPERAND_TYPE_INPUT_THREAD_ID: + case OPERAND_TYPE_INPUT_THREAD_GROUP_ID: + // Adjust component count and break to more processing + ((Operand *)psOperand)->iNumComponents = 3; + break; + case OPERAND_TYPE_IMMEDIATE32: + case OPERAND_TYPE_IMMEDIATE64: + case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: + case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: + case OPERAND_TYPE_OUTPUT_DEPTH: + { + // Translate numComponents into bitmask + // 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15 + uint32_t compMask = (1 << psOperand->iNumComponents) - 1; + + compMask &= ui32CompMask; + // Calculate bits left in compMask + return FindBitsInMask(compMask, result); + } + default: + { + break; + } + } + + if(psOperand->iWriteMaskEnabled && + psOperand->iNumComponents != 1) + { + //Component Mask + if(psOperand->eSelMode == OPERAND_4_COMPONENT_MASK_MODE) + { + uint32_t compMask = psOperand->ui32CompMask; + if (compMask == 0) + compMask = OPERAND_4_COMPONENT_MASK_ALL; + compMask &= ui32CompMask; + count = FindBitsInMask(compMask, result); + } + else + //Component Swizzle + if(psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + if(psOperand->ui32Swizzle != (NO_SWIZZLE)) + { + uint32_t i; + for(i=0; i< 4; ++i) + { + if ((ui32CompMask & (1 << i)) == 0) + continue; + + ASSERT( psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_X + || psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_Y + || psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_Z + || psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_W); + result[count++] = psOperand->aui32Swizzle[i]; + } + } + } + else + //Component Select 1 + if(psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) + { + // (comp mask should be ignored in this case) + result[count++] = psOperand->aui32Swizzle[0]; + } + } + + if(!count) + { + for (uint32_t c=0; c<4; ++c) { + if (ui32CompMask & (1<iNumComponents-1)); + 
} + } + } + + return count; +} + void AddSwizzleUsingElementCount(HLSLCrossCompilerContext* psContext, uint32_t count) { bstring glsl = *psContext->currentGLSLString; @@ -347,6 +451,53 @@ void AddSwizzleUsingElementCount(HLSLCrossCompilerContext* psContext, uint32_t c } } +void AddSwizzleUsingOrderedElements(HLSLCrossCompilerContext* psContext, const Operand *psOperand, uint32_t ui32CompMask) +{ + bstring glsl = *psContext->currentGLSLString; + uint32_t elements[4]; + uint32_t count = GetOrderedSwizzleElements(psOperand, ui32CompMask, elements); + ASSERT(count!=0); + + bcatcstr(glsl, "."); + const char* eles[] = { "x", "y", "z", "w" }; + for (unsigned c=0; ccurrentGLSLString; + const char* eles[] = { "x", "y", "z", "w" }; + + uint32_t srcElements[4]; + uint32_t srcCount = GetOrderedSwizzleElements(psSrcOperand, OPERAND_4_COMPONENT_MASK_ALL, srcElements); + ASSERT(srcCount!=0); + + uint32_t dstElements[4]; + uint32_t dstCount = GetOrderedSwizzleElements(psMaskingOperand, OPERAND_4_COMPONENT_MASK_ALL, dstElements); + ASSERT(dstCount!=0); + + // We want to write only those components that properly overlap with the destination operand. 
+ // It appears that if we have a situation like: + // value0.zw = value1.xy; + // + // Then psSrcOperand will have the swizzle "xxxy" + // and psMaskingOperand will have the swizzle "zw" + + bcatcstr(glsl, "."); + + // (for scalar src values, we just smear across the scalar value) + for (unsigned c=0; c= uint 0x3fffffff if (value > 0x3ffffffe) - bformata(glsl, "int(0x%Xu)", value); + bformata(glsl, "int(0x%X)", value); else bformata(glsl, "0x%X", value); break; @@ -989,12 +1140,19 @@ static void TranslateVariableNameWithMask(HLSLCrossCompilerContext* psContext, c if(ui32TOFlag & TO_FLAG_DECLARATION_NAME) { const char* name = GetDeclaredInputName(psContext, psContext->psShader->eShaderType, psOperand); - bcatcstr(glsl, name); + bcatcstr(glsl, name); + } + else + { + const uint32_t ui32Register = psOperand->aui32ArraySizes[psOperand->iIndexDims - 1]; + InOutSignature* psIn; + GetInputSignatureFromRegister(ui32Register, psOperand->eSelMode, psOperand->ui32CompMask, &psContext->psShader->sInfo, &psIn); + if ((psIn->ui32Mask == 1) && (requestedComponents > 1)) { + bformata(glsl, "vec%d(Input%d.x)", requestedComponents, psOperand->ui32RegisterNumber); + pui32IgnoreSwizzle[0] = 1; + } else + bformata(glsl, "Input%d", psOperand->ui32RegisterNumber); } - else - { - bformata(glsl, "Input%d", psOperand->ui32RegisterNumber); - } } } break; @@ -1193,6 +1351,141 @@ static void TranslateVariableNameWithMask(HLSLCrossCompilerContext* psContext, c if((ui32TOFlag & TO_FLAG_DECLARATION_NAME) != TO_FLAG_DECLARATION_NAME) { + // Sometimes a single instruction will use values from multiple constant buffer variables. + // So, for example, a constant buffer might look like this: + // cbuffer Buffer { float3 Vector; float Scalar; } + // Now consider the following statement: + // variable = float4(Vector, Scalar); + // Because Vector and Scalar are contiguous and aligned as a 4d vector in the constant buffer, + // the statement can become a single 4d vector load in the HLSL bytecode. 
In order to generate + // correct GLSL, we must handle these cases. + + const uint32_t useNewPath = + psCBuf + && (psOperand->psSubOperand[0] == NULL) + && (psOperand->psSubOperand[1] == NULL) && (index == -1); + if (useNewPath) { + uint32_t opSwizzle[4]; + uint32_t eleCount = GetOrderedSwizzleElements(psOperand, ui32CompMask, opSwizzle); + ASSERT(eleCount > 0); + + // For each separate element, find the cbuffer element that matches + ShaderVarType* vars[4]; + uint32_t indices[4]; + uint32_t rebases[4]; + for (uint32_t c=0; caui32ArraySizes[1], &opSwizzle[c], + psCBuf, &vars[c], &indices[c], &rebases[c]); + } + + // We should expect that the Type of each variable is the same. + uint32_t needConstructorExpression = 0; + for (uint32_t c=1; cType == vars[0]->Type); + + needConstructorExpression = vars[c] != vars[0]; + } + + // For scalar types we should user constructor, because swizzling not allowed for scalar values + uint32_t isScalarValue = 0; + if (vars[0]->Class == SVC_SCALAR){ + isScalarValue = 1; + for (uint32_t c = 1; c 1) && (psOperand->ui32Swizzle == XXXX_SWIZZLE)) { + isScalarValue = 1; + for (uint32_t c = 0; c < eleCount; ++c) { + if (psOperand->aui32Swizzle[c] != OPERAND_4_COMPONENT_X){ + isScalarValue = 0; + break; + } + } + } + if (isScalarValue) needConstructorExpression = 1; + } + + + // In cases where we're stitching together elements from multiple + // variables, we need to use "constructor" syntax. But we can skip + // in this the more common case where just a single variable is used. 
+ if (needConstructorExpression) { + const char* constructor = GetConstructorForType(vars[0]->Type, eleCount); + bformata(glsl, "%s(", constructor); + } + + if (isScalarValue) { + bcatcstr(glsl, vars[0]->FullName); + } else { + int pendingComma = 0; + for (uint32_t c=0; cFullName); + + if(indices[c] != -1) + { + if ((vars[c]->Class == SVC_MATRIX_COLUMNS || vars[c]->Class == SVC_MATRIX_ROWS) && (vars[c]->Elements > 1)) + { + // Special handling for matrix arrays, open them up into vec4's + size_t matidx = indices[c] / 4; + size_t rowidx = indices[c] - (matidx*4); + bformata(glsl, "[%d][%d]", matidx, rowidx); + } + else + { + bformata(glsl, "[%d]", indices[c]); + } + } + + if (vars[c]->Class == SVC_SCALAR && end == c+1) { + for (uint32_t c2=c; c2>2))==0); + } + } else { + bcatcstr(glsl, "."); + const char* postfixes[4] = {"x", "y", "z", "w"}; + for (uint32_t c2=c; c2>2), 3)]); + } + } + + c = end; + pendingComma = 1; + } + } + + if (needConstructorExpression) { + bcatcstr(glsl, ")"); + } + + // if (!needConstructorExpression && vars[0]->Class == SVC_SCALAR) { + *pui32IgnoreSwizzle = 1; + // } + break; + } + //Work out the variable name. Don't apply swizzle to that variable yet. 
int32_t rebase = 0; @@ -1360,7 +1653,14 @@ static void TranslateVariableNameWithMask(HLSLCrossCompilerContext* psContext, c } case OPERAND_TYPE_SAMPLER: { - bformata(glsl, "Sampler%d", psOperand->ui32RegisterNumber); + if (HaveSeparateTexturesAndSamplers(psContext->psShader->eTargetLanguage, psContext->psShader->extensions)) + { + ResourceName(glsl, psContext, RGROUP_SAMPLER, psOperand->ui32RegisterNumber, 0); + } + else + { + bformata(glsl, "Sampler%d", psOperand->ui32RegisterNumber); + } *pui32IgnoreSwizzle = 1; break; } @@ -1643,7 +1943,7 @@ SHADER_VARIABLE_TYPE GetOperandDataTypeEx(HLSLCrossCompilerContext* psContext, c return SVT_INT; } - if(GetInputSignatureFromRegister(ui32Register, &psContext->psShader->sInfo, &psIn)) + if (GetInputSignatureFromRegister(ui32Register, psOperand->eSelMode, psOperand->ui32CompMask, &psContext->psShader->sInfo, &psIn)) { if( psIn->eComponentType == INOUT_COMPONENT_UINT32) { @@ -1708,6 +2008,10 @@ SHADER_VARIABLE_TYPE GetOperandDataTypeEx(HLSLCrossCompilerContext* psContext, c { return SVT_INT; } + case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID: + { + return SVT_INT; + } default: { return SVT_FLOAT; @@ -1732,13 +2036,13 @@ void TranslateOperandWithMask(HLSLCrossCompilerContext* psContext, const Operand { ui32TOFlag &= ~(TO_AUTO_BITCAST_TO_FLOAT|TO_AUTO_BITCAST_TO_INT|TO_AUTO_BITCAST_TO_UINT); } - + if(ui32TOFlag & TO_FLAG_NAME_ONLY) { TranslateVariableName(psContext, psOperand, ui32TOFlag, &ui32IgnoreSwizzle); return; } - + switch(psOperand->eModifier) { case OPERAND_MODIFIER_NONE: @@ -1763,12 +2067,12 @@ void TranslateOperandWithMask(HLSLCrossCompilerContext* psContext, const Operand } TranslateVariableNameWithMask(psContext, psOperand, ui32TOFlag, &ui32IgnoreSwizzle, ui32ComponentMask); - + if(!ui32IgnoreSwizzle) { TranslateOperandSwizzleWithMask(psContext, psOperand, ui32ComponentMask); } - + switch(psOperand->eModifier) { case OPERAND_MODIFIER_NONE: