/************************************************************************** * * Copyright 2012-2021 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE * USE OR OTHER DEALINGS IN THE SOFTWARE. * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. * **************************************************************************/ /* * ShaderTGSI.c -- * Functions for translating shaders. */ #include "Debug.h" #include "ShaderParse.h" #include "pipe/p_state.h" #include "tgsi/tgsi_ureg.h" #include "tgsi/tgsi_dump.h" #include "util/u_memory.h" #include "ShaderDump.h" enum dx10_opcode_format { OF_FLOAT, OF_INT, OF_UINT }; struct dx10_opcode_xlate { D3D10_SB_OPCODE_TYPE type; enum dx10_opcode_format format; uint tgsi_opcode; }; /* Opcodes that we have not even attempted to implement: */ #define TGSI_LOG_UNSUPPORTED TGSI_OPCODE_LAST /* Opcodes which do not translate directly to a TGSI opcode, but which * have at least a partial implemention coded below: */ #define TGSI_EXPAND (TGSI_OPCODE_LAST+1) static struct dx10_opcode_xlate opcode_xlate[D3D10_SB_NUM_OPCODES] = { {D3D10_SB_OPCODE_ADD, OF_FLOAT, TGSI_OPCODE_ADD}, {D3D10_SB_OPCODE_AND, OF_UINT, TGSI_OPCODE_AND}, {D3D10_SB_OPCODE_BREAK, OF_FLOAT, TGSI_OPCODE_BRK}, {D3D10_SB_OPCODE_BREAKC, OF_UINT, TGSI_EXPAND}, {D3D10_SB_OPCODE_CALL, OF_UINT, TGSI_EXPAND}, {D3D10_SB_OPCODE_CALLC, OF_UINT, TGSI_EXPAND}, {D3D10_SB_OPCODE_CASE, OF_UINT, TGSI_OPCODE_CASE}, {D3D10_SB_OPCODE_CONTINUE, OF_FLOAT, TGSI_OPCODE_CONT}, {D3D10_SB_OPCODE_CONTINUEC, OF_UINT, TGSI_EXPAND}, {D3D10_SB_OPCODE_CUT, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_DEFAULT, OF_FLOAT, TGSI_OPCODE_DEFAULT}, {D3D10_SB_OPCODE_DERIV_RTX, OF_FLOAT, TGSI_OPCODE_DDX}, {D3D10_SB_OPCODE_DERIV_RTY, OF_FLOAT, TGSI_OPCODE_DDY}, {D3D10_SB_OPCODE_DISCARD, OF_UINT, TGSI_EXPAND}, {D3D10_SB_OPCODE_DIV, OF_FLOAT, TGSI_OPCODE_DIV}, {D3D10_SB_OPCODE_DP2, OF_FLOAT, TGSI_OPCODE_DP2}, {D3D10_SB_OPCODE_DP3, OF_FLOAT, TGSI_OPCODE_DP3}, {D3D10_SB_OPCODE_DP4, OF_FLOAT, TGSI_OPCODE_DP4}, {D3D10_SB_OPCODE_ELSE, OF_FLOAT, TGSI_OPCODE_ELSE}, {D3D10_SB_OPCODE_EMIT, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_EMITTHENCUT, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_ENDIF, OF_FLOAT, TGSI_OPCODE_ENDIF}, {D3D10_SB_OPCODE_ENDLOOP, OF_FLOAT, TGSI_OPCODE_ENDLOOP}, {D3D10_SB_OPCODE_ENDSWITCH, OF_FLOAT, TGSI_OPCODE_ENDSWITCH}, {D3D10_SB_OPCODE_EQ, OF_FLOAT, TGSI_OPCODE_FSEQ}, {D3D10_SB_OPCODE_EXP, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_FRC, OF_FLOAT, TGSI_OPCODE_FRC}, {D3D10_SB_OPCODE_FTOI, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_FTOU, OF_FLOAT, TGSI_EXPAND}, 
{D3D10_SB_OPCODE_GE, OF_FLOAT, TGSI_OPCODE_FSGE}, {D3D10_SB_OPCODE_IADD, OF_INT, TGSI_OPCODE_UADD}, {D3D10_SB_OPCODE_IF, OF_UINT, TGSI_EXPAND}, {D3D10_SB_OPCODE_IEQ, OF_INT, TGSI_OPCODE_USEQ}, {D3D10_SB_OPCODE_IGE, OF_INT, TGSI_OPCODE_ISGE}, {D3D10_SB_OPCODE_ILT, OF_INT, TGSI_OPCODE_ISLT}, {D3D10_SB_OPCODE_IMAD, OF_INT, TGSI_OPCODE_UMAD}, {D3D10_SB_OPCODE_IMAX, OF_INT, TGSI_OPCODE_IMAX}, {D3D10_SB_OPCODE_IMIN, OF_INT, TGSI_OPCODE_IMIN}, {D3D10_SB_OPCODE_IMUL, OF_INT, TGSI_EXPAND}, {D3D10_SB_OPCODE_INE, OF_INT, TGSI_OPCODE_USNE}, {D3D10_SB_OPCODE_INEG, OF_INT, TGSI_OPCODE_INEG}, {D3D10_SB_OPCODE_ISHL, OF_INT, TGSI_OPCODE_SHL}, {D3D10_SB_OPCODE_ISHR, OF_INT, TGSI_OPCODE_ISHR}, {D3D10_SB_OPCODE_ITOF, OF_INT, TGSI_OPCODE_I2F}, {D3D10_SB_OPCODE_LABEL, OF_INT, TGSI_EXPAND}, {D3D10_SB_OPCODE_LD, OF_UINT, TGSI_EXPAND}, {D3D10_SB_OPCODE_LD_MS, OF_UINT, TGSI_EXPAND}, {D3D10_SB_OPCODE_LOG, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_LOOP, OF_FLOAT, TGSI_OPCODE_BGNLOOP}, {D3D10_SB_OPCODE_LT, OF_FLOAT, TGSI_OPCODE_FSLT}, {D3D10_SB_OPCODE_MAD, OF_FLOAT, TGSI_OPCODE_MAD}, {D3D10_SB_OPCODE_MIN, OF_FLOAT, TGSI_OPCODE_MIN}, {D3D10_SB_OPCODE_MAX, OF_FLOAT, TGSI_OPCODE_MAX}, {D3D10_SB_OPCODE_CUSTOMDATA, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_MOV, OF_UINT, TGSI_OPCODE_MOV}, {D3D10_SB_OPCODE_MOVC, OF_UINT, TGSI_OPCODE_UCMP}, {D3D10_SB_OPCODE_MUL, OF_FLOAT, TGSI_OPCODE_MUL}, {D3D10_SB_OPCODE_NE, OF_FLOAT, TGSI_OPCODE_FSNE}, {D3D10_SB_OPCODE_NOP, OF_FLOAT, TGSI_OPCODE_NOP}, {D3D10_SB_OPCODE_NOT, OF_UINT, TGSI_OPCODE_NOT}, {D3D10_SB_OPCODE_OR, OF_UINT, TGSI_OPCODE_OR}, {D3D10_SB_OPCODE_RESINFO, OF_UINT, TGSI_EXPAND}, {D3D10_SB_OPCODE_RET, OF_FLOAT, TGSI_OPCODE_RET}, {D3D10_SB_OPCODE_RETC, OF_UINT, TGSI_EXPAND}, {D3D10_SB_OPCODE_ROUND_NE, OF_FLOAT, TGSI_OPCODE_ROUND}, {D3D10_SB_OPCODE_ROUND_NI, OF_FLOAT, TGSI_OPCODE_FLR}, {D3D10_SB_OPCODE_ROUND_PI, OF_FLOAT, TGSI_OPCODE_CEIL}, {D3D10_SB_OPCODE_ROUND_Z, OF_FLOAT, TGSI_OPCODE_TRUNC}, {D3D10_SB_OPCODE_RSQ, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_SAMPLE, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_SAMPLE_C, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_SAMPLE_C_LZ, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_SAMPLE_L, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_SAMPLE_D, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_SAMPLE_B, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_SQRT, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_SWITCH, OF_UINT, TGSI_OPCODE_SWITCH}, {D3D10_SB_OPCODE_SINCOS, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_UDIV, OF_UINT, TGSI_EXPAND}, {D3D10_SB_OPCODE_ULT, OF_UINT, TGSI_OPCODE_USLT}, {D3D10_SB_OPCODE_UGE, OF_UINT, TGSI_OPCODE_USGE}, {D3D10_SB_OPCODE_UMUL, OF_UINT, TGSI_EXPAND}, {D3D10_SB_OPCODE_UMAD, OF_UINT, TGSI_OPCODE_UMAD}, {D3D10_SB_OPCODE_UMAX, OF_UINT, TGSI_OPCODE_UMAX}, {D3D10_SB_OPCODE_UMIN, OF_UINT, TGSI_OPCODE_UMIN}, {D3D10_SB_OPCODE_USHR, OF_UINT, TGSI_OPCODE_USHR}, {D3D10_SB_OPCODE_UTOF, OF_UINT, TGSI_OPCODE_U2F}, {D3D10_SB_OPCODE_XOR, OF_UINT, TGSI_OPCODE_XOR}, {D3D10_SB_OPCODE_DCL_RESOURCE, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_DCL_SAMPLER, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_DCL_INDEX_RANGE, OF_FLOAT, TGSI_LOG_UNSUPPORTED}, {D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_DCL_INPUT, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_DCL_INPUT_SGV, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_DCL_INPUT_SIV, OF_FLOAT, TGSI_EXPAND}, 
{D3D10_SB_OPCODE_DCL_INPUT_PS, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_DCL_INPUT_PS_SGV, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_DCL_INPUT_PS_SIV, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_DCL_OUTPUT, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_DCL_OUTPUT_SGV, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_DCL_OUTPUT_SIV, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_DCL_TEMPS, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP, OF_FLOAT, TGSI_EXPAND}, {D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS, OF_FLOAT, TGSI_LOG_UNSUPPORTED}, {D3D10_SB_OPCODE_RESERVED0, OF_FLOAT, TGSI_LOG_UNSUPPORTED}, {D3D10_1_SB_OPCODE_LOD, OF_FLOAT, TGSI_LOG_UNSUPPORTED}, {D3D10_1_SB_OPCODE_GATHER4, OF_FLOAT, TGSI_LOG_UNSUPPORTED}, {D3D10_1_SB_OPCODE_SAMPLE_POS, OF_FLOAT, TGSI_LOG_UNSUPPORTED}, {D3D10_1_SB_OPCODE_SAMPLE_INFO, OF_FLOAT, TGSI_LOG_UNSUPPORTED} }; #define SHADER_MAX_TEMPS 4096 #define SHADER_MAX_INPUTS 32 #define SHADER_MAX_OUTPUTS 32 #define SHADER_MAX_CONSTS 4096 #define SHADER_MAX_RESOURCES PIPE_MAX_SHADER_SAMPLER_VIEWS #define SHADER_MAX_SAMPLERS PIPE_MAX_SAMPLERS #define SHADER_MAX_INDEXABLE_TEMPS 4096 struct Shader_call { unsigned d3d_label; unsigned tgsi_label_token; }; struct Shader_label { unsigned d3d_label; unsigned tgsi_insn_no; }; struct Shader_resource { uint target; /* TGSI_TEXTURE_x */ }; struct Shader_xlate { struct ureg_program *ureg; uint vertices_in; uint declared_temps; struct ureg_dst temps[SHADER_MAX_TEMPS]; struct ureg_dst output_depth; struct Shader_resource resources[SHADER_MAX_RESOURCES]; struct ureg_src sv[SHADER_MAX_RESOURCES]; struct ureg_src samplers[SHADER_MAX_SAMPLERS]; struct ureg_src imms; struct ureg_src prim_id; uint temp_offset; uint indexable_temp_offsets[SHADER_MAX_INDEXABLE_TEMPS]; struct { boolean declared; uint writemask; uint siv_name; boolean overloaded; struct ureg_src reg; } inputs[SHADER_MAX_INPUTS]; struct { struct ureg_dst reg[4]; } outputs[SHADER_MAX_OUTPUTS]; struct { uint d3d; uint tgsi; } clip_distance_mapping[2], cull_distance_mapping[2]; uint num_clip_distances_declared; uint num_cull_distances_declared; struct Shader_call *calls; uint num_calls; uint max_calls; struct Shader_label *labels; uint num_labels; uint max_labels; }; static uint translate_interpolation(D3D10_SB_INTERPOLATION_MODE interpolation) { switch (interpolation) { case D3D10_SB_INTERPOLATION_UNDEFINED: assert(0); return TGSI_INTERPOLATE_LINEAR; case D3D10_SB_INTERPOLATION_CONSTANT: return TGSI_INTERPOLATE_CONSTANT; case D3D10_SB_INTERPOLATION_LINEAR: return TGSI_INTERPOLATE_PERSPECTIVE; case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE: return TGSI_INTERPOLATE_LINEAR; case D3D10_SB_INTERPOLATION_LINEAR_CENTROID: case D3D10_SB_INTERPOLATION_LINEAR_SAMPLE: // DX10.1 LOG_UNSUPPORTED(TRUE); return TGSI_INTERPOLATE_PERSPECTIVE; case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID: case D3D10_SB_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: // DX10.1 LOG_UNSUPPORTED(TRUE); return TGSI_INTERPOLATE_LINEAR; } assert(0); return TGSI_INTERPOLATE_LINEAR; } static uint translate_system_name(D3D10_SB_NAME name) { switch (name) { case D3D10_SB_NAME_UNDEFINED: assert(0); /* should not happen */ return TGSI_SEMANTIC_GENERIC; case D3D10_SB_NAME_POSITION: return TGSI_SEMANTIC_POSITION; case D3D10_SB_NAME_CLIP_DISTANCE: case D3D10_SB_NAME_CULL_DISTANCE: return TGSI_SEMANTIC_CLIPDIST; case D3D10_SB_NAME_PRIMITIVE_ID: return TGSI_SEMANTIC_PRIMID; case D3D10_SB_NAME_INSTANCE_ID: return TGSI_SEMANTIC_INSTANCEID; case D3D10_SB_NAME_VERTEX_ID: return TGSI_SEMANTIC_VERTEXID_NOBASE; case D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX: 
return TGSI_SEMANTIC_VIEWPORT_INDEX; case D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX: return TGSI_SEMANTIC_LAYER; case D3D10_SB_NAME_IS_FRONT_FACE: return TGSI_SEMANTIC_FACE; case D3D10_SB_NAME_SAMPLE_INDEX: LOG_UNSUPPORTED(TRUE); return TGSI_SEMANTIC_GENERIC; } assert(0); return TGSI_SEMANTIC_GENERIC; } static uint translate_semantic_index(struct Shader_xlate *sx, D3D10_SB_NAME name, const struct Shader_dst_operand *operand) { unsigned idx; switch (name) { case D3D10_SB_NAME_CLIP_DISTANCE: case D3D10_SB_NAME_CULL_DISTANCE: if (sx->clip_distance_mapping[0].d3d == operand->base.index[0].imm) { idx = sx->clip_distance_mapping[0].tgsi; } else { assert(sx->clip_distance_mapping[1].d3d == operand->base.index[0].imm); idx = sx->clip_distance_mapping[1].tgsi; } break; /* case D3D10_SB_NAME_CULL_DISTANCE: if (sx->cull_distance_mapping[0].d3d == operand->base.index[0].imm) { idx = sx->cull_distance_mapping[0].tgsi; } else { assert(sx->cull_distance_mapping[1].d3d == operand->base.index[0].imm); idx = sx->cull_distance_mapping[1].tgsi; } break;*/ default: idx = 0; } return idx; } static enum tgsi_return_type trans_dcl_ret_type(D3D10_SB_RESOURCE_RETURN_TYPE d3drettype) { switch (d3drettype) { case D3D10_SB_RETURN_TYPE_UNORM: return TGSI_RETURN_TYPE_UNORM; case D3D10_SB_RETURN_TYPE_SNORM: return TGSI_RETURN_TYPE_SNORM; case D3D10_SB_RETURN_TYPE_SINT: return TGSI_RETURN_TYPE_SINT; case D3D10_SB_RETURN_TYPE_UINT: return TGSI_RETURN_TYPE_UINT; case D3D10_SB_RETURN_TYPE_FLOAT: return TGSI_RETURN_TYPE_FLOAT; case D3D10_SB_RETURN_TYPE_MIXED: default: LOG_UNSUPPORTED(TRUE); return TGSI_RETURN_TYPE_FLOAT; } } static void declare_vertices_in(struct Shader_xlate *sx, unsigned in) { /* Make sure vertices_in is consistent with input primitive * and other input declarations. */ if (sx->vertices_in) { assert(sx->vertices_in == in); } else { sx->vertices_in = in; } } struct swizzle_mapping { unsigned x; unsigned y; unsigned z; unsigned w; }; /* mapping of writmask to swizzles */ static const struct swizzle_mapping writemask_to_swizzle[] = { { TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_NONE { TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_X { TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Y }, //TGSI_WRITEMASK_Y { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y }, //TGSI_WRITEMASK_XY { TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_Z { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_XZ { TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z }, //TGSI_WRITEMASK_YZ { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X }, //TGSI_WRITEMASK_XYZ { TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_W { TGSI_SWIZZLE_X, TGSI_SWIZZLE_W, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XW { TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_YW { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_W, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XYW { TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_ZW { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XZW { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_YZW { TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W }, //TGSI_WRITEMASK_XYZW }; static struct ureg_src swizzle_reg(struct ureg_src src, uint writemask, unsigned siv_name) { 
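   /* Return 'src' swizzled so a read matches the declared writemask:
    * scalar system values are always fetched from .x, everything else
    * replicates components according to writemask_to_swizzle above.
    */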
switch (siv_name) { case D3D10_SB_NAME_PRIMITIVE_ID: case D3D10_SB_NAME_INSTANCE_ID: case D3D10_SB_NAME_VERTEX_ID: case D3D10_SB_NAME_VIEWPORT_ARRAY_INDEX: case D3D10_SB_NAME_RENDER_TARGET_ARRAY_INDEX: case D3D10_SB_NAME_IS_FRONT_FACE: return ureg_scalar(src, TGSI_SWIZZLE_X); default: { const struct swizzle_mapping *swizzle = &writemask_to_swizzle[writemask]; return ureg_swizzle(src, swizzle->x, swizzle->y, swizzle->z, swizzle->w); } } } static void dcl_base_output(struct Shader_xlate *sx, struct ureg_program *ureg, struct ureg_dst reg, const struct Shader_dst_operand *operand) { unsigned writemask = operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT; unsigned idx = operand->base.index[0].imm; unsigned i; if (!writemask) { sx->outputs[idx].reg[0] = reg; sx->outputs[idx].reg[1] = reg; sx->outputs[idx].reg[2] = reg; sx->outputs[idx].reg[3] = reg; return; } for (i = 0; i < 4; ++i) { unsigned mask = 1 << i; if ((writemask & mask)) { sx->outputs[idx].reg[i] = reg; } } } static void dcl_base_input(struct Shader_xlate *sx, struct ureg_program *ureg, const struct Shader_dst_operand *operand, struct ureg_src dcl_reg, uint index, uint siv_name) { unsigned writemask = operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT; if (sx->inputs[index].declared && !sx->inputs[index].overloaded) { struct ureg_dst temp = ureg_DECL_temporary(sx->ureg); ureg_MOV(ureg, ureg_writemask(temp, sx->inputs[index].writemask), swizzle_reg(sx->inputs[index].reg, sx->inputs[index].writemask, sx->inputs[index].siv_name)); ureg_MOV(ureg, ureg_writemask(temp, writemask), swizzle_reg(dcl_reg, writemask, siv_name)); sx->inputs[index].reg = ureg_src(temp); sx->inputs[index].overloaded = TRUE; sx->inputs[index].writemask |= writemask; } else if (sx->inputs[index].overloaded) { struct ureg_dst temp = ureg_dst(sx->inputs[index].reg); ureg_MOV(ureg, ureg_writemask(temp, writemask), swizzle_reg(dcl_reg, writemask, siv_name)); sx->inputs[index].writemask |= writemask; } else { assert(!sx->inputs[index].declared); sx->inputs[index].reg = dcl_reg; sx->inputs[index].declared = TRUE; sx->inputs[index].writemask = writemask; sx->inputs[index].siv_name = siv_name; } } static void dcl_vs_input(struct Shader_xlate *sx, struct ureg_program *ureg, const struct Shader_dst_operand *dst) { struct ureg_src reg; assert(dst->base.index_dim == 1); assert(dst->base.index[0].imm < SHADER_MAX_INPUTS); reg = ureg_DECL_vs_input(ureg, dst->base.index[0].imm); dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm, D3D10_SB_NAME_UNDEFINED); } static void dcl_gs_input(struct Shader_xlate *sx, struct ureg_program *ureg, const struct Shader_dst_operand *dst) { if (dst->base.index_dim == 2) { assert(dst->base.index[1].imm < SHADER_MAX_INPUTS); declare_vertices_in(sx, dst->base.index[0].imm); /* XXX: Implement declaration masks in gallium. 
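    * For now each GS input register is declared once for all components,
    * without a per-declaration component mask.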
*/ if (!sx->inputs[dst->base.index[1].imm].reg.File) { struct ureg_src reg = ureg_DECL_input(ureg, TGSI_SEMANTIC_GENERIC, dst->base.index[1].imm, 0, 1); dcl_base_input(sx, ureg, dst, reg, dst->base.index[1].imm, D3D10_SB_NAME_UNDEFINED); } } else { assert(dst->base.type == D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID); assert(dst->base.index_dim == 0); sx->prim_id = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_PRIMID, 0); } } static void dcl_sgv_input(struct Shader_xlate *sx, struct ureg_program *ureg, const struct Shader_dst_operand *dst, uint dcl_siv_name) { struct ureg_src reg; assert(dst->base.index_dim == 1); assert(dst->base.index[0].imm < SHADER_MAX_INPUTS); reg = ureg_DECL_system_value(ureg, translate_system_name(dcl_siv_name), 0); dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm, dcl_siv_name); } static void dcl_siv_input(struct Shader_xlate *sx, struct ureg_program *ureg, const struct Shader_dst_operand *dst, uint dcl_siv_name) { struct ureg_src reg; assert(dst->base.index_dim == 2); assert(dst->base.index[1].imm < SHADER_MAX_INPUTS); declare_vertices_in(sx, dst->base.index[0].imm); reg = ureg_DECL_input(ureg, translate_system_name(dcl_siv_name), 0, 0, 1); dcl_base_input(sx, ureg, dst, reg, dst->base.index[1].imm, dcl_siv_name); } static void dcl_ps_input(struct Shader_xlate *sx, struct ureg_program *ureg, const struct Shader_dst_operand *dst, uint dcl_in_ps_interp) { struct ureg_src reg; assert(dst->base.index_dim == 1); assert(dst->base.index[0].imm < SHADER_MAX_INPUTS); reg = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, dst->base.index[0].imm, translate_interpolation(dcl_in_ps_interp)); dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm, D3D10_SB_NAME_UNDEFINED); } static void dcl_ps_sgv_input(struct Shader_xlate *sx, struct ureg_program *ureg, const struct Shader_dst_operand *dst, uint dcl_siv_name) { struct ureg_src reg; assert(dst->base.index_dim == 1); assert(dst->base.index[0].imm < SHADER_MAX_INPUTS); if (dcl_siv_name == D3D10_SB_NAME_POSITION) { ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, TGSI_FS_COORD_ORIGIN_UPPER_LEFT); ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER); } reg = ureg_DECL_fs_input(ureg, translate_system_name(dcl_siv_name), 0, TGSI_INTERPOLATE_CONSTANT); if (dcl_siv_name == D3D10_SB_NAME_IS_FRONT_FACE) { /* We need to map gallium's front_face to the one expected * by D3D10 */ struct ureg_dst tmp = ureg_DECL_temporary(ureg); tmp = ureg_writemask(tmp, TGSI_WRITEMASK_X); ureg_CMP(ureg, tmp, reg, ureg_imm1i(ureg, 0), ureg_imm1i(ureg, -1)); reg = ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_X); } dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm, dcl_siv_name); } static void dcl_ps_siv_input(struct Shader_xlate *sx, struct ureg_program *ureg, const struct Shader_dst_operand *dst, uint dcl_siv_name, uint dcl_in_ps_interp) { struct ureg_src reg; assert(dst->base.index_dim == 1); assert(dst->base.index[0].imm < SHADER_MAX_INPUTS); reg = ureg_DECL_fs_input(ureg, translate_system_name(dcl_siv_name), 0, translate_interpolation(dcl_in_ps_interp)); if (dcl_siv_name == D3D10_SB_NAME_POSITION) { /* D3D10 expects reciprocal of interpolated 1/w as 4th component, * gallium/GL just interpolated 1/w */ struct ureg_dst tmp = ureg_DECL_temporary(ureg); ureg_MOV(ureg, tmp, reg); ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), ureg_scalar(ureg_src(tmp), TGSI_SWIZZLE_W)); reg = ureg_src(tmp); } dcl_base_input(sx, ureg, dst, reg, dst->base.index[0].imm, dcl_siv_name); } static struct ureg_src 
translate_relative_operand(struct Shader_xlate *sx, const struct Shader_relative_operand *operand) { struct ureg_src reg; switch (operand->type) { case D3D10_SB_OPERAND_TYPE_TEMP: assert(operand->index[0].imm < SHADER_MAX_TEMPS); reg = ureg_src(sx->temps[sx->temp_offset + operand->index[0].imm]); break; case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID: reg = sx->prim_id; break; case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP: assert(operand->index[1].imm < SHADER_MAX_TEMPS); reg = ureg_src(sx->temps[sx->indexable_temp_offsets[operand->index[0].imm] + operand->index[1].imm]); break; case D3D10_SB_OPERAND_TYPE_INPUT: case D3D10_SB_OPERAND_TYPE_OUTPUT: case D3D10_SB_OPERAND_TYPE_IMMEDIATE32: case D3D10_SB_OPERAND_TYPE_IMMEDIATE64: case D3D10_SB_OPERAND_TYPE_SAMPLER: case D3D10_SB_OPERAND_TYPE_RESOURCE: case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER: case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: case D3D10_SB_OPERAND_TYPE_LABEL: case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH: case D3D10_SB_OPERAND_TYPE_NULL: case D3D10_SB_OPERAND_TYPE_RASTERIZER: case D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK: LOG_UNSUPPORTED(TRUE); reg = ureg_src(ureg_DECL_temporary(sx->ureg)); break; default: assert(0); /* should never happen */ reg = ureg_src(ureg_DECL_temporary(sx->ureg)); } reg = ureg_scalar(reg, operand->comp); return reg; } static struct ureg_dst translate_operand(struct Shader_xlate *sx, const struct Shader_operand *operand, unsigned writemask) { struct ureg_dst reg; switch (operand->type) { case D3D10_SB_OPERAND_TYPE_TEMP: assert(operand->index_dim == 1); assert(operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32); assert(operand->index[0].imm < SHADER_MAX_TEMPS); reg = sx->temps[sx->temp_offset + operand->index[0].imm]; break; case D3D10_SB_OPERAND_TYPE_OUTPUT: assert(operand->index_dim == 1); assert(operand->index[0].imm < SHADER_MAX_OUTPUTS); if (operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32) { if (!writemask) { reg = sx->outputs[operand->index[0].imm].reg[0]; } else { unsigned i; for (i = 0; i < 4; ++i) { unsigned mask = 1 << i; if ((writemask & mask)) { reg = sx->outputs[operand->index[0].imm].reg[i]; break; } } } } else { struct ureg_src addr = translate_relative_operand(sx, &operand->index[0].rel); assert(operand->index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE); reg = ureg_dst_indirect(sx->outputs[operand->index[0].imm].reg[0], addr); } break; case D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH: assert(operand->index_dim == 0); reg = sx->output_depth; break; case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID: assert(operand->index_dim == 0); reg = ureg_dst(sx->prim_id); break; case D3D10_SB_OPERAND_TYPE_INPUT: case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP: case D3D10_SB_OPERAND_TYPE_IMMEDIATE32: case D3D10_SB_OPERAND_TYPE_IMMEDIATE64: case D3D10_SB_OPERAND_TYPE_SAMPLER: case D3D10_SB_OPERAND_TYPE_RESOURCE: case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER: case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: case D3D10_SB_OPERAND_TYPE_LABEL: case D3D10_SB_OPERAND_TYPE_NULL: case D3D10_SB_OPERAND_TYPE_RASTERIZER: case D3D10_SB_OPERAND_TYPE_OUTPUT_COVERAGE_MASK: /* XXX: Translate more operands types. 
*/ LOG_UNSUPPORTED(TRUE); reg = ureg_DECL_temporary(sx->ureg); } return reg; } static struct ureg_src translate_indexable_temp(struct Shader_xlate *sx, const struct Shader_operand *operand) { struct ureg_src reg; switch (operand->index[1].index_rep) { case D3D10_SB_OPERAND_INDEX_IMMEDIATE32: reg = ureg_src( sx->temps[sx->indexable_temp_offsets[operand->index[0].imm] + operand->index[1].imm]); break; case D3D10_SB_OPERAND_INDEX_RELATIVE: reg = ureg_src_indirect( ureg_src(sx->temps[ sx->indexable_temp_offsets[operand->index[0].imm]]), translate_relative_operand(sx, &operand->index[1].rel)); break; case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: reg = ureg_src_indirect( ureg_src(sx->temps[ operand->index[1].imm + sx->indexable_temp_offsets[operand->index[0].imm]]), translate_relative_operand(sx, &operand->index[1].rel)); break; default: /* XXX: Other index representations. */ LOG_UNSUPPORTED(TRUE); reg = ureg_src(ureg_DECL_temporary(sx->ureg)); } return reg; } static struct ureg_dst translate_dst_operand(struct Shader_xlate *sx, const struct Shader_dst_operand *operand, boolean saturate) { struct ureg_dst reg; unsigned writemask = operand->mask >> D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT; assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT) == 4); assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_X >> 4) == TGSI_WRITEMASK_X); assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_Y >> 4) == TGSI_WRITEMASK_Y); assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_Z >> 4) == TGSI_WRITEMASK_Z); assert((D3D10_SB_OPERAND_4_COMPONENT_MASK_W >> 4) == TGSI_WRITEMASK_W); switch (operand->base.type) { case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP: assert(operand->base.index_dim == 2); assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32); assert(operand->base.index[0].imm < SHADER_MAX_INDEXABLE_TEMPS); reg = ureg_dst(translate_indexable_temp(sx, &operand->base)); break; default: reg = translate_operand(sx, &operand->base, writemask); } /* oDepth often has an empty writemask */ if (operand->base.type != D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) { reg = ureg_writemask(reg, writemask); } if (saturate) { reg = ureg_saturate(reg); } return reg; } static struct ureg_src translate_src_operand(struct Shader_xlate *sx, const struct Shader_src_operand *operand, const enum dx10_opcode_format format) { struct ureg_src reg; switch (operand->base.type) { case D3D10_SB_OPERAND_TYPE_INPUT: if (operand->base.index_dim == 1) { switch (operand->base.index[0].index_rep) { case D3D10_SB_OPERAND_INDEX_IMMEDIATE32: assert(operand->base.index[0].imm < SHADER_MAX_INPUTS); reg = sx->inputs[operand->base.index[0].imm].reg; break; case D3D10_SB_OPERAND_INDEX_RELATIVE: { struct ureg_src tmp = translate_relative_operand(sx, &operand->base.index[0].rel); reg = ureg_src_indirect(sx->inputs[0].reg, tmp); } break; case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: { struct ureg_src tmp = translate_relative_operand(sx, &operand->base.index[0].rel); reg = ureg_src_indirect(sx->inputs[operand->base.index[0].imm].reg, tmp); } break; default: /* XXX: Other index representations. 
*/ LOG_UNSUPPORTED(TRUE); } } else { assert(operand->base.index_dim == 2); assert(operand->base.index[1].imm < SHADER_MAX_INPUTS); switch (operand->base.index[1].index_rep) { case D3D10_SB_OPERAND_INDEX_IMMEDIATE32: reg = sx->inputs[operand->base.index[1].imm].reg; break; case D3D10_SB_OPERAND_INDEX_RELATIVE: { struct ureg_src tmp = translate_relative_operand(sx, &operand->base.index[1].rel); reg = ureg_src_indirect(sx->inputs[0].reg, tmp); } break; case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: { struct ureg_src tmp = translate_relative_operand(sx, &operand->base.index[1].rel); reg = ureg_src_indirect(sx->inputs[operand->base.index[1].imm].reg, tmp); } break; default: /* XXX: Other index representations. */ LOG_UNSUPPORTED(TRUE); } switch (operand->base.index[0].index_rep) { case D3D10_SB_OPERAND_INDEX_IMMEDIATE32: reg = ureg_src_dimension(reg, operand->base.index[0].imm); break; case D3D10_SB_OPERAND_INDEX_RELATIVE:{ struct ureg_src tmp = translate_relative_operand(sx, &operand->base.index[0].rel); reg = ureg_src_dimension_indirect(reg, tmp, 0); } break; case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: { struct ureg_src tmp = translate_relative_operand(sx, &operand->base.index[0].rel); reg = ureg_src_dimension_indirect(reg, tmp, operand->base.index[0].imm); } break; default: /* XXX: Other index representations. */ LOG_UNSUPPORTED(TRUE); } } break; case D3D10_SB_OPERAND_TYPE_INDEXABLE_TEMP: assert(operand->base.index_dim == 2); assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32); assert(operand->base.index[0].imm < SHADER_MAX_INDEXABLE_TEMPS); reg = translate_indexable_temp(sx, &operand->base); break; case D3D10_SB_OPERAND_TYPE_IMMEDIATE32: switch (format) { case OF_FLOAT: reg = ureg_imm4f(sx->ureg, operand->imm[0].f32, operand->imm[1].f32, operand->imm[2].f32, operand->imm[3].f32); break; case OF_INT: reg = ureg_imm4i(sx->ureg, operand->imm[0].i32, operand->imm[1].i32, operand->imm[2].i32, operand->imm[3].i32); break; case OF_UINT: reg = ureg_imm4u(sx->ureg, operand->imm[0].u32, operand->imm[1].u32, operand->imm[2].u32, operand->imm[3].u32); break; default: assert(0); reg = ureg_src(ureg_DECL_temporary(sx->ureg)); } break; case D3D10_SB_OPERAND_TYPE_SAMPLER: assert(operand->base.index_dim == 1); assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32); assert(operand->base.index[0].imm < SHADER_MAX_SAMPLERS); reg = sx->samplers[operand->base.index[0].imm]; break; case D3D10_SB_OPERAND_TYPE_RESOURCE: assert(operand->base.index_dim == 1); assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32); assert(operand->base.index[0].imm < SHADER_MAX_RESOURCES); reg = sx->sv[operand->base.index[0].imm]; break; case D3D10_SB_OPERAND_TYPE_CONSTANT_BUFFER: assert(operand->base.index_dim == 2); assert(operand->base.index[0].index_rep == D3D10_SB_OPERAND_INDEX_IMMEDIATE32); assert(operand->base.index[0].imm < PIPE_MAX_CONSTANT_BUFFERS); switch (operand->base.index[1].index_rep) { case D3D10_SB_OPERAND_INDEX_IMMEDIATE32: assert(operand->base.index[1].imm < SHADER_MAX_CONSTS); reg = ureg_src_register(TGSI_FILE_CONSTANT, operand->base.index[1].imm); reg = ureg_src_dimension(reg, operand->base.index[0].imm); break; case D3D10_SB_OPERAND_INDEX_RELATIVE: case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: reg = ureg_src_register(TGSI_FILE_CONSTANT, operand->base.index[1].imm); reg = ureg_src_indirect( reg, translate_relative_operand(sx, &operand->base.index[1].rel)); reg = ureg_src_dimension(reg, 
operand->base.index[0].imm); break; default: /* XXX: Other index representations. */ LOG_UNSUPPORTED(TRUE); } break; case D3D10_SB_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: assert(operand->base.index_dim == 1); switch (operand->base.index[0].index_rep) { case D3D10_SB_OPERAND_INDEX_IMMEDIATE32: reg = sx->imms; reg.Index += operand->base.index[0].imm; break; case D3D10_SB_OPERAND_INDEX_RELATIVE: case D3D10_SB_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: reg = sx->imms; reg.Index += operand->base.index[0].imm; reg = ureg_src_indirect( sx->imms, translate_relative_operand(sx, &operand->base.index[0].rel)); break; default: /* XXX: Other index representations. */ LOG_UNSUPPORTED(TRUE); } break; case D3D10_SB_OPERAND_TYPE_INPUT_PRIMITIVEID: reg = sx->prim_id; break; default: reg = ureg_src(translate_operand(sx, &operand->base, 0)); } reg = ureg_swizzle(reg, operand->swizzle[0], operand->swizzle[1], operand->swizzle[2], operand->swizzle[3]); switch (operand->modifier) { case D3D10_SB_OPERAND_MODIFIER_NONE: break; case D3D10_SB_OPERAND_MODIFIER_NEG: reg = ureg_negate(reg); break; case D3D10_SB_OPERAND_MODIFIER_ABS: reg = ureg_abs(reg); break; case D3D10_SB_OPERAND_MODIFIER_ABSNEG: reg = ureg_negate(ureg_abs(reg)); break; default: assert(0); } return reg; } static uint translate_resource_dimension(D3D10_SB_RESOURCE_DIMENSION dim) { switch (dim) { case D3D10_SB_RESOURCE_DIMENSION_UNKNOWN: return TGSI_TEXTURE_UNKNOWN; case D3D10_SB_RESOURCE_DIMENSION_BUFFER: return TGSI_TEXTURE_BUFFER; case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1D: return TGSI_TEXTURE_1D; case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2D: return TGSI_TEXTURE_2D; case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMS: return TGSI_TEXTURE_2D_MSAA; case D3D10_SB_RESOURCE_DIMENSION_TEXTURE3D: return TGSI_TEXTURE_3D; case D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBE: return TGSI_TEXTURE_CUBE; case D3D10_SB_RESOURCE_DIMENSION_TEXTURE1DARRAY: return TGSI_TEXTURE_1D_ARRAY; case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DARRAY: return TGSI_TEXTURE_2D_ARRAY; case D3D10_SB_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: return TGSI_TEXTURE_2D_ARRAY_MSAA; case D3D10_SB_RESOURCE_DIMENSION_TEXTURECUBEARRAY: return TGSI_TEXTURE_CUBE_ARRAY; default: assert(0); return TGSI_TEXTURE_UNKNOWN; } } static uint texture_dim_from_tgsi_target(unsigned tgsi_target) { switch (tgsi_target) { case TGSI_TEXTURE_BUFFER: case TGSI_TEXTURE_1D: case TGSI_TEXTURE_1D_ARRAY: return 1; case TGSI_TEXTURE_2D: case TGSI_TEXTURE_2D_MSAA: case TGSI_TEXTURE_CUBE: case TGSI_TEXTURE_2D_ARRAY: case TGSI_TEXTURE_2D_ARRAY_MSAA: return 2; case TGSI_TEXTURE_3D: return 3; case TGSI_TEXTURE_UNKNOWN: default: assert(0); return 1; } } static boolean operand_is_scalar(const struct Shader_src_operand *operand) { return operand->swizzle[0] == operand->swizzle[1] && operand->swizzle[1] == operand->swizzle[2] && operand->swizzle[2] == operand->swizzle[3]; } static void Shader_add_call(struct Shader_xlate *sx, unsigned d3d_label, unsigned tgsi_label_token) { ASSERT(sx->num_calls < sx->max_calls); sx->calls[sx->num_calls].d3d_label = d3d_label; sx->calls[sx->num_calls].tgsi_label_token = tgsi_label_token; sx->num_calls++; } static void Shader_add_label(struct Shader_xlate *sx, unsigned d3d_label, unsigned tgsi_insn_no) { ASSERT(sx->num_labels < sx->max_labels); sx->labels[sx->num_labels].d3d_label = d3d_label; sx->labels[sx->num_labels].tgsi_insn_no = tgsi_insn_no; sx->num_labels++; } static void sample_ureg_emit(struct ureg_program *ureg, unsigned tgsi_opcode, unsigned num_src, struct Shader_opcode *opcode, struct ureg_dst dst, struct 
ureg_src *src)
{
   unsigned num_offsets = 0;
   struct tgsi_texture_offset texoffsets;

   memset(&texoffsets, 0, sizeof texoffsets);
   if (opcode->imm_texel_offset.u ||
       opcode->imm_texel_offset.v ||
       opcode->imm_texel_offset.w) {
      struct ureg_src offsetreg;
      num_offsets = 1;
      /* we don't always need all 3 values */
      offsetreg = ureg_imm3i(ureg, opcode->imm_texel_offset.u,
                             opcode->imm_texel_offset.v,
                             opcode->imm_texel_offset.w);
      texoffsets.File = offsetreg.File;
      texoffsets.Index = offsetreg.Index;
      texoffsets.SwizzleX = offsetreg.SwizzleX;
      texoffsets.SwizzleY = offsetreg.SwizzleY;
      texoffsets.SwizzleZ = offsetreg.SwizzleZ;
   }
   ureg_tex_insn(ureg, tgsi_opcode, &dst, 1,
                 TGSI_TEXTURE_UNKNOWN, TGSI_RETURN_TYPE_UNKNOWN,
                 &texoffsets, num_offsets, src, num_src);
}

typedef void (*unary_ureg_func)(struct ureg_program *ureg,
                                struct ureg_dst dst,
                                struct ureg_src src);

static void
expand_unary_to_scalarf(struct ureg_program *ureg,
                        unary_ureg_func func,
                        struct Shader_xlate *sx,
                        struct Shader_opcode *opcode)
{
   struct ureg_dst tmp = ureg_DECL_temporary(ureg);
   struct ureg_dst dst = translate_dst_operand(sx, &opcode->dst[0], opcode->saturate);
   struct ureg_src src = translate_src_operand(sx, &opcode->src[0], OF_FLOAT);
   struct ureg_dst scalar_dst;

   ureg_MOV(ureg, tmp, src);
   src = ureg_src(tmp);

   scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_X);
   if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
      func(ureg, scalar_dst, ureg_scalar(src, TGSI_SWIZZLE_X));
   }
   scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_Y);
   if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
      func(ureg, scalar_dst, ureg_scalar(src, TGSI_SWIZZLE_Y));
   }
   scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_Z);
   if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
      func(ureg, scalar_dst, ureg_scalar(src, TGSI_SWIZZLE_Z));
   }
   scalar_dst = ureg_writemask(dst, TGSI_WRITEMASK_W);
   if (scalar_dst.WriteMask != TGSI_WRITEMASK_NONE) {
      func(ureg, scalar_dst, ureg_scalar(src, TGSI_SWIZZLE_W));
   }

   ureg_release_temporary(ureg, tmp);
}

const struct tgsi_token *
Shader_tgsi_translate(const unsigned *code, unsigned *output_mapping)
{
   struct Shader_xlate sx;
   struct Shader_parser parser;
   struct ureg_program *ureg = NULL;
   struct Shader_opcode opcode;
   const struct tgsi_token *tokens = NULL;
   uint nr_tokens;
   boolean shader_dumped = FALSE;
   boolean inside_sub = FALSE;
   uint i, j;

   memset(&sx, 0, sizeof sx);

   Shader_parse_init(&parser, code);

   if (st_debug & ST_DEBUG_TGSI) {
      dx10_shader_dump_tokens(code);
      shader_dumped = TRUE;
   }

   sx.max_calls = 64;
   sx.calls = (struct Shader_call *)MALLOC(sx.max_calls * sizeof(struct Shader_call));
   sx.num_calls = 0;

   sx.max_labels = 64;
   sx.labels = (struct Shader_label *)MALLOC(sx.max_labels * sizeof(struct Shader_label));
   sx.num_labels = 0;

   /* Header.
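    * Create a ureg program for the shader stage given in the header.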
*/ switch (parser.header.type) { case D3D10_SB_PIXEL_SHADER: ureg = ureg_create(PIPE_SHADER_FRAGMENT); break; case D3D10_SB_VERTEX_SHADER: ureg = ureg_create(PIPE_SHADER_VERTEX); break; case D3D10_SB_GEOMETRY_SHADER: ureg = ureg_create(PIPE_SHADER_GEOMETRY); break; } assert(ureg); sx.ureg = ureg; while (Shader_parse_opcode(&parser, &opcode)) { const struct dx10_opcode_xlate *ox; assert(opcode.type < D3D10_SB_NUM_OPCODES); ox = &opcode_xlate[opcode.type]; switch (opcode.type) { case D3D10_SB_OPCODE_EXP: expand_unary_to_scalarf(ureg, ureg_EX2, &sx, &opcode); break; case D3D10_SB_OPCODE_SQRT: expand_unary_to_scalarf(ureg, ureg_SQRT, &sx, &opcode); break; case D3D10_SB_OPCODE_RSQ: expand_unary_to_scalarf(ureg, ureg_RSQ, &sx, &opcode); break; case D3D10_SB_OPCODE_LOG: expand_unary_to_scalarf(ureg, ureg_LG2, &sx, &opcode); break; case D3D10_SB_OPCODE_IMUL: if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) { ureg_IMUL_HI(ureg, translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), translate_src_operand(&sx, &opcode.src[0], OF_INT), translate_src_operand(&sx, &opcode.src[1], OF_INT)); } if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) { ureg_UMUL(ureg, translate_dst_operand(&sx, &opcode.dst[1], opcode.saturate), translate_src_operand(&sx, &opcode.src[0], OF_INT), translate_src_operand(&sx, &opcode.src[1], OF_INT)); } break; case D3D10_SB_OPCODE_FTOI: { /* XXX: tgsi (and just about everybody else, c, opencl, glsl) has * out-of-range (and NaN) values undefined for f2i/f2u, but d3d10 * requires clamping to min and max representable value (as well as 0 * for NaNs) (this applies to both ftoi and ftou). At least the online * docs state that - this is consistent with generic d3d10 conversion * rules. * For FTOI, we cheat a bit here - in particular depending on noone * caring about NaNs, and depending on the (undefined!) behavior of * F2I returning 0x80000000 for too negative values (which works with * x86 sse). Hence only need to clamp too positive values. * Note that it is impossible to clamp using a float, since 2^31 - 1 * is not exactly representable with a float. */ struct ureg_dst too_large = ureg_DECL_temporary(ureg); struct ureg_dst tmp = ureg_DECL_temporary(ureg); ureg_FSGE(ureg, too_large, translate_src_operand(&sx, &opcode.src[0], OF_FLOAT), ureg_imm1f(ureg, 2147483648.0f)); ureg_F2I(ureg, tmp, translate_src_operand(&sx, &opcode.src[0], OF_FLOAT)); ureg_UCMP(ureg, translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), ureg_src(too_large), ureg_imm1i(ureg, 0x7fffffff), ureg_src(tmp)); ureg_release_temporary(ureg, too_large); ureg_release_temporary(ureg, tmp); } break; case D3D10_SB_OPCODE_FTOU: { /* For ftou, we need to do both clamps, which as a bonus also * gets us correct NaN behavior. * Note that it is impossible to clamp using a float against the upper * limit, since 2^32 - 1 is not exactly representable with a float, * but the clamp against 0.0 certainly works just fine. */ struct ureg_dst too_large = ureg_DECL_temporary(ureg); struct ureg_dst tmp = ureg_DECL_temporary(ureg); ureg_FSGE(ureg, too_large, translate_src_operand(&sx, &opcode.src[0], OF_FLOAT), ureg_imm1f(ureg, 4294967296.0f)); /* clamp negative values + NaN to zero. * (Could be done slightly more efficient in llvmpipe due to * MAX NaN behavior handling.) 
*/ ureg_MAX(ureg, tmp, ureg_imm1f(ureg, 0.0f), translate_src_operand(&sx, &opcode.src[0], OF_FLOAT)); ureg_F2U(ureg, tmp, ureg_src(tmp)); ureg_UCMP(ureg, translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), ureg_src(too_large), ureg_imm1u(ureg, 0xffffffff), ureg_src(tmp)); ureg_release_temporary(ureg, too_large); ureg_release_temporary(ureg, tmp); } break; case D3D10_SB_OPCODE_LD_MS: /* XXX: We don't support multi-sampling yet, but we need to parse * this opcode regardless, so we just ignore sample index operand * for now */ case D3D10_SB_OPCODE_LD: if (st_debug & ST_DEBUG_OLD_TEX_OPS) { unsigned resource = opcode.src[1].base.index[0].imm; assert(opcode.src[1].base.index_dim == 1); assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES); if (ureg_src_is_undef(sx.samplers[resource])) { sx.samplers[resource] = ureg_DECL_sampler(ureg, resource); } ureg_TXF(ureg, translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), sx.resources[resource].target, translate_src_operand(&sx, &opcode.src[0], OF_FLOAT), sx.samplers[resource]); } else { struct ureg_src srcreg[2]; srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_INT); srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_INT); sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_I, 2, &opcode, translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), srcreg); } break; case D3D10_SB_OPCODE_CUSTOMDATA: if (opcode.customdata._class == D3D10_SB_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER) { sx.imms = ureg_DECL_immediate_block_uint(ureg, opcode.customdata.u.constbuf.data, opcode.customdata.u.constbuf.count); } else { assert(0); } break; case D3D10_SB_OPCODE_RESINFO: if (st_debug & ST_DEBUG_OLD_TEX_OPS) { unsigned resource = opcode.src[1].base.index[0].imm; assert(opcode.src[1].base.index_dim == 1); assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES); if (ureg_src_is_undef(sx.samplers[resource])) { sx.samplers[resource] = ureg_DECL_sampler(ureg, resource); } /* don't bother with swizzle, ret type etc. */ ureg_TXQ(ureg, translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), sx.resources[resource].target, translate_src_operand(&sx, &opcode.src[0], OF_UINT), sx.samplers[resource]); } else { struct ureg_dst r0 = ureg_DECL_temporary(ureg); struct ureg_src tsrc = translate_src_operand(&sx, &opcode.src[1], OF_UINT); struct ureg_dst dstreg = translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate); /* while specs say swizzle is ignored better safe than sorry */ tsrc.SwizzleX = TGSI_SWIZZLE_X; tsrc.SwizzleY = TGSI_SWIZZLE_Y; tsrc.SwizzleZ = TGSI_SWIZZLE_Z; tsrc.SwizzleW = TGSI_SWIZZLE_W; ureg_SVIEWINFO(ureg, r0, translate_src_operand(&sx, &opcode.src[0], OF_UINT), tsrc); tsrc = ureg_src(r0); tsrc.SwizzleX = opcode.src[1].swizzle[0]; tsrc.SwizzleY = opcode.src[1].swizzle[1]; tsrc.SwizzleZ = opcode.src[1].swizzle[2]; tsrc.SwizzleW = opcode.src[1].swizzle[3]; if (opcode.specific.resinfo_ret_type == D3D10_SB_RESINFO_INSTRUCTION_RETURN_UINT) { ureg_MOV(ureg, dstreg, tsrc); } else if (opcode.specific.resinfo_ret_type == D3D10_SB_RESINFO_INSTRUCTION_RETURN_FLOAT) { ureg_I2F(ureg, dstreg, tsrc); } else { /* D3D10_SB_RESINFO_INSTRUCTION_RETURN_RCPFLOAT */ unsigned i; /* * Must apply rcp only to parts determined by dims, * (width/height/depth) but NOT to array size nor mip levels * hence need to figure that out here. * This is one sick modifier if you ask me! 
*/ unsigned res_index = opcode.src[1].base.index[0].imm; unsigned target = sx.resources[res_index].target; unsigned dims = texture_dim_from_tgsi_target(target); ureg_I2F(ureg, r0, ureg_src(r0)); tsrc = ureg_src(r0); for (i = 0; i < 4; i++) { unsigned dst_swizzle = opcode.src[1].swizzle[i]; struct ureg_dst dstregmasked = ureg_writemask(dstreg, 1 << i); /* * could do one mov with multiple write mask bits set * but rcp is scalar anyway. */ if (dst_swizzle < dims) { ureg_RCP(ureg, dstregmasked, ureg_scalar(tsrc, dst_swizzle)); } else { ureg_MOV(ureg, dstregmasked, ureg_scalar(tsrc, dst_swizzle)); } } } ureg_release_temporary(ureg, r0); } break; case D3D10_SB_OPCODE_SAMPLE: if (st_debug & ST_DEBUG_OLD_TEX_OPS) { assert(opcode.src[1].base.index_dim == 1); assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES); LOG_UNSUPPORTED(opcode.src[1].base.index[0].imm != opcode.src[2].base.index[0].imm); ureg_TEX(ureg, translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), sx.resources[opcode.src[1].base.index[0].imm].target, translate_src_operand(&sx, &opcode.src[0], OF_FLOAT), translate_src_operand(&sx, &opcode.src[2], OF_FLOAT)); } else { struct ureg_src srcreg[3]; srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT); srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT); srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT); sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE, 3, &opcode, translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), srcreg); } break; case D3D10_SB_OPCODE_SAMPLE_C: if (st_debug & ST_DEBUG_OLD_TEX_OPS) { struct ureg_dst r0 = ureg_DECL_temporary(ureg); /* XXX: Support only 2D texture targets for now. * Need to figure out how to pack the compare value * for other dimensions and if there is enough space * in a single operand for all possible cases. */ LOG_UNSUPPORTED(sx.resources[opcode.src[1].base.index[0].imm].target != TGSI_TEXTURE_2D); assert(opcode.src[1].base.index_dim == 1); assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES); /* Insert the compare value into .z component. */ ureg_MOV(ureg, ureg_writemask(r0, TGSI_WRITEMASK_XYW), translate_src_operand(&sx, &opcode.src[0], OF_FLOAT)); ureg_MOV(ureg, ureg_writemask(r0, TGSI_WRITEMASK_Z), translate_src_operand(&sx, &opcode.src[3], OF_FLOAT)); /* XXX: Pass explicit Lod=0 in D3D10_SB_OPCODE_SAMPLE_C_LZ case. */ ureg_TEX(ureg, translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), sx.resources[opcode.src[1].base.index[0].imm].target, ureg_src(r0), translate_src_operand(&sx, &opcode.src[2], OF_FLOAT)); ureg_release_temporary(ureg, r0); } else { struct ureg_src srcreg[4]; srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT); srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT); srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT); srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT); sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_C, 4, &opcode, translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), srcreg); } break; case D3D10_SB_OPCODE_SAMPLE_C_LZ: if (st_debug & ST_DEBUG_OLD_TEX_OPS) { struct ureg_dst r0 = ureg_DECL_temporary(ureg); assert(opcode.src[1].base.index_dim == 1); assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES); /* XXX: Support only 2D texture targets for now. * Need to figure out how to pack the compare value * for other dimensions and if there is enough space * in a single operand for all possible cases. 
*/ LOG_UNSUPPORTED(sx.resources[opcode.src[1].base.index[0].imm].target != TGSI_TEXTURE_2D); /* Insert the compare value into .z component. * Insert 0 into .w component. */ ureg_MOV(ureg, ureg_writemask(r0, TGSI_WRITEMASK_XY), translate_src_operand(&sx, &opcode.src[0], OF_FLOAT)); ureg_MOV(ureg, ureg_writemask(r0, TGSI_WRITEMASK_Z), translate_src_operand(&sx, &opcode.src[3], OF_FLOAT)); ureg_MOV(ureg, ureg_writemask(r0, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 0.0f)); ureg_TXL(ureg, translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), sx.resources[opcode.src[1].base.index[0].imm].target, ureg_src(r0), translate_src_operand(&sx, &opcode.src[2], OF_FLOAT)); ureg_release_temporary(ureg, r0); } else { struct ureg_src srcreg[4]; srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT); srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT); srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT); srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT); sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_C_LZ, 4, &opcode, translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), srcreg); } break; case D3D10_SB_OPCODE_SAMPLE_L: if (st_debug & ST_DEBUG_OLD_TEX_OPS) { struct ureg_dst r0 = ureg_DECL_temporary(ureg); assert(opcode.src[1].base.index_dim == 1); assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES); /* Insert LOD into .w component. */ ureg_MOV(ureg, ureg_writemask(r0, TGSI_WRITEMASK_XYZ), translate_src_operand(&sx, &opcode.src[0], OF_FLOAT)); ureg_MOV(ureg, ureg_writemask(r0, TGSI_WRITEMASK_W), translate_src_operand(&sx, &opcode.src[3], OF_FLOAT)); ureg_TXL(ureg, translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), sx.resources[opcode.src[1].base.index[0].imm].target, ureg_src(r0), translate_src_operand(&sx, &opcode.src[2], OF_FLOAT)); ureg_release_temporary(ureg, r0); } else { struct ureg_src srcreg[4]; srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT); srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT); srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT); srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT); sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_L, 4, &opcode, translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), srcreg); } break; case D3D10_SB_OPCODE_SAMPLE_D: if (st_debug & ST_DEBUG_OLD_TEX_OPS) { assert(opcode.src[1].base.index_dim == 1); assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES); ureg_TXD(ureg, translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), sx.resources[opcode.src[1].base.index[0].imm].target, translate_src_operand(&sx, &opcode.src[0], OF_FLOAT), translate_src_operand(&sx, &opcode.src[3], OF_FLOAT), translate_src_operand(&sx, &opcode.src[4], OF_FLOAT), translate_src_operand(&sx, &opcode.src[2], OF_FLOAT)); } else { struct ureg_src srcreg[5]; srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT); srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT); srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT); srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT); srcreg[4] = translate_src_operand(&sx, &opcode.src[4], OF_FLOAT); sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_D, 5, &opcode, translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), srcreg); } break; case D3D10_SB_OPCODE_SAMPLE_B: if (st_debug & ST_DEBUG_OLD_TEX_OPS) { struct ureg_dst r0 = ureg_DECL_temporary(ureg); assert(opcode.src[1].base.index_dim == 1); assert(opcode.src[1].base.index[0].imm < SHADER_MAX_RESOURCES); /* Insert LOD bias 
into .w component. */ ureg_MOV(ureg, ureg_writemask(r0, TGSI_WRITEMASK_XYZ), translate_src_operand(&sx, &opcode.src[0], OF_FLOAT)); ureg_MOV(ureg, ureg_writemask(r0, TGSI_WRITEMASK_W), translate_src_operand(&sx, &opcode.src[3], OF_FLOAT)); ureg_TXB(ureg, translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), sx.resources[opcode.src[1].base.index[0].imm].target, ureg_src(r0), translate_src_operand(&sx, &opcode.src[2], OF_FLOAT)); ureg_release_temporary(ureg, r0); } else { struct ureg_src srcreg[4]; srcreg[0] = translate_src_operand(&sx, &opcode.src[0], OF_FLOAT); srcreg[1] = translate_src_operand(&sx, &opcode.src[1], OF_UINT); srcreg[2] = translate_src_operand(&sx, &opcode.src[2], OF_UINT); srcreg[3] = translate_src_operand(&sx, &opcode.src[3], OF_FLOAT); sample_ureg_emit(ureg, TGSI_OPCODE_SAMPLE_B, 4, &opcode, translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), srcreg); } break; case D3D10_SB_OPCODE_SINCOS: { struct ureg_dst src0 = ureg_DECL_temporary(ureg); ureg_MOV(ureg, src0, translate_src_operand(&sx, &opcode.src[0], OF_FLOAT)); if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) { struct ureg_dst dst = translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate); struct ureg_src src = ureg_src(src0); ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), ureg_scalar(src, TGSI_SWIZZLE_X)); ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), ureg_scalar(src, TGSI_SWIZZLE_Y)); ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), ureg_scalar(src, TGSI_SWIZZLE_Z)); ureg_SIN(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_scalar(src, TGSI_SWIZZLE_W)); } if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) { struct ureg_dst dst = translate_dst_operand(&sx, &opcode.dst[1], opcode.saturate); struct ureg_src src = ureg_src(src0); ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_X), ureg_scalar(src, TGSI_SWIZZLE_X)); ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Y), ureg_scalar(src, TGSI_SWIZZLE_Y)); ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_Z), ureg_scalar(src, TGSI_SWIZZLE_Z)); ureg_COS(ureg, ureg_writemask(dst, TGSI_WRITEMASK_W), ureg_scalar(src, TGSI_SWIZZLE_W)); } ureg_release_temporary(ureg, src0); } break; case D3D10_SB_OPCODE_UDIV: { struct ureg_dst src0 = ureg_DECL_temporary(ureg); struct ureg_dst src1 = ureg_DECL_temporary(ureg); ureg_MOV(ureg, src0, translate_src_operand(&sx, &opcode.src[0], OF_UINT)); ureg_MOV(ureg, src1, translate_src_operand(&sx, &opcode.src[1], OF_UINT)); if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) { ureg_UDIV(ureg, translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), ureg_src(src0), ureg_src(src1)); } if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) { ureg_UMOD(ureg, translate_dst_operand(&sx, &opcode.dst[1], opcode.saturate), ureg_src(src0), ureg_src(src1)); } ureg_release_temporary(ureg, src0); ureg_release_temporary(ureg, src1); } break; case D3D10_SB_OPCODE_UMUL: { if (opcode.dst[0].base.type != D3D10_SB_OPERAND_TYPE_NULL) { ureg_UMUL_HI(ureg, translate_dst_operand(&sx, &opcode.dst[0], opcode.saturate), translate_src_operand(&sx, &opcode.src[0], OF_UINT), translate_src_operand(&sx, &opcode.src[1], OF_UINT)); } if (opcode.dst[1].base.type != D3D10_SB_OPERAND_TYPE_NULL) { ureg_UMUL(ureg, translate_dst_operand(&sx, &opcode.dst[1], opcode.saturate), translate_src_operand(&sx, &opcode.src[0], OF_UINT), translate_src_operand(&sx, &opcode.src[1], OF_UINT)); } } break; case D3D10_SB_OPCODE_DCL_RESOURCE: { unsigned target; unsigned res_index = opcode.dst[0].base.index[0].imm; 
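      /* Record the TGSI texture target for this resource slot; unless the
       * legacy TEX-style opcodes are in use, also declare a typed sampler
       * view for it.
       */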
assert(opcode.dst[0].base.index_dim == 1); assert(res_index < SHADER_MAX_RESOURCES); target = translate_resource_dimension(opcode.specific.dcl_resource_dimension); sx.resources[res_index].target = target; if (!(st_debug & ST_DEBUG_OLD_TEX_OPS)) { sx.sv[res_index] = ureg_DECL_sampler_view(ureg, res_index, target, trans_dcl_ret_type(opcode.dcl_resource_ret_type[0]), trans_dcl_ret_type(opcode.dcl_resource_ret_type[1]), trans_dcl_ret_type(opcode.dcl_resource_ret_type[2]), trans_dcl_ret_type(opcode.dcl_resource_ret_type[3])); } break; } case D3D10_SB_OPCODE_DCL_CONSTANT_BUFFER: { unsigned num_constants = opcode.src[0].base.index[1].imm; assert(opcode.src[0].base.index[0].imm < PIPE_MAX_CONSTANT_BUFFERS); if (num_constants == 0) { num_constants = SHADER_MAX_CONSTS; } else { assert(num_constants <= SHADER_MAX_CONSTS); } ureg_DECL_constant2D(ureg, 0, num_constants - 1, opcode.src[0].base.index[0].imm); break; } case D3D10_SB_OPCODE_DCL_SAMPLER: assert(opcode.dst[0].base.index_dim == 1); assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_SAMPLERS); sx.samplers[opcode.dst[0].base.index[0].imm] = ureg_DECL_sampler(ureg, opcode.dst[0].base.index[0].imm); break; case D3D10_SB_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER); switch (opcode.specific.dcl_gs_output_primitive_topology) { case D3D10_SB_PRIMITIVE_TOPOLOGY_POINTLIST: ureg_property(sx.ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, PIPE_PRIM_POINTS); break; case D3D10_SB_PRIMITIVE_TOPOLOGY_LINESTRIP: ureg_property(sx.ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, PIPE_PRIM_LINE_STRIP); break; case D3D10_SB_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP: ureg_property(sx.ureg, TGSI_PROPERTY_GS_OUTPUT_PRIM, PIPE_PRIM_TRIANGLE_STRIP); break; default: assert(0); } break; case D3D10_SB_OPCODE_DCL_GS_INPUT_PRIMITIVE: assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER); /* Figure out the second dimension of GS inputs. 
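    * (i.e. the number of vertices of the input primitive).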
*/ switch (opcode.specific.dcl_gs_input_primitive) { case D3D10_SB_PRIMITIVE_POINT: declare_vertices_in(&sx, 1); ureg_property(sx.ureg, TGSI_PROPERTY_GS_INPUT_PRIM, PIPE_PRIM_POINTS); break; case D3D10_SB_PRIMITIVE_LINE: declare_vertices_in(&sx, 2); ureg_property(sx.ureg, TGSI_PROPERTY_GS_INPUT_PRIM, PIPE_PRIM_LINES); break; case D3D10_SB_PRIMITIVE_TRIANGLE: declare_vertices_in(&sx, 3); ureg_property(sx.ureg, TGSI_PROPERTY_GS_INPUT_PRIM, PIPE_PRIM_TRIANGLES); break; case D3D10_SB_PRIMITIVE_LINE_ADJ: declare_vertices_in(&sx, 4); ureg_property(sx.ureg, TGSI_PROPERTY_GS_INPUT_PRIM, PIPE_PRIM_LINES_ADJACENCY); break; case D3D10_SB_PRIMITIVE_TRIANGLE_ADJ: declare_vertices_in(&sx, 6); ureg_property(sx.ureg, TGSI_PROPERTY_GS_INPUT_PRIM, PIPE_PRIM_TRIANGLES_ADJACENCY); break; default: assert(0); } break; case D3D10_SB_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER); ureg_property(sx.ureg, TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES, opcode.specific.dcl_max_output_vertex_count); break; case D3D10_SB_OPCODE_DCL_INPUT: if (parser.header.type == D3D10_SB_VERTEX_SHADER) { dcl_vs_input(&sx, ureg, &opcode.dst[0]); } else { assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER); dcl_gs_input(&sx, ureg, &opcode.dst[0]); } break; case D3D10_SB_OPCODE_DCL_INPUT_SGV: assert(parser.header.type == D3D10_SB_VERTEX_SHADER); dcl_sgv_input(&sx, ureg, &opcode.dst[0], opcode.dcl_siv_name); break; case D3D10_SB_OPCODE_DCL_INPUT_SIV: assert(parser.header.type == D3D10_SB_GEOMETRY_SHADER); dcl_siv_input(&sx, ureg, &opcode.dst[0], opcode.dcl_siv_name); break; case D3D10_SB_OPCODE_DCL_INPUT_PS: assert(parser.header.type == D3D10_SB_PIXEL_SHADER); dcl_ps_input(&sx, ureg, &opcode.dst[0], opcode.specific.dcl_in_ps_interp); break; case D3D10_SB_OPCODE_DCL_INPUT_PS_SGV: assert(parser.header.type == D3D10_SB_PIXEL_SHADER); dcl_ps_sgv_input(&sx, ureg, &opcode.dst[0], opcode.dcl_siv_name); break; case D3D10_SB_OPCODE_DCL_INPUT_PS_SIV: assert(parser.header.type == D3D10_SB_PIXEL_SHADER); dcl_ps_siv_input(&sx, ureg, &opcode.dst[0], opcode.dcl_siv_name, opcode.specific.dcl_in_ps_interp); break; case D3D10_SB_OPCODE_DCL_OUTPUT: if (parser.header.type == D3D10_SB_PIXEL_SHADER) { /* Pixel shader outputs. */ if (opcode.dst[0].base.type == D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) { /* Depth output. */ assert(opcode.dst[0].base.index_dim == 0); sx.output_depth = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_WRITEMASK_Z, 0, 1); sx.output_depth = ureg_writemask(sx.output_depth, TGSI_WRITEMASK_Z); } else { /* Color outputs. 
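    * The D3D10 output register index becomes the TGSI COLOR semantic index.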
      case D3D10_SB_OPCODE_DCL_OUTPUT:
         if (parser.header.type == D3D10_SB_PIXEL_SHADER) {
            /* Pixel shader outputs. */
            if (opcode.dst[0].base.type == D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) {
               /* Depth output. */
               assert(opcode.dst[0].base.index_dim == 0);
               sx.output_depth =
                  ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_POSITION, 0,
                                          TGSI_WRITEMASK_Z, 0, 1);
               sx.output_depth = ureg_writemask(sx.output_depth,
                                                TGSI_WRITEMASK_Z);
            } else {
               /* Color outputs. */
               assert(opcode.dst[0].base.index_dim == 1);
               assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
               dcl_base_output(&sx, ureg,
                               ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR,
                                                opcode.dst[0].base.index[0].imm),
                               &opcode.dst[0]);
            }
         } else {
            assert(opcode.dst[0].base.index_dim == 1);
            assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
            if (output_mapping) {
               unsigned nr_outputs = ureg_get_nr_outputs(ureg);
               output_mapping[nr_outputs] = opcode.dst[0].base.index[0].imm;
            }
            dcl_base_output(&sx, ureg,
                            ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC,
                                             opcode.dst[0].base.index[0].imm),
                            &opcode.dst[0]);
         }
         break;

      case D3D10_SB_OPCODE_DCL_OUTPUT_SIV:
         assert(opcode.dst[0].base.index_dim == 1);
         assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
         if (output_mapping) {
            unsigned nr_outputs = ureg_get_nr_outputs(ureg);
            output_mapping[nr_outputs] = opcode.dst[0].base.index[0].imm;
         }
         if (opcode.dcl_siv_name == D3D10_SB_NAME_CLIP_DISTANCE ||
             opcode.dcl_siv_name == D3D10_SB_NAME_CULL_DISTANCE) {
            /*
             * FIXME: this is quite broken. gallium no longer has separate
             * clip/cull dists, using (max 2) combined clipdist/culldist regs
             * instead. Unlike d3d10 though, which is clip and which is cull
             * is simply determined by the number of clip/cull dists (that
             * is, all clip dists must come first).
             */
            unsigned numcliporcull = sx.num_clip_distances_declared +
                                     sx.num_cull_distances_declared;
            sx.clip_distance_mapping[numcliporcull].d3d =
               opcode.dst[0].base.index[0].imm;
            sx.clip_distance_mapping[numcliporcull].tgsi = numcliporcull;
            if (opcode.dcl_siv_name == D3D10_SB_NAME_CLIP_DISTANCE) {
               ++sx.num_clip_distances_declared;
               /* re-emit should be safe... */
               ureg_property(ureg, TGSI_PROPERTY_NUM_CLIPDIST_ENABLED,
                             sx.num_clip_distances_declared);
            } else {
               ++sx.num_cull_distances_declared;
               ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
                             sx.num_cull_distances_declared);
            }
         } else if (0 && opcode.dcl_siv_name == D3D10_SB_NAME_CULL_DISTANCE) {
            sx.cull_distance_mapping[sx.num_cull_distances_declared].d3d =
               opcode.dst[0].base.index[0].imm;
            sx.cull_distance_mapping[sx.num_cull_distances_declared].tgsi =
               sx.num_cull_distances_declared;
            ++sx.num_cull_distances_declared;
            ureg_property(ureg, TGSI_PROPERTY_NUM_CULLDIST_ENABLED,
                          sx.num_cull_distances_declared);
         }
         dcl_base_output(&sx, ureg,
                         ureg_DECL_output_masked(
                            ureg,
                            translate_system_name(opcode.dcl_siv_name),
                            translate_semantic_index(&sx, opcode.dcl_siv_name,
                                                     &opcode.dst[0]),
                            opcode.dst[0].mask >>
                               D3D10_SB_OPERAND_4_COMPONENT_MASK_SHIFT,
                            0, 1),
                         &opcode.dst[0]);
         break;

      case D3D10_SB_OPCODE_DCL_OUTPUT_SGV:
         assert(opcode.dst[0].base.index_dim == 1);
         assert(opcode.dst[0].base.index[0].imm < SHADER_MAX_OUTPUTS);
         if (output_mapping) {
            unsigned nr_outputs = ureg_get_nr_outputs(ureg);
            output_mapping[nr_outputs] = opcode.dst[0].base.index[0].imm;
         }
         dcl_base_output(&sx, ureg,
                         ureg_DECL_output(ureg,
                                          translate_system_name(opcode.dcl_siv_name),
                                          0),
                         &opcode.dst[0]);
         break;

      case D3D10_SB_OPCODE_DCL_TEMPS: {
         uint i;
         assert(opcode.specific.dcl_num_temps + sx.declared_temps <=
                SHADER_MAX_TEMPS);
         sx.temp_offset = sx.declared_temps;
         for (i = 0; i < opcode.specific.dcl_num_temps; i++) {
            sx.temps[sx.declared_temps + i] = ureg_DECL_temporary(ureg);
         }
         sx.declared_temps += opcode.specific.dcl_num_temps;
      }
      break;
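      /*
       * Illustrative note (not from the original source): gallium has no
       * true indexable temporaries here (see the XXX below), so each
       * indexable array is laid out as a contiguous run of ordinary TGSI
       * temporaries, with indexable_temp_offsets[] remembering where each
       * array starts.  Roughly, "dcl_temps 2" followed by an indexable
       * array x0 of three registers would give:
       *
       *    TEMP[0..1]  r0..r1
       *    TEMP[2..4]  x0[0..2]   (indexable_temp_offsets[0] == 2)
       */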
      case D3D10_SB_OPCODE_DCL_INDEXABLE_TEMP: {
         uint i;
         /* XXX: Add true indexable temps to gallium. */
         assert(opcode.specific.dcl_indexable_temp.index <
                SHADER_MAX_INDEXABLE_TEMPS);
         assert(opcode.specific.dcl_indexable_temp.count + sx.declared_temps <=
                SHADER_MAX_TEMPS);
         sx.indexable_temp_offsets[opcode.specific.dcl_indexable_temp.index] =
            sx.declared_temps;
         for (i = 0; i < opcode.specific.dcl_indexable_temp.count; i++) {
            sx.temps[sx.declared_temps + i] = ureg_DECL_temporary(ureg);
         }
         sx.declared_temps += opcode.specific.dcl_indexable_temp.count;
      }
      break;

      case D3D10_SB_OPCODE_IF: {
         unsigned label = 0;
         if (opcode.specific.test_boolean == D3D10_SB_INSTRUCTION_TEST_ZERO) {
            /* TGSI's UIF tests for non-zero, so compare the source against
             * zero first to invert the condition.
             */
            struct ureg_src src =
               translate_src_operand(&sx, &opcode.src[0], OF_INT);
            struct ureg_dst src_nz = ureg_DECL_temporary(ureg);
            ureg_USEQ(ureg, src_nz, src, ureg_imm1u(ureg, 0));
            ureg_UIF(ureg, ureg_src(src_nz), &label);
            ureg_release_temporary(ureg, src_nz);
         } else {
            ureg_UIF(ureg,
                     translate_src_operand(&sx, &opcode.src[0], OF_INT),
                     &label);
         }
      }
      break;

      case D3D10_SB_OPCODE_RETC:
      case D3D10_SB_OPCODE_CONTINUEC:
      case D3D10_SB_OPCODE_CALLC:
      case D3D10_SB_OPCODE_DISCARD:
      case D3D10_SB_OPCODE_BREAKC: {
         /* Conditional forms expand to the unconditional instruction
          * wrapped in UIF ... ENDIF.
          */
         unsigned label = 0;

         assert(operand_is_scalar(&opcode.src[0]));

         if (opcode.specific.test_boolean == D3D10_SB_INSTRUCTION_TEST_ZERO) {
            struct ureg_src src =
               translate_src_operand(&sx, &opcode.src[0], OF_INT);
            struct ureg_dst src_nz = ureg_DECL_temporary(ureg);
            ureg_USEQ(ureg, src_nz, src, ureg_imm1u(ureg, 0));
            ureg_UIF(ureg, ureg_src(src_nz), &label);
            ureg_release_temporary(ureg, src_nz);
         } else {
            ureg_UIF(ureg,
                     translate_src_operand(&sx, &opcode.src[0], OF_INT),
                     &label);
         }

         switch (opcode.type) {
         case D3D10_SB_OPCODE_RETC:
            ureg_RET(ureg);
            break;
         case D3D10_SB_OPCODE_CONTINUEC:
            ureg_CONT(ureg);
            break;
         case D3D10_SB_OPCODE_CALLC: {
            unsigned label = opcode.src[1].base.index[0].imm;
            unsigned tgsi_token_label = 0;
            ureg_CAL(ureg, &tgsi_token_label);
            Shader_add_call(&sx, label, tgsi_token_label);
         }
         break;
         case D3D10_SB_OPCODE_DISCARD:
            ureg_KILL(ureg);
            break;
         case D3D10_SB_OPCODE_BREAKC:
            ureg_BRK(ureg);
            break;
         default:
            assert(0);
            break;
         }

         ureg_ENDIF(ureg);
      }
      break;

      case D3D10_SB_OPCODE_LABEL: {
         unsigned label = opcode.src[0].base.index[0].imm;
         unsigned tgsi_inst_no = 0;
         if (inside_sub) {
            ureg_ENDSUB(ureg);
         }
         tgsi_inst_no = ureg_get_instruction_number(ureg);
         ureg_BGNSUB(ureg);
         inside_sub = TRUE;
         Shader_add_label(&sx, label, tgsi_inst_no);
      }
      break;

      case D3D10_SB_OPCODE_CALL: {
         unsigned label = opcode.src[0].base.index[0].imm;
         unsigned tgsi_token_label = 0;
         ureg_CAL(ureg, &tgsi_token_label);
         Shader_add_call(&sx, label, tgsi_token_label);
      }
      break;

      case D3D10_SB_OPCODE_EMIT:
         ureg_EMIT(ureg, ureg_imm1u(ureg, 0));
         break;

      case D3D10_SB_OPCODE_CUT:
         ureg_ENDPRIM(ureg, ureg_imm1u(ureg, 0));
         break;

      case D3D10_SB_OPCODE_EMITTHENCUT:
         ureg_EMIT(ureg, ureg_imm1u(ureg, 0));
         ureg_ENDPRIM(ureg, ureg_imm1u(ureg, 0));
         break;

      case D3D10_SB_OPCODE_DCL_INDEX_RANGE:
      case D3D10_SB_OPCODE_DCL_GLOBAL_FLAGS:
         /* Ignore */
         break;

      default: {
         uint i;
         struct ureg_dst dst[SHADER_MAX_DST_OPERANDS];
         struct ureg_src src[SHADER_MAX_SRC_OPERANDS];

         assert(ox->tgsi_opcode != TGSI_EXPAND);

         if (ox->tgsi_opcode == TGSI_LOG_UNSUPPORTED) {
            if (!shader_dumped) {
               dx10_shader_dump_tokens(code);
               shader_dumped = TRUE;
            }
            debug_printf("%s: unsupported opcode %i\n",
                         __FUNCTION__, ox->type);
            assert(ox->tgsi_opcode != TGSI_LOG_UNSUPPORTED);
         }
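         /*
          * Illustrative note (not from the original source): this default
          * arm is the table-driven path, where ox->tgsi_opcode already names
          * the TGSI opcode and ox->format selects float/int/uint operand
          * handling.  Roughly, with assumed register assignments:
          *
          *    add r0.xyzw, r1.xyzw, r2.xyzw  ->  ADD TEMP[0], TEMP[1], TEMP[2]
          */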
         /* Destination operands. */
         for (i = 0; i < opcode.num_dst; i++) {
            dst[i] = translate_dst_operand(&sx, &opcode.dst[i],
                                           opcode.saturate);
         }

         /* Source operands. */
         for (i = 0; i < opcode.num_src; i++) {
            src[i] = translate_src_operand(&sx, &opcode.src[i], ox->format);
         }

         /* Try to re-route output depth to Z channel. */
         if (opcode.dst[0].base.type == D3D10_SB_OPERAND_TYPE_OUTPUT_DEPTH) {
            LOG_UNSUPPORTED(opcode.type != D3D10_SB_OPCODE_MOV);
            dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_Z);
            src[0] = ureg_scalar(src[0], TGSI_SWIZZLE_X);
         }

         ureg_insn(ureg, ox->tgsi_opcode,
                   dst, opcode.num_dst,
                   src, opcode.num_src, 0);
      }
      }

      Shader_opcode_free(&opcode);
   }

   if (inside_sub) {
      ureg_ENDSUB(ureg);
   }

   ureg_END(ureg);

   /* Patch each CAL with the TGSI instruction number recorded for its
    * D3D10 label, now that all subroutines have been emitted.
    */
   for (i = 0; i < sx.num_calls; ++i) {
      for (j = 0; j < sx.num_labels; ++j) {
         if (sx.calls[i].d3d_label == sx.labels[j].d3d_label) {
            ureg_fixup_label(sx.ureg,
                             sx.calls[i].tgsi_label_token,
                             sx.labels[j].tgsi_insn_no);
            break;
         }
      }
      ASSERT(j < sx.num_labels);
   }

   FREE(sx.labels);
   FREE(sx.calls);

   tokens = ureg_get_tokens(ureg, &nr_tokens);
   assert(tokens);

   ureg_destroy(ureg);

   if (st_debug & ST_DEBUG_TGSI) {
      tgsi_dump(tokens, 0);
   }

   return tokens;
}
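/*
 * Note (illustrative sketch, not from the original source): the token array
 * returned above is what a gallium driver ultimately consumes, e.g.:
 *
 *    struct pipe_shader_state state;
 *    memset(&state, 0, sizeof state);
 *    state.tokens = tokens;
 *    handle = pipe->create_vs_state(pipe, &state);
 *
 * The tokens come from ureg_get_tokens() and can later be released with
 * ureg_free_tokens().
 */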