Bug Summary

File:Source/Core/VideoCommon/Src/PixelShaderGen.cpp
Location:line 245, column 5
Description:Value stored to 'ptr' is never read

Annotated Source Code

1// Copyright 2013 Dolphin Emulator Project
2// Licensed under GPLv2
3// Refer to the license.txt file included.
4
5#include <stdio.h>
6#include <cmath>
7#include <assert.h>
8#include <locale.h>
9
10#include "LightingShaderGen.h"
11#include "PixelShaderGen.h"
12#include "XFMemory.h" // for texture projection mode
13#include "BPMemory.h"
14#include "VideoConfig.h"
15#include "NativeVertexFormat.h"
16
17
18static void StageHash(u32 stage, u32* out)
19{
20 out[0] |= bpmem.combiners[stage].colorC.hex & 0xFFFFFF; // 24
21 u32 alphaC = bpmem.combiners[stage].alphaC.hex & 0xFFFFF0; // 24, strip out tswap and rswap for now
22 out[0] |= (alphaC&0xF0) << 24; // 8
23 out[1] |= alphaC >> 8; // 16
24
25 // reserve 3 bits for bpmem.tevorders[stage/2].getTexMap
26 out[1] |= bpmem.tevorders[stage/2].getTexCoord(stage&1) << 19; // 3
27 out[1] |= bpmem.tevorders[stage/2].getEnable(stage&1) << 22; // 1
28 // reserve 3 bits for bpmem.tevorders[stage/2].getColorChan
29
30 bool bHasIndStage = bpmem.tevind[stage].IsActive() && bpmem.tevind[stage].bt < bpmem.genMode.numindstages;
31 out[2] |= bHasIndStage << 2; // 1
32
33 bool needstexcoord = false;
34
35 if (bHasIndStage)
36 {
37 out[2] |= (bpmem.tevind[stage].hex & 0x17FFFF) << 3; // 21, TODO: needs an explanation
38 needstexcoord = true;
39 }
40
41
42 TevStageCombiner::ColorCombiner& cc = bpmem.combiners[stage].colorC;
43 TevStageCombiner::AlphaCombiner& ac = bpmem.combiners[stage].alphaC;
44
45 if(cc.a == TEVCOLORARG_RASA11 || cc.a == TEVCOLORARG_RASC10
46 || cc.b == TEVCOLORARG_RASA11 || cc.b == TEVCOLORARG_RASC10
47 || cc.c == TEVCOLORARG_RASA11 || cc.c == TEVCOLORARG_RASC10
48 || cc.d == TEVCOLORARG_RASA11 || cc.d == TEVCOLORARG_RASC10
49 || ac.a == TEVALPHAARG_RASA5 || ac.b == TEVALPHAARG_RASA5
50 || ac.c == TEVALPHAARG_RASA5 || ac.d == TEVALPHAARG_RASA5)
51 {
52 out[0] |= bpmem.combiners[stage].alphaC.rswap;
53 out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2].swap1 << 24; // 2
54 out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2].swap2 << 26; // 2
55 out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2+1].swap1 << 28; // 2
56 out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2+1].swap2 << 30; // 2
57 out[1] |= (bpmem.tevorders[stage/2].getColorChan(stage&1)&1) << 23;
58 out[2] |= (bpmem.tevorders[stage/2].getColorChan(stage&1)&0x6) >> 1;
59 }
60
61 out[3] |= bpmem.tevorders[stage/2].getEnable(stage&1);
62 if (bpmem.tevorders[stage/2].getEnable(stage&1))
63 {
64 if (bHasIndStage)
65 needstexcoord = true;
66
67 out[0] |= bpmem.combiners[stage].alphaC.tswap;
68 out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2].swap1 << 1; // 2
69 out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2].swap2 << 3; // 2
70 out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2+1].swap1 << 5; // 2
71 out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2+1].swap2 << 7; // 2
72 out[1] |= bpmem.tevorders[stage/2].getTexMap(stage&1) << 16;
73 }
74
75 if (cc.a == TEVCOLORARG_KONST14 || cc.b == TEVCOLORARG_KONST14 || cc.c == TEVCOLORARG_KONST14 || cc.d == TEVCOLORARG_KONST14
76 || ac.a == TEVALPHAARG_KONST6 || ac.b == TEVALPHAARG_KONST6 || ac.c == TEVALPHAARG_KONST6 || ac.d == TEVALPHAARG_KONST6)
77 {
78 out[3] |= bpmem.tevksel[stage/2].getKC(stage&1) << 9; // 5
79 out[3] |= bpmem.tevksel[stage/2].getKA(stage&1) << 14; // 5
80 }
81
82 if (needstexcoord)
83 {
84 out[1] |= bpmem.tevorders[stage/2].getTexCoord(stage&1) << 16;
85 }
86}
87
88// Mash together all the inputs that contribute to the code of a generated pixel shader into
89// a unique identifier, basically containing all the bits. Yup, it's a lot ....
90// It would likely be a lot more efficient to build this incrementally as the attributes
91// are set...
92void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 components)
93{
94 memset(uid->values, 0, sizeof(uid->values));
95 uid->values[0] |= bpmem.genMode.numtevstages; // 4
96 uid->values[0] |= bpmem.genMode.numtexgens << 4; // 4
97 uid->values[0] |= dstAlphaMode << 8; // 2
98
99 bool enablePL = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting;
100 uid->values[0] |= enablePL << 10; // 1
101
102 if (!enablePL)
103 {
104 uid->values[0] |= xfregs.numTexGen.numTexGens << 11; // 4
105 }
106
107 AlphaTest::TEST_RESULT alphaPreTest = bpmem.alpha_test.TestResult();
108 uid->values[0] |= alphaPreTest << 15; // 2
109
110 // numtexgens should be <= 8
111 for (unsigned int i = 0; i < bpmem.genMode.numtexgens; ++i)
112 {
113 uid->values[0] |= xfregs.texMtxInfo[i].projection << (17+i); // 1
114 }
115
116 uid->values[1] = bpmem.genMode.numindstages; // 3
117 u32 indirectStagesUsed = 0;
118 for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i)
119 {
120 if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages)
121 indirectStagesUsed |= (1 << bpmem.tevind[i].bt);
122 }
123
124 assert(indirectStagesUsed == (indirectStagesUsed & 0xF))(static_cast<void> (0));
125
126 uid->values[1] |= indirectStagesUsed << 3; // 4;
127
128 for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i)
129 {
130 if (indirectStagesUsed & (1 << i))
131 {
132 uid->values[1] |= (bpmem.tevindref.getTexCoord(i) < bpmem.genMode.numtexgens) << (7 + 3*i); // 1
133 if (bpmem.tevindref.getTexCoord(i) < bpmem.genMode.numtexgens)
134 uid->values[1] |= bpmem.tevindref.getTexCoord(i) << (8 + 3*i); // 2
135 }
136 }
137
138 u32* ptr = &uid->values[2];
139 for (unsigned int i = 0; i < bpmem.genMode.numtevstages+1u; ++i)
140 {
141 StageHash(i, ptr);
142 ptr += 4; // max: ptr = &uid->values[66]
143 }
144
145 ptr[0] |= bpmem.alpha_test.comp0; // 3
146 ptr[0] |= bpmem.alpha_test.comp1 << 3; // 3
147 ptr[0] |= bpmem.alpha_test.logic << 6; // 2
148
149 ptr[0] |= bpmem.ztex2.op << 8; // 2
150 ptr[0] |= bpmem.zcontrol.early_ztest << 10; // 1
151 ptr[0] |= bpmem.zmode.testenable << 11; // 1
152 ptr[0] |= bpmem.zmode.updateenable << 12; // 1
153
154 if (dstAlphaMode != DSTALPHA_ALPHA_PASS)
155 {
156 ptr[0] |= bpmem.fog.c_proj_fsel.fsel << 13; // 3
157 if (bpmem.fog.c_proj_fsel.fsel != 0)
158 {
159 ptr[0] |= bpmem.fog.c_proj_fsel.proj << 16; // 1
160 ptr[0] |= bpmem.fogRange.Base.Enabled << 17; // 1
161 }
162 }
163
164 ++ptr;
165 if (enablePL)
166 {
167 ptr += GetLightingShaderId(ptr);
168 *ptr++ = components;
169 }
170
171 uid->num_values = int(ptr - uid->values);
172}
173
174void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u32 components)
175{
176 memset(uid->values, 0, sizeof(uid->values));
177 u32* ptr = uid->values;
178 *ptr++ = dstAlphaMode; // 0
179 *ptr++ = bpmem.genMode.hex; // 1
180 *ptr++ = bpmem.ztex2.hex; // 2
181 *ptr++ = bpmem.zcontrol.hex; // 3
182 *ptr++ = bpmem.zmode.hex; // 4
183 *ptr++ = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; // 5
184 *ptr++ = xfregs.numTexGen.hex; // 6
185
186 if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
187 {
188 *ptr++ = xfregs.color[0].hex;
189 *ptr++ = xfregs.alpha[0].hex;
190 *ptr++ = xfregs.color[1].hex;
191 *ptr++ = xfregs.alpha[1].hex;
192 *ptr++ = components;
193 }
194
195 for (unsigned int i = 0; i < 8; ++i)
196 *ptr++ = xfregs.texMtxInfo[i].hex; // 7-14
197
198 for (unsigned int i = 0; i < 16; ++i)
199 *ptr++ = bpmem.tevind[i].hex; // 15-30
200
201 *ptr++ = bpmem.tevindref.hex; // 31
202
203 for (u32 i = 0; i < bpmem.genMode.numtevstages+1u; ++i) // up to 16 times
204 {
205 *ptr++ = bpmem.combiners[i].colorC.hex; // 32+5*i
206 *ptr++ = bpmem.combiners[i].alphaC.hex; // 33+5*i
207 *ptr++ = bpmem.tevind[i].hex; // 34+5*i
208 *ptr++ = bpmem.tevksel[i/2].hex; // 35+5*i
209 *ptr++ = bpmem.tevorders[i/2].hex; // 36+5*i
210 }
211
212 ptr = &uid->values[112];
213
214 *ptr++ = bpmem.alpha_test.hex; // 112
215
216 *ptr++ = bpmem.fog.c_proj_fsel.hex; // 113
217 *ptr++ = bpmem.fogRange.Base.hex; // 114
218
219 _assert_((ptr - uid->values) == uid->GetNumValues()){};
220}
221
222void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::string& old_code, DSTALPHA_MODE dstAlphaMode, u32 components)
223{
224 if (!g_ActiveConfig.bEnableShaderDebugging)
225 return;
226
227 PIXELSHADERUIDSAFE new_id;
228 GetSafePixelShaderId(&new_id, dstAlphaMode, components);
229
230 if (!(old_id == new_id))
231 {
232 std::string new_code(GeneratePixelShaderCode(dstAlphaMode, api, components));
233 if (old_code != new_code)
234 {
235 _assert_(old_id.GetNumValues() == new_id.GetNumValues()){};
236
237 char msg[8192];
238 char* ptr = msg;
239 ptr += sprintf(ptr, "Pixel shader IDs matched but unique IDs did not!\nUnique IDs (old <-> new):\n");
240 const int N = new_id.GetNumValues();
241 for (int i = 0; i < N/2; ++i)
242 ptr += sprintf(ptr, "%02d, %08X %08X | %08X %08X\n", 2*i, old_id.values[2*i], old_id.values[2*i+1],
243 new_id.values[2*i], new_id.values[2*i+1]);
244 if (N % 2)
245 ptr += sprintf(ptr, "%02d, %08X | %08X\n", N-1, old_id.values[N-1], new_id.values[N-1]);
Value stored to 'ptr' is never read
246
247 static int num_failures = 0;
248 char szTemp[MAX_PATH4096];
249 sprintf(szTemp, "%spsuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
250 std::ofstream file;
251 OpenFStream(file, szTemp, std::ios_base::out);
252 file << msg;
253 file << "\n\nOld shader code:\n" << old_code;
254 file << "\n\nNew shader code:\n" << new_code;
255 file.close();
256
257 PanicAlert("Unique pixel shader ID mismatch!\n\nReport this to the devs, along with the contents of %s.", szTemp)MsgAlert(false, WARNING, "Unique pixel shader ID mismatch!\n\nReport this to the devs, along with the contents of %s."
, szTemp)
;
258 }
259 }
260}
261
262// old tev->pixelshader notes
263//
264// color for this stage (alpha, color) is given by bpmem.tevorders[0].colorchan0
265// konstant for this stage (alpha, color) is given by bpmem.tevksel
266// inputs are given by bpmem.combiners[0].colorC.a/b/c/d << could be current channel color
267// according to GXTevColorArg table above
268// output is given by .outreg
269// tevtemp is set according to swapmodetables and
270
271static void WriteStage(char *&p, int n, API_TYPE ApiType);
272static void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType);
273// static void WriteAlphaCompare(char *&p, int num, int comp);
274static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth);
275static void WriteFog(char *&p, API_TYPE ApiType);
276
277static const char *tevKSelTableC[] = // KCSEL
278{
279 "1.0f,1.0f,1.0f", // 1 = 0x00
280 "0.875f,0.875f,0.875f", // 7_8 = 0x01
281 "0.75f,0.75f,0.75f", // 3_4 = 0x02
282 "0.625f,0.625f,0.625f", // 5_8 = 0x03
283 "0.5f,0.5f,0.5f", // 1_2 = 0x04
284 "0.375f,0.375f,0.375f", // 3_8 = 0x05
285 "0.25f,0.25f,0.25f", // 1_4 = 0x06
286 "0.125f,0.125f,0.125f", // 1_8 = 0x07
287 "ERROR1", // 0x08
288 "ERROR2", // 0x09
289 "ERROR3", // 0x0a
290 "ERROR4", // 0x0b
291 I_KCOLORS"k""[0].rgb", // K0 = 0x0C
292 I_KCOLORS"k""[1].rgb", // K1 = 0x0D
293 I_KCOLORS"k""[2].rgb", // K2 = 0x0E
294 I_KCOLORS"k""[3].rgb", // K3 = 0x0F
295 I_KCOLORS"k""[0].rrr", // K0_R = 0x10
296 I_KCOLORS"k""[1].rrr", // K1_R = 0x11
297 I_KCOLORS"k""[2].rrr", // K2_R = 0x12
298 I_KCOLORS"k""[3].rrr", // K3_R = 0x13
299 I_KCOLORS"k""[0].ggg", // K0_G = 0x14
300 I_KCOLORS"k""[1].ggg", // K1_G = 0x15
301 I_KCOLORS"k""[2].ggg", // K2_G = 0x16
302 I_KCOLORS"k""[3].ggg", // K3_G = 0x17
303 I_KCOLORS"k""[0].bbb", // K0_B = 0x18
304 I_KCOLORS"k""[1].bbb", // K1_B = 0x19
305 I_KCOLORS"k""[2].bbb", // K2_B = 0x1A
306 I_KCOLORS"k""[3].bbb", // K3_B = 0x1B
307 I_KCOLORS"k""[0].aaa", // K0_A = 0x1C
308 I_KCOLORS"k""[1].aaa", // K1_A = 0x1D
309 I_KCOLORS"k""[2].aaa", // K2_A = 0x1E
310 I_KCOLORS"k""[3].aaa", // K3_A = 0x1F
311};
312
313static const char *tevKSelTableA[] = // KASEL
314{
315 "1.0f", // 1 = 0x00
316 "0.875f",// 7_8 = 0x01
317 "0.75f", // 3_4 = 0x02
318 "0.625f",// 5_8 = 0x03
319 "0.5f", // 1_2 = 0x04
320 "0.375f",// 3_8 = 0x05
321 "0.25f", // 1_4 = 0x06
322 "0.125f",// 1_8 = 0x07
323 "ERROR5", // 0x08
324 "ERROR6", // 0x09
325 "ERROR7", // 0x0a
326 "ERROR8", // 0x0b
327 "ERROR9", // 0x0c
328 "ERROR10", // 0x0d
329 "ERROR11", // 0x0e
330 "ERROR12", // 0x0f
331 I_KCOLORS"k""[0].r", // K0_R = 0x10
332 I_KCOLORS"k""[1].r", // K1_R = 0x11
333 I_KCOLORS"k""[2].r", // K2_R = 0x12
334 I_KCOLORS"k""[3].r", // K3_R = 0x13
335 I_KCOLORS"k""[0].g", // K0_G = 0x14
336 I_KCOLORS"k""[1].g", // K1_G = 0x15
337 I_KCOLORS"k""[2].g", // K2_G = 0x16
338 I_KCOLORS"k""[3].g", // K3_G = 0x17
339 I_KCOLORS"k""[0].b", // K0_B = 0x18
340 I_KCOLORS"k""[1].b", // K1_B = 0x19
341 I_KCOLORS"k""[2].b", // K2_B = 0x1A
342 I_KCOLORS"k""[3].b", // K3_B = 0x1B
343 I_KCOLORS"k""[0].a", // K0_A = 0x1C
344 I_KCOLORS"k""[1].a", // K1_A = 0x1D
345 I_KCOLORS"k""[2].a", // K2_A = 0x1E
346 I_KCOLORS"k""[3].a", // K3_A = 0x1F
347};
348
349static const char *tevScaleTable[] = // CS
350{
351 "1.0f", // SCALE_1
352 "2.0f", // SCALE_2
353 "4.0f", // SCALE_4
354 "0.5f", // DIVIDE_2
355};
356
357static const char *tevBiasTable[] = // TB
358{
359 "", // ZERO,
360 "+0.5f", // ADDHALF,
361 "-0.5f", // SUBHALF,
362 "",
363};
364
365static const char *tevOpTable[] = { // TEV
366 "+", // TEVOP_ADD = 0,
367 "-", // TEVOP_SUB = 1,
368};
369
370static const char *tevCInputTable[] = // CC
371{
372 "(prev.rgb)", // CPREV,
373 "(prev.aaa)", // APREV,
374 "(c0.rgb)", // C0,
375 "(c0.aaa)", // A0,
376 "(c1.rgb)", // C1,
377 "(c1.aaa)", // A1,
378 "(c2.rgb)", // C2,
379 "(c2.aaa)", // A2,
380 "(textemp.rgb)", // TEXC,
381 "(textemp.aaa)", // TEXA,
382 "(rastemp.rgb)", // RASC,
383 "(rastemp.aaa)", // RASA,
384 "float3(1.0f, 1.0f, 1.0f)", // ONE
385 "float3(0.5f, 0.5f, 0.5f)", // HALF
386 "(konsttemp.rgb)", //"konsttemp.rgb", // KONST
387 "float3(0.0f, 0.0f, 0.0f)", // ZERO
388 ///added extra values to map clamped values
389 "(cprev.rgb)", // CPREV,
390 "(cprev.aaa)", // APREV,
391 "(cc0.rgb)", // C0,
392 "(cc0.aaa)", // A0,
393 "(cc1.rgb)", // C1,
394 "(cc1.aaa)", // A1,
395 "(cc2.rgb)", // C2,
396 "(cc2.aaa)", // A2,
397 "(textemp.rgb)", // TEXC,
398 "(textemp.aaa)", // TEXA,
399 "(crastemp.rgb)", // RASC,
400 "(crastemp.aaa)", // RASA,
401 "float3(1.0f, 1.0f, 1.0f)", // ONE
402 "float3(0.5f, 0.5f, 0.5f)", // HALF
403 "(ckonsttemp.rgb)", //"konsttemp.rgb", // KONST
404 "float3(0.0f, 0.0f, 0.0f)", // ZERO
405 "PADERROR1", "PADERROR2", "PADERROR3", "PADERROR4"
406};
407
408static const char *tevAInputTable[] = // CA
409{
410 "prev", // APREV,
411 "c0", // A0,
412 "c1", // A1,
413 "c2", // A2,
414 "textemp", // TEXA,
415 "rastemp", // RASA,
416 "konsttemp", // KONST, (hw1 had quarter)
417 "float4(0.0f, 0.0f, 0.0f, 0.0f)", // ZERO
418 ///added extra values to map clamped values
419 "cprev", // APREV,
420 "cc0", // A0,
421 "cc1", // A1,
422 "cc2", // A2,
423 "textemp", // TEXA,
424 "crastemp", // RASA,
425 "ckonsttemp", // KONST, (hw1 had quarter)
426 "float4(0.0f, 0.0f, 0.0f, 0.0f)", // ZERO
427 "PADERROR5", "PADERROR6", "PADERROR7", "PADERROR8",
428 "PADERROR9", "PADERROR10", "PADERROR11", "PADERROR12",
429};
430
431static const char *tevRasTable[] =
432{
433 "colors_0",
434 "colors_1",
435 "ERROR13", //2
436 "ERROR14", //3
437 "ERROR15", //4
438 "float4(alphabump,alphabump,alphabump,alphabump)", // use bump alpha
439 "(float4(alphabump,alphabump,alphabump,alphabump)*(255.0f/248.0f))", //normalized
440 "float4(0.0f, 0.0f, 0.0f, 0.0f)", // zero
441};
442
443//static const char *tevTexFunc[] = { "tex2D", "texRECT" };
444
445static const char *tevCOutputTable[] = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" };
446static const char *tevAOutputTable[] = { "prev.a", "c0.a", "c1.a", "c2.a" };
447static const char *tevIndAlphaSel[] = {"", "x", "y", "z"};
448//static const char *tevIndAlphaScale[] = {"", "*32", "*16", "*8"};
449static const char *tevIndAlphaScale[] = {"*(248.0f/255.0f)", "*(224.0f/255.0f)", "*(240.0f/255.0f)", "*(248.0f/255.0f)"};
450static const char *tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias
451static const char *tevIndBiasAdd[] = {"-128.0f", "1.0f", "1.0f", "1.0f" }; // indexed by fmt
452static const char *tevIndWrapStart[] = {"0.0f", "256.0f", "128.0f", "64.0f", "32.0f", "16.0f", "0.001f" };
453static const char *tevIndFmtScale[] = {"255.0f", "31.0f", "15.0f", "7.0f" };
454
455#define WRITEp+=sprintf p+=sprintf
456
457static char swapModeTable[4][5];
458
459static char text[16384];
460
461struct RegisterState
462{
463 bool ColorNeedOverflowControl;
464 bool AlphaNeedOverflowControl;
465 bool AuxStored;
466};
467
468static RegisterState RegisterStates[4];
469
470static void BuildSwapModeTable()
471{
472 static const char *swapColors = "rgba";
473 for (int i = 0; i < 4; i++)
474 {
475 swapModeTable[i][0] = swapColors[bpmem.tevksel[i*2].swap1];
476 swapModeTable[i][1] = swapColors[bpmem.tevksel[i*2].swap2];
477 swapModeTable[i][2] = swapColors[bpmem.tevksel[i*2+1].swap1];
478 swapModeTable[i][3] = swapColors[bpmem.tevksel[i*2+1].swap2];
479 swapModeTable[i][4] = 0;
480 }
481}
482
483// We can't use function defines since the Qualcomm shader compiler doesn't support it
484static const char *GLSLConvertFunctions[] =
485{
486 "frac", // HLSL
487 "fract", // GLSL
488 "lerp",
489 "mix"
490};
491#define FUNC_FRAC0 0
492#define FUNC_LERP2 2
493
494const char* WriteRegister(API_TYPE ApiType, const char *prefix, const u32 num)
495{
496 if (ApiType == API_OPENGL)
497 return ""; // Nothing to do here
498 static char result[64];
499 sprintf(result, " : register(%s%d)", prefix, num);
500 return result;
501}
502
503const char *WriteLocation(API_TYPE ApiType)
504{
505 if (g_ActiveConfig.backend_info.bSupportsGLSLUBO)
506 return "";
507 static char result[64];
508 sprintf(result, "uniform ");
509 return result;
510}
511
512const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components)
513{
514 setlocale(LC_NUMERIC1, "C"); // Reset locale for compilation
515 text[sizeof(text) - 1] = 0x7C; // canary
516
517 BuildSwapModeTable(); // Needed for WriteStage
518 int numStages = bpmem.genMode.numtevstages + 1;
519 int numTexgen = bpmem.genMode.numtexgens;
520
521 bool per_pixel_depth = bpmem.ztex2.op != ZTEXTURE_DISABLE0 && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable;
522 bool bOpenGL = ApiType == API_OPENGL;
523 char *p = text;
524 WRITEp+=sprintf(p, "//Pixel Shader for TEV stages\n");
525 WRITEp+=sprintf(p, "//%i TEV stages, %i texgens, XXX IND stages\n",
526 numStages, numTexgen/*, bpmem.genMode.numindstages*/);
527
528 int nIndirectStagesUsed = 0;
529 if (bpmem.genMode.numindstages > 0)
530 {
531 for (int i = 0; i < numStages; ++i)
532 {
533 if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages)
534 nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt;
535 }
536 }
537
538 if (ApiType == API_OPENGL)
539 {
540
541 // A function here
542 // Fmod implementation gleaned from Nvidia
543 // At http://http.developer.nvidia.com/Cg/fmod.html
544 WRITEp+=sprintf(p, "float fmod( float x, float y )\n");
545 WRITEp+=sprintf(p, "{\n");
546 WRITEp+=sprintf(p, "\tfloat z = fract( abs( x / y) ) * abs( y );\n");
547 WRITEp+=sprintf(p, "\treturn (x < 0) ? -z : z;\n");
548 WRITEp+=sprintf(p, "}\n");
549
550 for (int i = 0; i < 8; ++i)
551 WRITEp+=sprintf(p, "uniform sampler2D samp%d;\n", i);
552 }
553 else
554 {
555 // Declare samplers
556 if (ApiType != API_D3D11)
557 {
558 WRITEp+=sprintf(p, "uniform sampler2D ");
559 }
560 else
561 {
562 WRITEp+=sprintf(p, "sampler ");
563 }
564
565 bool bfirst = true;
566 for (int i = 0; i < 8; ++i)
567 {
568 WRITEp+=sprintf(p, "%s samp%d %s", bfirst?"":",", i, WriteRegister(ApiType, "s", i));
569 bfirst = false;
570 }
571 WRITEp+=sprintf(p, ";\n");
572 if (ApiType == API_D3D11)
573 {
574 WRITEp+=sprintf(p, "Texture2D ");
575 bfirst = true;
576 for (int i = 0; i < 8; ++i)
577 {
578 WRITEp+=sprintf(p, "%s Tex%d : register(t%d)", bfirst?"":",", i, i);
579 bfirst = false;
580 }
581 WRITEp+=sprintf(p, ";\n");
582 }
583 }
584
585 WRITEp+=sprintf(p, "\n");
586 if (g_ActiveConfig.backend_info.bSupportsGLSLUBO)
587 WRITEp+=sprintf(p, "layout(std140) uniform PSBlock {\n");
588
589 WRITEp+=sprintf(p, "\t%sfloat4 " I_COLORS"color""[4] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_COLORS0));
590 WRITEp+=sprintf(p, "\t%sfloat4 " I_KCOLORS"k""[4] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_KCOLORS(0 + 4)));
591 WRITEp+=sprintf(p, "\t%sfloat4 " I_ALPHA"alphaRef""[1] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_ALPHA((0 + 4) + 4)));
592 WRITEp+=sprintf(p, "\t%sfloat4 " I_TEXDIMS"texdim""[8] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_TEXDIMS(((0 + 4) + 4) + 1)));
593 WRITEp+=sprintf(p, "\t%sfloat4 " I_ZBIAS"czbias""[2] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_ZBIAS((((0 + 4) + 4) + 1) + 8)));
594 WRITEp+=sprintf(p, "\t%sfloat4 " I_INDTEXSCALE"cindscale""[2] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_INDTEXSCALE(((((0 + 4) + 4) + 1) + 8) + 2)));
595 WRITEp+=sprintf(p, "\t%sfloat4 " I_INDTEXMTX"cindmtx""[6] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_INDTEXMTX((((((0 + 4) + 4) + 1) + 8) + 2) + 2)));
596 WRITEp+=sprintf(p, "\t%sfloat4 " I_FOG"cfog""[3] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_FOG(((((((0 + 4) + 4) + 1) + 8) + 2) + 2) + 6)));
597
598 // For pixel lighting
599 WRITEp+=sprintf(p, "\t%sfloat4 " I_PLIGHTS"cPLights""[40] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_PLIGHTS((((((((0 + 4) + 4) + 1) + 8) + 2) + 2) + 6) + 3)));
600 WRITEp+=sprintf(p, "\t%sfloat4 " I_PMATERIALS"cPmtrl""[4] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_PMATERIALS(((((((((0 + 4) + 4) + 1) + 8) + 2) + 2) + 6) + 3) + 40)));
601
602 if (g_ActiveConfig.backend_info.bSupportsGLSLUBO)
603 WRITEp+=sprintf(p, "};\n");
604
605 if (ApiType == API_OPENGL)
606 {
607 WRITEp+=sprintf(p, "COLOROUT(ocol0)\n");
608 if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
609 WRITEp+=sprintf(p, "COLOROUT(ocol1)\n");
610
611 if (per_pixel_depth)
612 WRITEp+=sprintf(p, "#define depth gl_FragDepth\n");
613 WRITEp+=sprintf(p, "float4 rawpos = gl_FragCoord;\n");
614
615 WRITEp+=sprintf(p, "VARYIN float4 colors_02;\n");
616 WRITEp+=sprintf(p, "VARYIN float4 colors_12;\n");
617 WRITEp+=sprintf(p, "float4 colors_0 = colors_02;\n");
618 WRITEp+=sprintf(p, "float4 colors_1 = colors_12;\n");
619
620 // compute window position if needed because binding semantic WPOS is not widely supported
621 // Let's set up attributes
622 if (xfregs.numTexGen.numTexGens < 7)
623 {
624 for (int i = 0; i < 8; ++i)
625 {
626 WRITEp+=sprintf(p, "VARYIN float3 uv%d_2;\n", i);
627 WRITEp+=sprintf(p, "float3 uv%d = uv%d_2;\n", i, i);
628 }
629 WRITEp+=sprintf(p, "VARYIN float4 clipPos_2;\n");
630 WRITEp+=sprintf(p, "float4 clipPos = clipPos_2;\n");
631 if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
632 {
633 WRITEp+=sprintf(p, "VARYIN float4 Normal_2;\n");
634 WRITEp+=sprintf(p, "float4 Normal = Normal_2;\n");
635 }
636 }
637 else
638 {
639 // wpos is in w of first 4 texcoords
640 if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
641 {
642 for (int i = 0; i < 8; ++i)
643 {
644 WRITEp+=sprintf(p, "VARYIN float4 uv%d_2;\n", i);
645 WRITEp+=sprintf(p, "float4 uv%d = uv%d_2;\n", i, i);
646 }
647 }
648 else
649 {
650 for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i)
651 {
652 WRITEp+=sprintf(p, "VARYIN float%d uv%d_2;\n", i < 4 ? 4 : 3 , i);
653 WRITEp+=sprintf(p, "float%d uv%d = uv%d_2;\n", i < 4 ? 4 : 3 , i, i);
654 }
655 }
656 WRITEp+=sprintf(p, "float4 clipPos;\n");
657 }
658 WRITEp+=sprintf(p, "void main()\n{\n");
659 }
660 else
661 {
662 WRITEp+=sprintf(p, "void main(\n");
663 if (ApiType != API_D3D11)
664 {
665 WRITEp+=sprintf(p, " out float4 ocol0 : COLOR0,%s%s\n in float4 rawpos : %s,\n",
666 dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : COLOR1," : "",
667 per_pixel_depth ? "\n out float depth : DEPTH," : "",
668 ApiType & API_D3D9_SM20 ? "POSITION" : "VPOS");
669 }
670 else
671 {
672 WRITEp+=sprintf(p, " out float4 ocol0 : SV_Target0,%s%s\n in float4 rawpos : SV_Position,\n",
673 dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : SV_Target1," : "",
674 per_pixel_depth ? "\n out float depth : SV_Depth," : "");
675 }
676
677 // "centroid" attribute is only supported by D3D11
678 const char* optCentroid = (ApiType == API_D3D11 ? "centroid" : "");
679
680 WRITEp+=sprintf(p, " in %s float4 colors_0 : COLOR0,\n", optCentroid);
681 WRITEp+=sprintf(p, " in %s float4 colors_1 : COLOR1", optCentroid);
682
683 // compute window position if needed because binding semantic WPOS is not widely supported
684 if (numTexgen < 7)
685 {
686 for (int i = 0; i < numTexgen; ++i)
687 WRITEp+=sprintf(p, ",\n in %s float3 uv%d : TEXCOORD%d", optCentroid, i, i);
688 WRITEp+=sprintf(p, ",\n in %s float4 clipPos : TEXCOORD%d", optCentroid, numTexgen);
689 if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
690 WRITEp+=sprintf(p, ",\n in %s float4 Normal : TEXCOORD%d", optCentroid, numTexgen + 1);
691 WRITEp+=sprintf(p, " ) {\n");
692 }
693 else
694 {
695 // wpos is in w of first 4 texcoords
696 if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
697 {
698 for (int i = 0; i < 8; ++i)
699 WRITEp+=sprintf(p, ",\n in float4 uv%d : TEXCOORD%d", i, i);
700 }
701 else
702 {
703 for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i)
704 WRITEp+=sprintf(p, ",\n in float%d uv%d : TEXCOORD%d", i < 4 ? 4 : 3 , i, i);
705 }
706 WRITEp+=sprintf(p, " ) {\n");
707 WRITEp+=sprintf(p, "\tfloat4 clipPos = float4(0.0f, 0.0f, 0.0f, 0.0f);");
708 }
709 }
710
711 WRITEp+=sprintf(p, " float4 c0 = " I_COLORS"color""[1], c1 = " I_COLORS"color""[2], c2 = " I_COLORS"color""[3], prev = float4(0.0f, 0.0f, 0.0f, 0.0f), textemp = float4(0.0f, 0.0f, 0.0f, 0.0f), rastemp = float4(0.0f, 0.0f, 0.0f, 0.0f), konsttemp = float4(0.0f, 0.0f, 0.0f, 0.0f);\n"
712 " float3 comp16 = float3(1.0f, 255.0f, 0.0f), comp24 = float3(1.0f, 255.0f, 255.0f*255.0f);\n"
713 " float alphabump=0.0f;\n"
714 " float3 tevcoord=float3(0.0f, 0.0f, 0.0f);\n"
715 " float2 wrappedcoord=float2(0.0f,0.0f), tempcoord=float2(0.0f,0.0f);\n"
716 " float4 cc0=float4(0.0f,0.0f,0.0f,0.0f), cc1=float4(0.0f,0.0f,0.0f,0.0f);\n"
717 " float4 cc2=float4(0.0f,0.0f,0.0f,0.0f), cprev=float4(0.0f,0.0f,0.0f,0.0f);\n"
718 " float4 crastemp=float4(0.0f,0.0f,0.0f,0.0f),ckonsttemp=float4(0.0f,0.0f,0.0f,0.0f);\n\n");
719
720 if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
721 {
722 if (xfregs.numTexGen.numTexGens < 7)
723 {
724 WRITEp+=sprintf(p,"\tfloat3 _norm0 = normalize(Normal.xyz);\n\n");
725 WRITEp+=sprintf(p,"\tfloat3 pos = float3(clipPos.x,clipPos.y,Normal.w);\n");
726 }
727 else
728 {
729 WRITEp+=sprintf(p,"\tfloat3 _norm0 = normalize(float3(uv4.w,uv5.w,uv6.w));\n\n");
730 WRITEp+=sprintf(p,"\tfloat3 pos = float3(uv0.w,uv1.w,uv7.w);\n");
731 }
732
733
734 WRITEp+=sprintf(p, "\tfloat4 mat, lacc;\n"
735 "\tfloat3 ldir, h;\n"
736 "\tfloat dist, dist2, attn;\n");
737
738 p = GenerateLightingShader(p, components, I_PMATERIALS"cPmtrl", I_PLIGHTS"cPLights", "colors_", "colors_");
739 }
740
741 if (numTexgen < 7)
742 WRITEp+=sprintf(p, "\tclipPos = float4(rawpos.x, rawpos.y, clipPos.z, clipPos.w);\n");
743 else
744 WRITEp+=sprintf(p, "\tclipPos = float4(rawpos.x, rawpos.y, uv2.w, uv3.w);\n");
745
746 // HACK to handle cases where the tex gen is not enabled
747 if (numTexgen == 0)
748 {
749 WRITEp+=sprintf(p, "\tfloat3 uv0 = float3(0.0f, 0.0f, 0.0f);\n");
750 }
751 else
752 {
753 for (int i = 0; i < numTexgen; ++i)
754 {
755 // optional perspective divides
756 if (xfregs.texMtxInfo[i].projection == XF_TEXPROJ_STQ1)
757 {
758 WRITEp+=sprintf(p, "\tif (uv%d.z != 0.0f)", i);
759 WRITEp+=sprintf(p, "\t\tuv%d.xy = uv%d.xy / uv%d.z;\n", i, i, i);
760 }
761
762 WRITEp+=sprintf(p, "uv%d.xy = uv%d.xy * " I_TEXDIMS"texdim""[%d].zw;\n", i, i, i);
763 }
764 }
765
766 // indirect texture map lookup
767 for (u32 i = 0; i < bpmem.genMode.numindstages; ++i)
768 {
769 if (nIndirectStagesUsed & (1<<i))
770 {
771 int texcoord = bpmem.tevindref.getTexCoord(i);
772
773 if (texcoord < numTexgen)
774 WRITEp+=sprintf(p, "\ttempcoord = uv%d.xy * " I_INDTEXSCALE"cindscale""[%d].%s;\n", texcoord, i/2, (i&1)?"zw":"xy");
775 else
776 WRITEp+=sprintf(p, "\ttempcoord = float2(0.0f, 0.0f);\n");
777
778 char buffer[32];
779 sprintf(buffer, "float3 indtex%d", i);
780 SampleTexture(p, buffer, "tempcoord", "abg", bpmem.tevindref.getTexMap(i), ApiType);
781 }
782 }
783
784 RegisterStates[0].AlphaNeedOverflowControl = false;
785 RegisterStates[0].ColorNeedOverflowControl = false;
786 RegisterStates[0].AuxStored = false;
787 for(int i = 1; i < 4; i++)
788 {
789 RegisterStates[i].AlphaNeedOverflowControl = true;
790 RegisterStates[i].ColorNeedOverflowControl = true;
791 RegisterStates[i].AuxStored = false;
792 }
793
794 for (int i = 0; i < numStages; i++)
795 WriteStage(p, i, ApiType); //build the equation for this stage
796
797 if (numStages)
798 {
799 // The results of the last texenv stage are put onto the screen,
800 // regardless of the used destination register
801 if(bpmem.combiners[numStages - 1].colorC.dest != 0)
802 {
803 bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].AuxStored;
804 WRITEp+=sprintf(p, "\tprev.rgb = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest]);
805 RegisterStates[0].ColorNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl;
806 }
807 if(bpmem.combiners[numStages - 1].alphaC.dest != 0)
808 {
809 bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AlphaNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AuxStored;
810 WRITEp+=sprintf(p, "\tprev.a = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevAOutputTable[bpmem.combiners[numStages - 1].alphaC.dest]);
811 RegisterStates[0].AlphaNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AlphaNeedOverflowControl;
812 }
813 }
814 // emulation of unsigned 8 overflow when casting if needed
815 if(RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl)
816 WRITEp+=sprintf(p, "\tprev = %s(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC0 + bOpenGL]);
817
818 AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult();
819 if (Pretest == AlphaTest::UNDETERMINED)
820 WriteAlphaTest(p, ApiType, dstAlphaMode, per_pixel_depth);
821
822
823 // the screen space depth value = far z + (clip z / clip w) * z range
824 if(ApiType == API_OPENGL || ApiType == API_D3D11)
825 WRITEp+=sprintf(p, "float zCoord = rawpos.z;\n");
826 else
827 // dx9 doesn't support 4 component position, so we have to calculate it again
828 WRITEp+=sprintf(p, "float zCoord = " I_ZBIAS"czbias""[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS"czbias""[1].y;\n");
829
830 // depth texture can safely be ignored if the result won't be written to the depth buffer (early_ztest) and isn't used for fog either
831 bool skip_ztexture = !per_pixel_depth && !bpmem.fog.c_proj_fsel.fsel;
832 if (bpmem.ztex2.op != ZTEXTURE_DISABLE0 && !skip_ztexture)
833 {
834 // use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format...
835 WRITEp+=sprintf(p, "zCoord = dot(" I_ZBIAS"czbias""[0].xyzw, textemp.xyzw) + " I_ZBIAS"czbias""[1].w %s;\n",
836 (bpmem.ztex2.op == ZTEXTURE_ADD1) ? "+ zCoord" : "");
837
838 // scale to make result from frac correct
839 WRITEp+=sprintf(p, "zCoord = zCoord * (16777215.0f/16777216.0f);\n");
840 WRITEp+=sprintf(p, "zCoord = %s(zCoord);\n", GLSLConvertFunctions[FUNC_FRAC0 + bOpenGL]);
841 WRITEp+=sprintf(p, "zCoord = zCoord * (16777216.0f/16777215.0f);\n");
842
843 // Note: depth texture out put is only written to depth buffer if late depth test is used
844 if (per_pixel_depth)
845 WRITEp+=sprintf(p, "depth = zCoord;\n");
846 }
847
848 if (dstAlphaMode == DSTALPHA_ALPHA_PASS)
849 {
850 WRITEp+=sprintf(p, "\tocol0 = float4(prev.rgb, " I_ALPHA"alphaRef""[0].a);\n");
851 }
852 else
853 {
854 WriteFog(p, ApiType);
855 WRITEp+=sprintf(p, "\tocol0 = prev;\n");
856 }
857
858 // Use dual-source color blending to perform dst alpha in a
859 // single pass
860 if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
861 {
862 if(ApiType & API_D3D9)
863 {
864 // alpha component must be 0 or the shader will not compile (Direct3D 9Ex restriction)
865 // Colors will be blended against the color from ocol1 in D3D 9...
866 WRITEp+=sprintf(p, "\tocol1 = float4(prev.a, prev.a, prev.a, 0.0f);\n");
867 }
868 else
869 {
870 // Colors will be blended against the alpha from ocol1...
871 WRITEp+=sprintf(p, "\tocol1 = prev;\n");
872 }
873 // ...and the alpha from ocol0 will be written to the framebuffer.
874 WRITEp+=sprintf(p, "\tocol0.a = " I_ALPHA"alphaRef""[0].a;\n");
875 }
876
877 WRITEp+=sprintf(p, "}\n");
878 if (text[sizeof(text) - 1] != 0x7C)
879 PanicAlert("PixelShader generator - buffer too small, canary has been eaten!")MsgAlert(false, WARNING, "PixelShader generator - buffer too small, canary has been eaten!"
)
;
880
881 setlocale(LC_NUMERIC1, ""); // restore locale
882 return text;
883}
884
885
886
887//table with the color compare operations
888static const char *TEVCMPColorOPTable[16] =
889{
890 "float3(0.0f, 0.0f, 0.0f)",//0
891 "float3(0.0f, 0.0f, 0.0f)",//1
892 "float3(0.0f, 0.0f, 0.0f)",//2
893 "float3(0.0f, 0.0f, 0.0f)",//3
894 "float3(0.0f, 0.0f, 0.0f)",//4
895 "float3(0.0f, 0.0f, 0.0f)",//5
896 "float3(0.0f, 0.0f, 0.0f)",//6
897 "float3(0.0f, 0.0f, 0.0f)",//7
898 " %s + ((%s.r >= %s.r + (0.25f/255.0f)) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_R8_GT 8
899 " %s + ((abs(%s.r - %s.r) < (0.5f/255.0f)) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_R8_EQ 9
900 " %s + (( dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (0.25f/255.0f))) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_GR16_GT 10
901 " %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5f/255.0f) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_GR16_EQ 11
902 " %s + (( dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (0.25f/255.0f))) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_BGR24_GT 12
903 " %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5f/255.0f) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_BGR24_EQ 13
904 " %s + (max(sign(%s.rgb - %s.rgb - (0.25f/255.0f)), float3(0.0f, 0.0f, 0.0f)) * %s)",//#define TEVCMP_RGB8_GT 14
905 " %s + ((float3(1.0f, 1.0f, 1.0f) - max(sign(abs(%s.rgb - %s.rgb) - (0.5f/255.0f)), float3(0.0f, 0.0f, 0.0f))) * %s)"//#define TEVCMP_RGB8_EQ 15
906};
907
908//table with the alpha compare operations
909static const char *TEVCMPAlphaOPTable[16] =
910{
911 "0.0f",//0
912 "0.0f",//1
913 "0.0f",//2
914 "0.0f",//3
915 "0.0f",//4
916 "0.0f",//5
917 "0.0f",//6
918 "0.0f",//7
919 " %s.a + ((%s.r >= (%s.r + (0.25f/255.0f))) ? %s.a : 0.0f)",//#define TEVCMP_R8_GT 8
920 " %s.a + (abs(%s.r - %s.r) < (0.5f/255.0f) ? %s.a : 0.0f)",//#define TEVCMP_R8_EQ 9
921 " %s.a + ((dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (0.25f/255.0f))) ? %s.a : 0.0f)",//#define TEVCMP_GR16_GT 10
922 " %s.a + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5f/255.0f) ? %s.a : 0.0f)",//#define TEVCMP_GR16_EQ 11
923 " %s.a + ((dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (0.25f/255.0f))) ? %s.a : 0.0f)",//#define TEVCMP_BGR24_GT 12
924 " %s.a + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5f/255.0f) ? %s.a : 0.0f)",//#define TEVCMP_BGR24_EQ 13
925 " %s.a + ((%s.a >= (%s.a + (0.25f/255.0f))) ? %s.a : 0.0f)",//#define TEVCMP_A8_GT 14
926 " %s.a + (abs(%s.a - %s.a) < (0.5f/255.0f) ? %s.a : 0.0f)"//#define TEVCMP_A8_EQ 15
927};
928
929static void WriteStage(char *&p, int n, API_TYPE ApiType)
930{
931 int texcoord = bpmem.tevorders[n/2].getTexCoord(n&1);
932 bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens;
933 bool bHasIndStage = bpmem.tevind[n].IsActive() && bpmem.tevind[n].bt < bpmem.genMode.numindstages;
934 bool bOpenGL = ApiType == API_OPENGL;
935 // HACK to handle cases where the tex gen is not enabled
936 if (!bHasTexCoord)
937 texcoord = 0;
938
939 WRITEp+=sprintf(p, "// TEV stage %d\n", n);
940
941 if (bHasIndStage)
942 {
943 WRITEp+=sprintf(p, "// indirect op\n");
944 // perform the indirect op on the incoming regular coordinates using indtex%d as the offset coords
945 if (bpmem.tevind[n].bs != ITBA_OFF0)
946 {
947 WRITEp+=sprintf(p, "alphabump = indtex%d.%s %s;\n",
948 bpmem.tevind[n].bt,
949 tevIndAlphaSel[bpmem.tevind[n].bs],
950 tevIndAlphaScale[bpmem.tevind[n].fmt]);
951 }
952 // format
953 WRITEp+=sprintf(p, "float3 indtevcrd%d = indtex%d * %s;\n", n, bpmem.tevind[n].bt, tevIndFmtScale[bpmem.tevind[n].fmt]);
954
955 // bias
956 if (bpmem.tevind[n].bias != ITB_NONE0 )
957 WRITEp+=sprintf(p, "indtevcrd%d.%s += %s;\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]);
958
959 // multiply by offset matrix and scale
960 if (bpmem.tevind[n].mid != 0)
961 {
962 if (bpmem.tevind[n].mid <= 3)
963 {
964 int mtxidx = 2*(bpmem.tevind[n].mid-1);
965 WRITEp+=sprintf(p, "float2 indtevtrans%d = float2(dot(" I_INDTEXMTX"cindmtx""[%d].xyz, indtevcrd%d), dot(" I_INDTEXMTX"cindmtx""[%d].xyz, indtevcrd%d));\n",
966 n, mtxidx, n, mtxidx+1, n);
967 }
968 else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord)
969 { // s matrix
970 _assert_(bpmem.tevind[n].mid >= 5){};
971 int mtxidx = 2*(bpmem.tevind[n].mid-5);
972 WRITEp+=sprintf(p, "float2 indtevtrans%d = " I_INDTEXMTX"cindmtx""[%d].ww * uv%d.xy * indtevcrd%d.xx;\n", n, mtxidx, texcoord, n);
973 }
974 else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord)
975 { // t matrix
976 _assert_(bpmem.tevind[n].mid >= 9){};
977 int mtxidx = 2*(bpmem.tevind[n].mid-9);
978 WRITEp+=sprintf(p, "float2 indtevtrans%d = " I_INDTEXMTX"cindmtx""[%d].ww * uv%d.xy * indtevcrd%d.yy;\n", n, mtxidx, texcoord, n);
979 }
980 else
981 {
982 WRITEp+=sprintf(p, "float2 indtevtrans%d = float2(0.0f, 0.0f);\n", n);
983 }
984 }
985 else
986 {
987 WRITEp+=sprintf(p, "float2 indtevtrans%d = float2(0.0f, 0.0f);\n", n);
988 }
989
990 // ---------
991 // Wrapping
992 // ---------
993
994 // wrap S
995 if (bpmem.tevind[n].sw == ITW_OFF0)
996 WRITEp+=sprintf(p, "wrappedcoord.x = uv%d.x;\n", texcoord);
997 else if (bpmem.tevind[n].sw == ITW_06)
998 WRITEp+=sprintf(p, "wrappedcoord.x = 0.0f;\n");
999 else
1000 WRITEp+=sprintf(p, "wrappedcoord.x = fmod( uv%d.x, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]);
1001
1002 // wrap T
1003 if (bpmem.tevind[n].tw == ITW_OFF0)
1004 WRITEp+=sprintf(p, "wrappedcoord.y = uv%d.y;\n", texcoord);
1005 else if (bpmem.tevind[n].tw == ITW_06)
1006 WRITEp+=sprintf(p, "wrappedcoord.y = 0.0f;\n");
1007 else
1008 WRITEp+=sprintf(p, "wrappedcoord.y = fmod( uv%d.y, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]);
1009
1010 if (bpmem.tevind[n].fb_addprev) // add previous tevcoord
1011 WRITEp+=sprintf(p, "tevcoord.xy += wrappedcoord + indtevtrans%d;\n", n);
1012 else
1013 WRITEp+=sprintf(p, "tevcoord.xy = wrappedcoord + indtevtrans%d;\n", n);
1014 }
1015
1016 TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC;
1017 TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[n].alphaC;
1018
1019 if(cc.a == TEVCOLORARG_RASA11 || cc.a == TEVCOLORARG_RASC10
1020 || cc.b == TEVCOLORARG_RASA11 || cc.b == TEVCOLORARG_RASC10
1021 || cc.c == TEVCOLORARG_RASA11 || cc.c == TEVCOLORARG_RASC10
1022 || cc.d == TEVCOLORARG_RASA11 || cc.d == TEVCOLORARG_RASC10
1023 || ac.a == TEVALPHAARG_RASA5 || ac.b == TEVALPHAARG_RASA5
1024 || ac.c == TEVALPHAARG_RASA5 || ac.d == TEVALPHAARG_RASA5)
1025 {
1026 char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap];
1027 WRITEp+=sprintf(p, "rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap);
1028 WRITEp+=sprintf(p, "crastemp = %s(rastemp * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC0 + bOpenGL]);
1029 }
1030
1031
1032 if (bpmem.tevorders[n/2].getEnable(n&1))
1033 {
1034 if (!bHasIndStage)
1035 {
1036 // calc tevcord
1037 if (bHasTexCoord)
1038 WRITEp+=sprintf(p, "tevcoord.xy = uv%d.xy;\n", texcoord);
1039 else
1040 WRITEp+=sprintf(p, "tevcoord.xy = float2(0.0f, 0.0f);\n");
1041 }
1042
1043 char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap];
1044 int texmap = bpmem.tevorders[n/2].getTexMap(n&1);
1045 SampleTexture(p, "textemp", "tevcoord", texswap, texmap, ApiType);
1046 }
1047 else
1048 {
1049 WRITEp+=sprintf(p, "textemp = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
1050 }
1051
1052
1053 if (cc.a == TEVCOLORARG_KONST14 || cc.b == TEVCOLORARG_KONST14 || cc.c == TEVCOLORARG_KONST14 || cc.d == TEVCOLORARG_KONST14
1054 || ac.a == TEVALPHAARG_KONST6 || ac.b == TEVALPHAARG_KONST6 || ac.c == TEVALPHAARG_KONST6 || ac.d == TEVALPHAARG_KONST6)
1055 {
1056 int kc = bpmem.tevksel[n / 2].getKC(n & 1);
1057 int ka = bpmem.tevksel[n / 2].getKA(n & 1);
1058 WRITEp+=sprintf(p, "konsttemp = float4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]);
1059 if (kc > 7 || ka > 7)
1060 {
1061 WRITEp+=sprintf(p, "ckonsttemp = %s(konsttemp * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC0 + bOpenGL]);
1062 }
1063 else
1064 {
1065 WRITEp+=sprintf(p, "ckonsttemp = konsttemp;\n");
1066 }
1067 }
1068
1069 if(cc.a == TEVCOLORARG_CPREV0 || cc.a == TEVCOLORARG_APREV1
1070 || cc.b == TEVCOLORARG_CPREV0 || cc.b == TEVCOLORARG_APREV1
1071 || cc.c == TEVCOLORARG_CPREV0 || cc.c == TEVCOLORARG_APREV1
1072 || ac.a == TEVALPHAARG_APREV0 || ac.b == TEVALPHAARG_APREV0 || ac.c == TEVALPHAARG_APREV0)
1073 {
1074 if(RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl)
1075 {
1076 WRITEp+=sprintf(p, "cprev = %s(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC0 + bOpenGL]);
1077 RegisterStates[0].AlphaNeedOverflowControl = false;
1078 RegisterStates[0].ColorNeedOverflowControl = false;
1079 }
1080 else
1081 {
1082 WRITEp+=sprintf(p, "cprev = prev;\n");
1083 }
1084 RegisterStates[0].AuxStored = true;
1085 }
1086
1087 if(cc.a == TEVCOLORARG_C02 || cc.a == TEVCOLORARG_A03
1088 || cc.b == TEVCOLORARG_C02 || cc.b == TEVCOLORARG_A03
1089 || cc.c == TEVCOLORARG_C02 || cc.c == TEVCOLORARG_A03
1090 || ac.a == TEVALPHAARG_A01 || ac.b == TEVALPHAARG_A01 || ac.c == TEVALPHAARG_A01)
1091 {
1092 if(RegisterStates[1].AlphaNeedOverflowControl || RegisterStates[1].ColorNeedOverflowControl)
1093 {
1094 WRITEp+=sprintf(p, "cc0 = %s(c0 * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC0 + bOpenGL]);
1095 RegisterStates[1].AlphaNeedOverflowControl = false;
1096 RegisterStates[1].ColorNeedOverflowControl = false;
1097 }
1098 else
1099 {
1100 WRITEp+=sprintf(p, "cc0 = c0;\n");
1101 }
1102 RegisterStates[1].AuxStored = true;
1103 }
1104
1105 if(cc.a == TEVCOLORARG_C14 || cc.a == TEVCOLORARG_A15
1106 || cc.b == TEVCOLORARG_C14 || cc.b == TEVCOLORARG_A15
1107 || cc.c == TEVCOLORARG_C14 || cc.c == TEVCOLORARG_A15
1108 || ac.a == TEVALPHAARG_A12 || ac.b == TEVALPHAARG_A12 || ac.c == TEVALPHAARG_A12)
1109 {
1110 if(RegisterStates[2].AlphaNeedOverflowControl || RegisterStates[2].ColorNeedOverflowControl)
1111 {
1112 WRITEp+=sprintf(p, "cc1 = %s(c1 * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC0 + bOpenGL]);
1113 RegisterStates[2].AlphaNeedOverflowControl = false;
1114 RegisterStates[2].ColorNeedOverflowControl = false;
1115 }
1116 else
1117 {
1118 WRITEp+=sprintf(p, "cc1 = c1;\n");
1119 }
1120 RegisterStates[2].AuxStored = true;
1121 }
1122
1123 if(cc.a == TEVCOLORARG_C26 || cc.a == TEVCOLORARG_A27
1124 || cc.b == TEVCOLORARG_C26 || cc.b == TEVCOLORARG_A27
1125 || cc.c == TEVCOLORARG_C26 || cc.c == TEVCOLORARG_A27
1126 || ac.a == TEVALPHAARG_A23 || ac.b == TEVALPHAARG_A23 || ac.c == TEVALPHAARG_A23)
1127 {
1128 if(RegisterStates[3].AlphaNeedOverflowControl || RegisterStates[3].ColorNeedOverflowControl)
1129 {
1130 WRITEp+=sprintf(p, "cc2 = %s(c2 * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC0 + bOpenGL]);
1131 RegisterStates[3].AlphaNeedOverflowControl = false;
1132 RegisterStates[3].ColorNeedOverflowControl = false;
1133 }
1134 else
1135 {
1136 WRITEp+=sprintf(p, "cc2 = c2;\n");
1137 }
1138 RegisterStates[3].AuxStored = true;
1139 }
1140
1141 RegisterStates[cc.dest].ColorNeedOverflowControl = (cc.clamp == 0);
1142 RegisterStates[cc.dest].AuxStored = false;
1143
1144 // combine the color channel
1145 WRITEp+=sprintf(p, "// color combine\n");
1146 if (cc.clamp)
1147 WRITEp+=sprintf(p, "%s = clamp(", tevCOutputTable[cc.dest]);
1148 else
1149 WRITEp+=sprintf(p, "%s = ", tevCOutputTable[cc.dest]);
1150
1151 // combine the color channel
1152 if (cc.bias != TevBias_COMPARE3) // if not compare
1153 {
1154 //normal color combiner goes here
1155 if (cc.shift > TEVSCALE_10)
1156 WRITEp+=sprintf(p, "%s*(", tevScaleTable[cc.shift]);
1157
1158 if (!(cc.d == TEVCOLORARG_ZERO15 && cc.op == TEVOP_ADD0))
1159 WRITEp+=sprintf(p, "%s%s", tevCInputTable[cc.d], tevOpTable[cc.op]);
1160
1161 if (cc.a == cc.b)
1162 WRITEp+=sprintf(p, "%s", tevCInputTable[cc.a + 16]);
1163 else if (cc.c == TEVCOLORARG_ZERO15)
1164 WRITEp+=sprintf(p, "%s", tevCInputTable[cc.a + 16]);
1165 else if (cc.c == TEVCOLORARG_ONE12)
1166 WRITEp+=sprintf(p, "%s", tevCInputTable[cc.b + 16]);
1167 else if (cc.a == TEVCOLORARG_ZERO15)
1168 WRITEp+=sprintf(p, "%s*%s", tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]);
1169 else if (cc.b == TEVCOLORARG_ZERO15)
1170 WRITEp+=sprintf(p, "%s*(float3(1.0f, 1.0f, 1.0f)-%s)", tevCInputTable[cc.a + 16], tevCInputTable[cc.c + 16]);
1171 else
1172 WRITEp+=sprintf(p, "%s(%s, %s, %s)", GLSLConvertFunctions[FUNC_LERP2 + bOpenGL], tevCInputTable[cc.a + 16], tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]);
1173
1174 WRITEp+=sprintf(p, "%s", tevBiasTable[cc.bias]);
1175
1176 if (cc.shift > TEVSCALE_10)
1177 WRITEp+=sprintf(p, ")");
1178 }
1179 else
1180 {
1181 int cmp = (cc.shift<<1)|cc.op|8; // comparemode stored here
1182 WRITEp+=sprintf(p, TEVCMPColorOPTable[cmp],//lookup the function from the op table
1183 tevCInputTable[cc.d],
1184 tevCInputTable[cc.a + 16],
1185 tevCInputTable[cc.b + 16],
1186 tevCInputTable[cc.c + 16]);
1187 }
1188 if (cc.clamp)
1189 WRITEp+=sprintf(p, ", 0.0, 1.0)");
1190 WRITEp+=sprintf(p,";\n");
1191
1192 RegisterStates[ac.dest].AlphaNeedOverflowControl = (ac.clamp == 0);
1193 RegisterStates[ac.dest].AuxStored = false;
1194
1195 // combine the alpha channel
1196 WRITEp+=sprintf(p, "// alpha combine\n");
1197 if (ac.clamp)
1198 WRITEp+=sprintf(p, "%s = clamp(", tevAOutputTable[ac.dest]);
1199 else
1200 WRITEp+=sprintf(p, "%s = ", tevAOutputTable[ac.dest]);
1201
1202 if (ac.bias != TevBias_COMPARE3) // if not compare
1203 {
1204 //normal alpha combiner goes here
1205 if (ac.shift > TEVSCALE_10)
1206 WRITEp+=sprintf(p, "%s*(", tevScaleTable[ac.shift]);
1207
1208 if (!(ac.d == TEVALPHAARG_ZERO7 && ac.op == TEVOP_ADD0))
1209 WRITEp+=sprintf(p, "%s.a%s", tevAInputTable[ac.d], tevOpTable[ac.op]);
1210
1211 if (ac.a == ac.b)
1212 WRITEp+=sprintf(p, "%s.a", tevAInputTable[ac.a + 8]);
1213 else if (ac.c == TEVALPHAARG_ZERO7)
1214 WRITEp+=sprintf(p, "%s.a", tevAInputTable[ac.a + 8]);
1215 else if (ac.a == TEVALPHAARG_ZERO7)
1216 WRITEp+=sprintf(p, "%s.a*%s.a", tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]);
1217 else if (ac.b == TEVALPHAARG_ZERO7)
1218 WRITEp+=sprintf(p, "%s.a*(1.0f-%s.a)", tevAInputTable[ac.a + 8], tevAInputTable[ac.c + 8]);
1219 else
1220 WRITEp+=sprintf(p, "%s(%s.a, %s.a, %s.a)", GLSLConvertFunctions[FUNC_LERP2 + bOpenGL], tevAInputTable[ac.a + 8], tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]);
1221
1222 WRITEp+=sprintf(p, "%s",tevBiasTable[ac.bias]);
1223
1224 if (ac.shift > 0)
1225 WRITEp+=sprintf(p, ")");
1226
1227 }
1228 else
1229 {
1230 //compare alpha combiner goes here
1231 int cmp = (ac.shift<<1)|ac.op|8; // comparemode stored here
1232 WRITEp+=sprintf(p, TEVCMPAlphaOPTable[cmp],
1233 tevAInputTable[ac.d],
1234 tevAInputTable[ac.a + 8],
1235 tevAInputTable[ac.b + 8],
1236 tevAInputTable[ac.c + 8]);
1237 }
1238 if (ac.clamp)
1239 WRITEp+=sprintf(p, ", 0.0, 1.0)");
1240 WRITEp+=sprintf(p, ";\n\n");
1241 WRITEp+=sprintf(p, "// TEV done\n");
1242}
1243
1244void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType)
1245{
1246 if (ApiType == API_D3D11)
1247 WRITEp+=sprintf(p, "%s=Tex%d.Sample(samp%d,%s.xy * " I_TEXDIMS"texdim""[%d].xy).%s;\n", destination, texmap,texmap, texcoords, texmap, texswap);
1248 else
1249 WRITEp+=sprintf(p, "%s=%s(samp%d,%s.xy * " I_TEXDIMS"texdim""[%d].xy).%s;\n", destination, ApiType == API_OPENGL ? "texture" : "tex2D", texmap, texcoords, texmap, texswap);
1250}
1251
1252static const char *tevAlphaFuncsTable[] =
1253{
1254 "(false)", //ALPHACMP_NEVER 0
1255 "(prev.a <= %s - (0.25f/255.0f))", //ALPHACMP_LESS 1
1256 "(abs( prev.a - %s ) < (0.5f/255.0f))", //ALPHACMP_EQUAL 2
1257 "(prev.a < %s + (0.25f/255.0f))", //ALPHACMP_LEQUAL 3
1258 "(prev.a >= %s + (0.25f/255.0f))", //ALPHACMP_GREATER 4
1259 "(abs( prev.a - %s ) >= (0.5f/255.0f))", //ALPHACMP_NEQUAL 5
1260 "(prev.a > %s - (0.25f/255.0f))", //ALPHACMP_GEQUAL 6
1261 "(true)" //ALPHACMP_ALWAYS 7
1262};
1263
1264static const char *tevAlphaFunclogicTable[] =
1265{
1266 " && ", // and
1267 " || ", // or
1268 " != ", // xor
1269 " == " // xnor
1270};
1271
1272static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth)
1273{
1274 static const char *alphaRef[2] =
1275 {
1276 I_ALPHA"alphaRef""[0].r",
1277 I_ALPHA"alphaRef""[0].g"
1278 };
1279
1280
1281 // using discard then return works the same in cg and dx9 but not in dx11
1282 WRITEp+=sprintf(p, "\tif(!( ");
1283
1284 int compindex = bpmem.alpha_test.comp0;
1285 WRITEp+=sprintf(p, tevAlphaFuncsTable[compindex],alphaRef[0]);//lookup the first component from the alpha function table
1286
1287 WRITEp+=sprintf(p, "%s", tevAlphaFunclogicTable[bpmem.alpha_test.logic]);//lookup the logic op
1288
1289 compindex = bpmem.alpha_test.comp1;
1290 WRITEp+=sprintf(p, tevAlphaFuncsTable[compindex],alphaRef[1]);//lookup the second component from the alpha function table
1291 WRITEp+=sprintf(p, ")) {\n");
1292
1293 WRITEp+=sprintf(p, "\t\tocol0 = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
1294 if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
1295 WRITEp+=sprintf(p, "\t\tocol1 = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
1296 if(per_pixel_depth)
1297 WRITEp+=sprintf(p, "depth = 1.f;\n");
1298
1299 // HAXX: zcomploc (aka early_ztest) is a way to control whether depth test is done before
1300 // or after texturing and alpha test. PC GPUs have no way to support this
1301 // feature properly as of 2012: depth buffer and depth test are not
1302 // programmable and the depth test is always done after texturing.
1303 // Most importantly, PC GPUs do not allow writing to the z-buffer without
1304 // writing a color value (unless color writing is disabled altogether).
1305 // We implement "depth test before texturing" by discarding the fragment
1306 // when the alpha test fail. This is not a correct implementation because
1307 // even if the depth test fails the fragment could be alpha blended, but
1308 // we don't have a choice.
1309 if (!(bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable))
1310 {
1311 WRITEp+=sprintf(p, "\t\tdiscard;\n");
1312 if (ApiType != API_D3D11)
1313 WRITEp+=sprintf(p, "\t\treturn;\n");
1314 }
1315
1316 WRITEp+=sprintf(p, "}\n");
1317}
1318
1319static const char *tevFogFuncsTable[] =
1320{
1321 "", // No Fog
1322 "", // ?
1323 "", // Linear
1324 "", // ?
1325 "\tfog = 1.0f - pow(2.0f, -8.0f * fog);\n", // exp
1326 "\tfog = 1.0f - pow(2.0f, -8.0f * fog * fog);\n", // exp2
1327 "\tfog = pow(2.0f, -8.0f * (1.0f - fog));\n", // backward exp
1328 "\tfog = 1.0f - fog;\n fog = pow(2.0f, -8.0f * fog * fog);\n" // backward exp2
1329};
1330
1331static void WriteFog(char *&p, API_TYPE ApiType)
1332{
1333 bool bOpenGL = ApiType == API_OPENGL;
1334
1335 if (bpmem.fog.c_proj_fsel.fsel == 0)
1336 return; // no Fog
1337
1338 if (bpmem.fog.c_proj_fsel.proj == 0)
1339 {
1340 // perspective
1341 // ze = A/(B - (Zs >> B_SHF)
1342 WRITEp+=sprintf (p, "\tfloat ze = " I_FOG"cfog""[1].x / (" I_FOG"cfog""[1].y - (zCoord / " I_FOG"cfog""[1].w));\n");
1343 }
1344 else
1345 {
1346 // orthographic
1347 // ze = a*Zs (here, no B_SHF)
1348 WRITEp+=sprintf (p, "\tfloat ze = " I_FOG"cfog""[1].x * zCoord;\n");
1349 }
1350
1351 // x_adjust = sqrt((x-center)^2 + k^2)/k
1352 // ze *= x_adjust
1353 //this is completely theoretical as the real hardware seems to use a table instead of calculating the values.
1354 if (bpmem.fogRange.Base.Enabled)
1355 {
1356 WRITEp+=sprintf (p, "\tfloat x_adjust = (2.0f * (clipPos.x / " I_FOG"cfog""[2].y)) - 1.0f - " I_FOG"cfog""[2].x;\n");
1357 WRITEp+=sprintf (p, "\tx_adjust = sqrt(x_adjust * x_adjust + " I_FOG"cfog""[2].z * " I_FOG"cfog""[2].z) / " I_FOG"cfog""[2].z;\n");
1358 WRITEp+=sprintf (p, "\tze *= x_adjust;\n");
1359 }
1360
1361 WRITEp+=sprintf (p, "\tfloat fog = clamp(ze - " I_FOG"cfog""[1].z, 0.0, 1.0);\n");
1362
1363 if (bpmem.fog.c_proj_fsel.fsel > 3)
1364 {
1365 WRITEp+=sprintf(p, "%s", tevFogFuncsTable[bpmem.fog.c_proj_fsel.fsel]);
1366 }
1367 else
1368 {
1369 if (bpmem.fog.c_proj_fsel.fsel != 2)
1370 WARN_LOG(VIDEO, "Unknown Fog Type! %08x", bpmem.fog.c_proj_fsel.fsel)do { { if (LogTypes::LWARNING <= 3) GenericLog(LogTypes::LWARNING
, LogTypes::VIDEO, "/home/anal/dolphin-emu/Source/Core/VideoCommon/Src/PixelShaderGen.cpp"
, 1370, "Unknown Fog Type! %08x", bpmem.fog.c_proj_fsel.fsel)
; } } while (0)
;
1371 }
1372
1373 WRITEp+=sprintf(p, "\tprev.rgb = %s(prev.rgb, " I_FOG"cfog""[0].rgb, fog);\n", GLSLConvertFunctions[FUNC_LERP2 + bOpenGL]);
1374}