Bug Summary

File: Source/Core/VideoCommon/Src/PixelShaderGen.cpp
Location: line 247, column 5
Description: Value stored to 'ptr' is never read

Annotated Source Code

1// Copyright 2013 Dolphin Emulator Project
2// Licensed under GPLv2
3// Refer to the license.txt file included.
4
5#include <stdio.h>
6#include <cmath>
7#include <assert.h>
8#include <locale.h>
9
10#include "LightingShaderGen.h"
11#include "PixelShaderGen.h"
12#include "XFMemory.h" // for texture projection mode
13#include "BPMemory.h"
14#include "VideoConfig.h"
15#include "NativeVertexFormat.h"
16
17
// Folds the TEV state of one stage into the four 32-bit words out[0..3] of a pixel shader UID.
//   stage - TEV stage index; indexes bpmem.combiners and bpmem.tevind directly, while
//           stage/2 together with stage&1 selects this stage's half of the shared
//           bpmem.tevorders / bpmem.tevksel entries.
//   out   - the four UID words reserved for this stage. They are only OR-ed into, never
//           assigned; GetPixelShaderId zeroes the whole UID before calling this.
// The trailing "// N" comments give the number of bits each field is expected to occupy.
18static void StageHash(u32 stage, u32* out)
19{
20 out[0] |= bpmem.combiners[stage].colorC.hex & 0xFFFFFF; // 24
21 u32 alphaC = bpmem.combiners[stage].alphaC.hex & 0xFFFFF0; // 24, strip out tswap and rswap for now
22 out[0] |= (alphaC&0xF0) << 24; // 8
23 out[1] |= alphaC >> 8; // 16
24
25 // reserve 3 bits for bpmem.tevorders[stage/2].getTexMap
26 out[1] |= bpmem.tevorders[stage/2].getTexCoord(stage&1) << 19; // 3
27 out[1] |= bpmem.tevorders[stage/2].getEnable(stage&1) << 22; // 1
28 // reserve 3 bits for bpmem.tevorders[stage/2].getColorChan
29
 // An indirect stage only counts when it is active and refers to one of the
 // numindstages indirect stages that are enabled in genMode.
30 bool bHasIndStage = bpmem.tevind[stage].IsActive() && bpmem.tevind[stage].bt < bpmem.genMode.numindstages;
31 out[2] |= bHasIndStage << 2; // 1
32
33 bool needstexcoord = false;
34
35 if (bHasIndStage)
36 {
37 out[2] |= (bpmem.tevind[stage].hex & 0x17FFFF) << 3; // 21, TODO: needs an explanation
38 needstexcoord = true;
39 }
40
41
42 TevStageCombiner::ColorCombiner& cc = bpmem.combiners[stage].colorC;
43 TevStageCombiner::AlphaCombiner& ac = bpmem.combiners[stage].alphaC;
44
 // The raster swap table and color channel are hashed only when one of the
 // combiner inputs actually selects the rasterized color or alpha.
45 if(cc.a == TEVCOLORARG_RASA11 || cc.a == TEVCOLORARG_RASC10
46 || cc.b == TEVCOLORARG_RASA11 || cc.b == TEVCOLORARG_RASC10
47 || cc.c == TEVCOLORARG_RASA11 || cc.c == TEVCOLORARG_RASC10
48 || cc.d == TEVCOLORARG_RASA11 || cc.d == TEVCOLORARG_RASC10
49 || ac.a == TEVALPHAARG_RASA5 || ac.b == TEVALPHAARG_RASA5
50 || ac.c == TEVALPHAARG_RASA5 || ac.d == TEVALPHAARG_RASA5)
51 {
 // NOTE(review): rswap is OR-ed into the low bits of out[0], which already hold the
 // low bits of colorC.hex from the first statement - confirm the overlap is intended.
52 out[0] |= bpmem.combiners[stage].alphaC.rswap;
53 out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2].swap1 << 24; // 2
54 out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2].swap2 << 26; // 2
55 out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2+1].swap1 << 28; // 2
56 out[2] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.rswap*2+1].swap2 << 30; // 2
 // The color channel is split: bit 0 goes to out[1] bit 23, bits 1-2 to out[2] bits 0-1.
57 out[1] |= (bpmem.tevorders[stage/2].getColorChan(stage&1)&1) << 23;
58 out[2] |= (bpmem.tevorders[stage/2].getColorChan(stage&1)&0x6) >> 1;
59 }
60
 // Texture-related state is hashed only when texturing is enabled for this stage.
61 out[3] |= bpmem.tevorders[stage/2].getEnable(stage&1);
62 if (bpmem.tevorders[stage/2].getEnable(stage&1))
63 {
 // Redundant: needstexcoord was already set above whenever bHasIndStage is true.
64 if (bHasIndStage)
65 needstexcoord = true;
66
 // NOTE(review): like rswap above, tswap lands on the low bits of out[0] that
 // already carry colorC.hex bits.
67 out[0] |= bpmem.combiners[stage].alphaC.tswap;
68 out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2].swap1 << 1; // 2
69 out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2].swap2 << 3; // 2
70 out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2+1].swap1 << 5; // 2
71 out[3] |= bpmem.tevksel[bpmem.combiners[stage].alphaC.tswap*2+1].swap2 << 7; // 2
72 out[1] |= bpmem.tevorders[stage/2].getTexMap(stage&1) << 16;
73 }
74
 // The konst selectors are hashed only when a combiner input selects the konst value.
75 if (cc.a == TEVCOLORARG_KONST14 || cc.b == TEVCOLORARG_KONST14 || cc.c == TEVCOLORARG_KONST14 || cc.d == TEVCOLORARG_KONST14
76 || ac.a == TEVALPHAARG_KONST6 || ac.b == TEVALPHAARG_KONST6 || ac.c == TEVALPHAARG_KONST6 || ac.d == TEVALPHAARG_KONST6)
77 {
78 out[3] |= bpmem.tevksel[stage/2].getKC(stage&1) << 9; // 5
79 out[3] |= bpmem.tevksel[stage/2].getKA(stage&1) << 14; // 5
80 }
81
 // NOTE(review): the texcoord is OR-ed in at bit 16, the same position used for
 // getTexMap above, although it was already stored unconditionally at bit 19 near the
 // top of this function - confirm that sharing these bits is intended.
82 if (needstexcoord)
83 {
84 out[1] |= bpmem.tevorders[stage/2].getTexCoord(stage&1) << 16;
85 }
86}
87
88// Mash together all the inputs that contribute to the code of a generated pixel shader into
89// a unique identifier, basically containing all the bits. Yup, it's a lot ....
90// It would likely be a lot more efficient to build this incrementally as the attributes
91// are set...
92void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 components)
93{
94 memset(uid->values, 0, sizeof(uid->values));
95 uid->values[0] |= bpmem.genMode.numtevstages; // 4
96 uid->values[0] |= bpmem.genMode.numtexgens << 4; // 4
97 uid->values[0] |= dstAlphaMode << 8; // 2
98 uid->values[0] |= g_ActiveConfig.bFastDepthCalc << 10; // 1
99
100 bool enablePL = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting;
101 uid->values[0] |= enablePL << 11; // 1
102
103 if (!enablePL)
104 {
105 uid->values[0] |= xfregs.numTexGen.numTexGens << 12; // 4
106 }
107
108 AlphaTest::TEST_RESULT alphaPreTest = bpmem.alpha_test.TestResult();
109 uid->values[0] |= alphaPreTest << 16; // 2
110
111 // numtexgens should be <= 8
112 for (unsigned int i = 0; i < bpmem.genMode.numtexgens; ++i)
113 {
114 uid->values[0] |= xfregs.texMtxInfo[i].projection << (18+i); // 1
115 }
116
117 uid->values[1] = bpmem.genMode.numindstages; // 3
118 u32 indirectStagesUsed = 0;
119 for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i)
120 {
121 if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages)
122 indirectStagesUsed |= (1 << bpmem.tevind[i].bt);
123 }
124
125 assert(indirectStagesUsed == (indirectStagesUsed & 0xF))(static_cast<void> (0));
126
127 uid->values[1] |= indirectStagesUsed << 3; // 4;
128
129 for (unsigned int i = 0; i < bpmem.genMode.numindstages; ++i)
130 {
131 if (indirectStagesUsed & (1 << i))
132 {
133 uid->values[1] |= (bpmem.tevindref.getTexCoord(i) < bpmem.genMode.numtexgens) << (7 + 3*i); // 1
134 if (bpmem.tevindref.getTexCoord(i) < bpmem.genMode.numtexgens)
135 uid->values[1] |= bpmem.tevindref.getTexCoord(i) << (8 + 3*i); // 2
136 }
137 }
138
139 u32* ptr = &uid->values[2];
140 for (unsigned int i = 0; i < bpmem.genMode.numtevstages+1u; ++i)
141 {
142 StageHash(i, ptr);
143 ptr += 4; // max: ptr = &uid->values[66]
144 }
145
146 ptr[0] |= bpmem.alpha_test.comp0; // 3
147 ptr[0] |= bpmem.alpha_test.comp1 << 3; // 3
148 ptr[0] |= bpmem.alpha_test.logic << 6; // 2
149
150 ptr[0] |= bpmem.ztex2.op << 8; // 2
151 ptr[0] |= bpmem.zcontrol.early_ztest << 10; // 1
152 ptr[0] |= bpmem.zmode.testenable << 11; // 1
153 ptr[0] |= bpmem.zmode.updateenable << 12; // 1
154
155 if (dstAlphaMode != DSTALPHA_ALPHA_PASS)
156 {
157 ptr[0] |= bpmem.fog.c_proj_fsel.fsel << 13; // 3
158 if (bpmem.fog.c_proj_fsel.fsel != 0)
159 {
160 ptr[0] |= bpmem.fog.c_proj_fsel.proj << 16; // 1
161 ptr[0] |= bpmem.fogRange.Base.Enabled << 17; // 1
162 }
163 }
164
165 ++ptr;
166 if (enablePL)
167 {
168 ptr += GetLightingShaderId(ptr);
169 *ptr++ = components;
170 }
171
172 uid->num_values = int(ptr - uid->values);
173}
174
// Builds the "safe" pixel shader UID. Instead of packing selected bit fields it stores
// whole register words (the .hex members) and config flags into consecutive slots of
// uid->values. ValidatePixelShaderIDs compares two of these to cross-check shader lookups.
//   uid          - receives the identifier; uid->values is zeroed first.
//   dstAlphaMode - stored as-is in slot 0.
//   components   - only stored when pixel lighting is enabled.
// The trailing "// N" comments give the slot index each value is expected to land in.
175void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u32 components)
176{
177 memset(uid->values, 0, sizeof(uid->values));
178 u32* ptr = uid->values;
179 *ptr++ = dstAlphaMode; // 0
180 *ptr++ = bpmem.genMode.hex; // 1
181 *ptr++ = bpmem.ztex2.hex; // 2
182 *ptr++ = bpmem.zcontrol.hex; // 3
183 *ptr++ = bpmem.zmode.hex; // 4
184 *ptr++ = g_ActiveConfig.bFastDepthCalc; // 5
185 *ptr++ = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting; // 6
186 *ptr++ = xfregs.numTexGen.hex; // 7
187
 // NOTE(review): these five extra words shift every later write by five slots, so the
 // slot numbers in the comments below only hold when pixel lighting is disabled.
188 if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
189 {
190 *ptr++ = xfregs.color[0].hex;
191 *ptr++ = xfregs.alpha[0].hex;
192 *ptr++ = xfregs.color[1].hex;
193 *ptr++ = xfregs.alpha[1].hex;
194 *ptr++ = components;
195 }
196
197 for (unsigned int i = 0; i < 8; ++i)
198 *ptr++ = xfregs.texMtxInfo[i].hex; // 8-15
199
200 for (unsigned int i = 0; i < 16; ++i)
201 *ptr++ = bpmem.tevind[i].hex; // 16-31
202
203 *ptr++ = bpmem.tevindref.hex; // 32
204
 // Five words per active TEV stage; slots of unused stages keep the zero from memset.
205 for (u32 i = 0; i < bpmem.genMode.numtevstages+1u; ++i) // up to 16 times
206 {
207 *ptr++ = bpmem.combiners[i].colorC.hex; // 33+5*i
208 *ptr++ = bpmem.combiners[i].alphaC.hex; // 34+5*i
209 *ptr++ = bpmem.tevind[i].hex; // 35+5*i
210 *ptr++ = bpmem.tevksel[i/2].hex; // 36+5*i
211 *ptr++ = bpmem.tevorders[i/2].hex; // 37+5*i
212 }
213
 // Jump to the fixed tail position (33 + 5*16 = 113) regardless of the stage count.
 // NOTE(review): with pixel lighting enabled and all 16 stages active the writes above
 // reach slot 117, so the three writes below would overwrite stage data - confirm.
214 ptr = &uid->values[113];
215
216 *ptr++ = bpmem.alpha_test.hex; // 113
217
218 *ptr++ = bpmem.fog.c_proj_fsel.hex; // 114
219 *ptr++ = bpmem.fogRange.Base.hex; // 115
220
 // Sanity check: the write cursor must end exactly at the UID's declared value count.
221 _assert_((ptr - uid->values) == uid->GetNumValues()){};
222}
223
224void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::string& old_code, DSTALPHA_MODE dstAlphaMode, u32 components)
225{
226 if (!g_ActiveConfig.bEnableShaderDebugging)
227 return;
228
229 PIXELSHADERUIDSAFE new_id;
230 GetSafePixelShaderId(&new_id, dstAlphaMode, components);
231
232 if (!(old_id == new_id))
233 {
234 std::string new_code(GeneratePixelShaderCode(dstAlphaMode, api, components));
235 if (old_code != new_code)
236 {
237 _assert_(old_id.GetNumValues() == new_id.GetNumValues()){};
238
239 char msg[8192];
240 char* ptr = msg;
241 ptr += sprintf(ptr, "Pixel shader IDs matched but unique IDs did not!\nUnique IDs (old <-> new):\n");
242 const int N = new_id.GetNumValues();
243 for (int i = 0; i < N/2; ++i)
244 ptr += sprintf(ptr, "%02d, %08X %08X | %08X %08X\n", 2*i, old_id.values[2*i], old_id.values[2*i+1],
245 new_id.values[2*i], new_id.values[2*i+1]);
246 if (N % 2)
247 ptr += sprintf(ptr, "%02d, %08X | %08X\n", N-1, old_id.values[N-1], new_id.values[N-1]);
Value stored to 'ptr' is never read
248
249 static int num_failures = 0;
250 char szTemp[MAX_PATH4096];
251 sprintf(szTemp, "%spsuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
252 std::ofstream file;
253 OpenFStream(file, szTemp, std::ios_base::out);
254 file << msg;
255 file << "\n\nOld shader code:\n" << old_code;
256 file << "\n\nNew shader code:\n" << new_code;
257 file.close();
258
259 PanicAlert("Unique pixel shader ID mismatch!\n\nReport this to the devs, along with the contents of %s.", szTemp)MsgAlert(false, WARNING, "Unique pixel shader ID mismatch!\n\nReport this to the devs, along with the contents of %s."
, szTemp)
;
260 }
261 }
262}
263
264// old tev->pixelshader notes
265//
266// color for this stage (alpha, color) is given by bpmem.tevorders[0].colorchan0
267// konstant for this stage (alpha, color) is given by bpmem.tevksel
268// inputs are given by bpmem.combiners[0].colorC.a/b/c/d << could be current channel color
269// according to GXTevColorArg table above
270// output is given by .outreg
271// tevtemp is set according to swapmodetables and
272
// Forward declarations of the file-local code emitters called by GeneratePixelShaderCode.
// Each one takes the output cursor `p` by reference, so the text it appends advances the
// caller's cursor. The definitions are not part of this excerpt.
273static void WriteStage(char *&p, int n, API_TYPE ApiType);
274static void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType);
275// static void WriteAlphaCompare(char *&p, int num, int comp);
276static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth);
277static void WriteFog(char *&p, API_TYPE ApiType);
278
// Shader-source text for each konst color selection (KCSEL) value 0x00-0x1F, as listed in
// the per-entry comments: constant fractions for 0x00-0x07, "ERRORn" placeholders for the
// values 0x08-0x0B, then swizzles of the four I_KCOLORS constants. Every entry is a
// three-component expression.
// NOTE(review): the consumer is outside this excerpt - presumably WriteStage; confirm.
279static const char *tevKSelTableC[] = // KCSEL
280{
281 "1.0f,1.0f,1.0f", // 1 = 0x00
282 "0.875f,0.875f,0.875f", // 7_8 = 0x01
283 "0.75f,0.75f,0.75f", // 3_4 = 0x02
284 "0.625f,0.625f,0.625f", // 5_8 = 0x03
285 "0.5f,0.5f,0.5f", // 1_2 = 0x04
286 "0.375f,0.375f,0.375f", // 3_8 = 0x05
287 "0.25f,0.25f,0.25f", // 1_4 = 0x06
288 "0.125f,0.125f,0.125f", // 1_8 = 0x07
289 "ERROR1", // 0x08
290 "ERROR2", // 0x09
291 "ERROR3", // 0x0a
292 "ERROR4", // 0x0b
293 I_KCOLORS"k""[0].rgb", // K0 = 0x0C
294 I_KCOLORS"k""[1].rgb", // K1 = 0x0D
295 I_KCOLORS"k""[2].rgb", // K2 = 0x0E
296 I_KCOLORS"k""[3].rgb", // K3 = 0x0F
297 I_KCOLORS"k""[0].rrr", // K0_R = 0x10
298 I_KCOLORS"k""[1].rrr", // K1_R = 0x11
299 I_KCOLORS"k""[2].rrr", // K2_R = 0x12
300 I_KCOLORS"k""[3].rrr", // K3_R = 0x13
301 I_KCOLORS"k""[0].ggg", // K0_G = 0x14
302 I_KCOLORS"k""[1].ggg", // K1_G = 0x15
303 I_KCOLORS"k""[2].ggg", // K2_G = 0x16
304 I_KCOLORS"k""[3].ggg", // K3_G = 0x17
305 I_KCOLORS"k""[0].bbb", // K0_B = 0x18
306 I_KCOLORS"k""[1].bbb", // K1_B = 0x19
307 I_KCOLORS"k""[2].bbb", // K2_B = 0x1A
308 I_KCOLORS"k""[3].bbb", // K3_B = 0x1B
309 I_KCOLORS"k""[0].aaa", // K0_A = 0x1C
310 I_KCOLORS"k""[1].aaa", // K1_A = 0x1D
311 I_KCOLORS"k""[2].aaa", // K2_A = 0x1E
312 I_KCOLORS"k""[3].aaa", // K3_A = 0x1F
313};
314
// Shader-source text for each konst alpha selection (KASEL) value 0x00-0x1F, as listed in
// the per-entry comments: constant fractions for 0x00-0x07, "ERRORn" placeholders for the
// values 0x08-0x0F, then single components of the four I_KCOLORS constants. Every entry
// is a scalar expression.
// NOTE(review): the consumer is outside this excerpt - presumably WriteStage; confirm.
315static const char *tevKSelTableA[] = // KASEL
316{
317 "1.0f", // 1 = 0x00
318 "0.875f",// 7_8 = 0x01
319 "0.75f", // 3_4 = 0x02
320 "0.625f",// 5_8 = 0x03
321 "0.5f", // 1_2 = 0x04
322 "0.375f",// 3_8 = 0x05
323 "0.25f", // 1_4 = 0x06
324 "0.125f",// 1_8 = 0x07
325 "ERROR5", // 0x08
326 "ERROR6", // 0x09
327 "ERROR7", // 0x0a
328 "ERROR8", // 0x0b
329 "ERROR9", // 0x0c
330 "ERROR10", // 0x0d
331 "ERROR11", // 0x0e
332 "ERROR12", // 0x0f
333 I_KCOLORS"k""[0].r", // K0_R = 0x10
334 I_KCOLORS"k""[1].r", // K1_R = 0x11
335 I_KCOLORS"k""[2].r", // K2_R = 0x12
336 I_KCOLORS"k""[3].r", // K3_R = 0x13
337 I_KCOLORS"k""[0].g", // K0_G = 0x14
338 I_KCOLORS"k""[1].g", // K1_G = 0x15
339 I_KCOLORS"k""[2].g", // K2_G = 0x16
340 I_KCOLORS"k""[3].g", // K3_G = 0x17
341 I_KCOLORS"k""[0].b", // K0_B = 0x18
342 I_KCOLORS"k""[1].b", // K1_B = 0x19
343 I_KCOLORS"k""[2].b", // K2_B = 0x1A
344 I_KCOLORS"k""[3].b", // K3_B = 0x1B
345 I_KCOLORS"k""[0].a", // K0_A = 0x1C
346 I_KCOLORS"k""[1].a", // K1_A = 0x1D
347 I_KCOLORS"k""[2].a", // K2_A = 0x1E
348 I_KCOLORS"k""[3].a", // K3_A = 0x1F
349};
350
// Shader-source factor for each TEV scale setting, in the order named by the entry comments.
351static const char *tevScaleTable[] = // CS
352{
353 "1.0f", // SCALE_1
354 "2.0f", // SCALE_2
355 "4.0f", // SCALE_4
356 "0.5f", // DIVIDE_2
357};
358
// Shader-source term appended for each TEV bias setting; ZERO contributes nothing, and
// the fourth entry (unnamed in the original) is empty as well.
359static const char *tevBiasTable[] = // TB
360{
361 "", // ZERO,
362 "+0.5f", // ADDHALF,
363 "-0.5f", // SUBHALF,
364 "",
365};
366
// Shader-source operator for the two TEV operations named in the entry comments.
367static const char *tevOpTable[] = { // TEV
368 "+", // TEVOP_ADD = 0,
369 "-", // TEVOP_SUB = 1,
370};
371
// Shader-source expression for each TEV color combiner input. The first 16 entries are
// the plain sources in the order named by the entry comments. The next 16 repeat that
// order for "clamped values" (per the original comment) and read the c-prefixed
// variables (cprev, cc0..cc2, crastemp, ckonsttemp) that GeneratePixelShaderCode
// declares. The four trailing "PADERRORn" entries pad the table.
// NOTE(review): the consumer is outside this excerpt - presumably WriteStage; confirm.
372static const char *tevCInputTable[] = // CC
373{
374 "(prev.rgb)", // CPREV,
375 "(prev.aaa)", // APREV,
376 "(c0.rgb)", // C0,
377 "(c0.aaa)", // A0,
378 "(c1.rgb)", // C1,
379 "(c1.aaa)", // A1,
380 "(c2.rgb)", // C2,
381 "(c2.aaa)", // A2,
382 "(textemp.rgb)", // TEXC,
383 "(textemp.aaa)", // TEXA,
384 "(rastemp.rgb)", // RASC,
385 "(rastemp.aaa)", // RASA,
386 "float3(1.0f, 1.0f, 1.0f)", // ONE
387 "float3(0.5f, 0.5f, 0.5f)", // HALF
388 "(konsttemp.rgb)", //"konsttemp.rgb", // KONST
389 "float3(0.0f, 0.0f, 0.0f)", // ZERO
390 ///added extra values to map clamped values
391 "(cprev.rgb)", // CPREV,
392 "(cprev.aaa)", // APREV,
393 "(cc0.rgb)", // C0,
394 "(cc0.aaa)", // A0,
395 "(cc1.rgb)", // C1,
396 "(cc1.aaa)", // A1,
397 "(cc2.rgb)", // C2,
398 "(cc2.aaa)", // A2,
399 "(textemp.rgb)", // TEXC,
400 "(textemp.aaa)", // TEXA,
401 "(crastemp.rgb)", // RASC,
402 "(crastemp.aaa)", // RASA,
403 "float3(1.0f, 1.0f, 1.0f)", // ONE
404 "float3(0.5f, 0.5f, 0.5f)", // HALF
405 "(ckonsttemp.rgb)", //"konsttemp.rgb", // KONST
406 "float3(0.0f, 0.0f, 0.0f)", // ZERO
407 "PADERROR1", "PADERROR2", "PADERROR3", "PADERROR4"
408};
409
// Shader-source name of the variable behind each TEV alpha combiner input. The first
// eight entries are the plain sources in the order named by the entry comments. The
// next eight repeat that order for "clamped values" (per the original comment) using
// the c-prefixed variables. The eight trailing "PADERRORn" entries pad the table.
// Entries are whole float4 variables with no swizzle.
// NOTE(review): the consumer is outside this excerpt - presumably WriteStage; confirm.
410static const char *tevAInputTable[] = // CA
411{
412 "prev", // APREV,
413 "c0", // A0,
414 "c1", // A1,
415 "c2", // A2,
416 "textemp", // TEXA,
417 "rastemp", // RASA,
418 "konsttemp", // KONST, (hw1 had quarter)
419 "float4(0.0f, 0.0f, 0.0f, 0.0f)", // ZERO
420 ///added extra values to map clamped values
421 "cprev", // APREV,
422 "cc0", // A0,
423 "cc1", // A1,
424 "cc2", // A2,
425 "textemp", // TEXA,
426 "crastemp", // RASA,
427 "ckonsttemp", // KONST, (hw1 had quarter)
428 "float4(0.0f, 0.0f, 0.0f, 0.0f)", // ZERO
429 "PADERROR5", "PADERROR6", "PADERROR7", "PADERROR8",
430 "PADERROR9", "PADERROR10", "PADERROR11", "PADERROR12",
431};
432
// Shader-source expression for each of eight rasterized-color selections: the two
// interpolated vertex colors, "ERRORn" placeholders for indices 2-4, the indirect bump
// alpha (raw and rescaled by 255/248), and zero.
// NOTE(review): presumably indexed by the stage's color channel selector - the lookup
// itself is outside this excerpt; confirm.
433static const char *tevRasTable[] =
434{
435 "colors_0",
436 "colors_1",
437 "ERROR13", //2
438 "ERROR14", //3
439 "ERROR15", //4
440 "float4(alphabump,alphabump,alphabump,alphabump)", // use bump alpha
441 "(float4(alphabump,alphabump,alphabump,alphabump)*(255.0f/248.0f))", //normalized
442 "float4(0.0f, 0.0f, 0.0f, 0.0f)", // zero
443};
444
// Small lookup tables of shader-source fragments.
// tevCOutputTable / tevAOutputTable map a combiner's `dest` field to the color / alpha
// destination register (GeneratePixelShaderCode indexes them with colorC.dest and
// alphaC.dest). The tevInd* tables hold fragments for indirect texturing; the trailing
// comments say what indexes them where the original author recorded it.
// NOTE(review): the tevInd* consumers are outside this excerpt - confirm their index ranges.
445//static const char *tevTexFunc[] = { "tex2D", "texRECT" };
446
447static const char *tevCOutputTable[] = { "prev.rgb", "c0.rgb", "c1.rgb", "c2.rgb" };
448static const char *tevAOutputTable[] = { "prev.a", "c0.a", "c1.a", "c2.a" };
449static const char *tevIndAlphaSel[] = {"", "x", "y", "z"};
450//static const char *tevIndAlphaScale[] = {"", "*32", "*16", "*8"};
451static const char *tevIndAlphaScale[] = {"*(248.0f/255.0f)", "*(224.0f/255.0f)", "*(240.0f/255.0f)", "*(248.0f/255.0f)"};
452static const char *tevIndBiasField[] = {"", "x", "y", "xy", "z", "xz", "yz", "xyz"}; // indexed by bias
453static const char *tevIndBiasAdd[] = {"-128.0f", "1.0f", "1.0f", "1.0f" }; // indexed by fmt
454static const char *tevIndWrapStart[] = {"0.0f", "256.0f", "128.0f", "64.0f", "32.0f", "16.0f", "0.001f" };
455static const char *tevIndFmtScale[] = {"255.0f", "31.0f", "15.0f", "7.0f" };
456
// WRITE(p, fmt, ...) expands to p+=sprintf(p, fmt, ...): it formats at the cursor `p` and
// advances `p` by the number of characters written. No bounds checking is performed.
// (The report rendering fuses the macro name with its expansion on the line below.)
457#define WRITEp+=sprintf p+=sprintf
458
// Four NUL-terminated, four-character channel swizzles, rebuilt by BuildSwapModeTable.
459static char swapModeTable[4][5];
460
// Output buffer for the generated shader source; GeneratePixelShaderCode writes into it
// and stores a canary byte in its last element.
461static char text[16384];
462
// Per-register bookkeeping used while generating shader code.
// GeneratePixelShaderCode consults these flags for the register written by the last TEV
// stage to decide (a) whether the final `prev` value needs the frac-based 8-bit overflow
// wrap and (b) whether to read the "c"-prefixed auxiliary copy instead of the register.
463struct RegisterState
464{
 // True when the register's color / alpha part may need the overflow wrap applied.
465 bool ColorNeedOverflowControl;
466 bool AlphaNeedOverflowControl;
 // True when a "c"-prefixed auxiliary copy is available.
 // NOTE(review): where this flag gets set is outside this excerpt - confirm.
467 bool AuxStored;
468};
469
// One entry per register; index 0 is `prev`, and GeneratePixelShaderCode indexes the
// array with a combiner's `dest` field.
470static RegisterState RegisterStates[4];
471
472static void BuildSwapModeTable()
473{
474 static const char *swapColors = "rgba";
475 for (int i = 0; i < 4; i++)
476 {
477 swapModeTable[i][0] = swapColors[bpmem.tevksel[i*2].swap1];
478 swapModeTable[i][1] = swapColors[bpmem.tevksel[i*2].swap2];
479 swapModeTable[i][2] = swapColors[bpmem.tevksel[i*2+1].swap1];
480 swapModeTable[i][3] = swapColors[bpmem.tevksel[i*2+1].swap2];
481 swapModeTable[i][4] = 0;
482 }
483}
484
// Names of intrinsics that differ between HLSL and GLSL, stored in pairs: the HLSL
// spelling at an even index, immediately followed by the GLSL spelling.
// GeneratePixelShaderCode selects one with GLSLConvertFunctions[FUNC_x + bOpenGL], so
// each FUNC_* constant below is the even index of its pair.
// (The report rendering fuses each macro name with its value: FUNC_FRAC is 0 and
// FUNC_LERP is 2.)
485// We can't use function defines since the Qualcomm shader compiler doesn't support it
486static const char *GLSLConvertFunctions[] =
487{
488 "frac", // HLSL
489 "fract", // GLSL
490 "lerp",
491 "mix"
492};
493#define FUNC_FRAC0 0
494#define FUNC_LERP2 2
495
496const char* WriteRegister(API_TYPE ApiType, const char *prefix, const u32 num)
497{
498 if (ApiType == API_OPENGL)
499 return ""; // Nothing to do here
500 static char result[64];
501 sprintf(result, " : register(%s%d)", prefix, num);
502 return result;
503}
504
505const char *WriteLocation(API_TYPE ApiType)
506{
507 if (g_ActiveConfig.backend_info.bSupportsGLSLUBO)
508 return "";
509 static char result[64];
510 sprintf(result, "uniform ");
511 return result;
512}
513
514const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components)
515{
516 setlocale(LC_NUMERIC1, "C"); // Reset locale for compilation
517 text[sizeof(text) - 1] = 0x7C; // canary
518
519 BuildSwapModeTable(); // Needed for WriteStage
520 int numStages = bpmem.genMode.numtevstages + 1;
521 int numTexgen = bpmem.genMode.numtexgens;
522
523 bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE0 && !bpmem.zcontrol.early_ztest && bpmem.zmode.testenable) || !g_ActiveConfig.bFastDepthCalc;
524 bool bOpenGL = ApiType == API_OPENGL;
525 char *p = text;
526 WRITEp+=sprintf(p, "//Pixel Shader for TEV stages\n");
527 WRITEp+=sprintf(p, "//%i TEV stages, %i texgens, XXX IND stages\n",
528 numStages, numTexgen/*, bpmem.genMode.numindstages*/);
529
530 int nIndirectStagesUsed = 0;
531 if (bpmem.genMode.numindstages > 0)
532 {
533 for (int i = 0; i < numStages; ++i)
534 {
535 if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages)
536 nIndirectStagesUsed |= 1 << bpmem.tevind[i].bt;
537 }
538 }
539
540 if (ApiType == API_OPENGL)
541 {
542
543 // A function here
544 // Fmod implementation gleaned from Nvidia
545 // At http://http.developer.nvidia.com/Cg/fmod.html
546 WRITEp+=sprintf(p, "float fmod( float x, float y )\n");
547 WRITEp+=sprintf(p, "{\n");
548 WRITEp+=sprintf(p, "\tfloat z = fract( abs( x / y) ) * abs( y );\n");
549 WRITEp+=sprintf(p, "\treturn (x < 0.0) ? -z : z;\n");
550 WRITEp+=sprintf(p, "}\n");
551
552 for (int i = 0; i < 8; ++i)
553 WRITEp+=sprintf(p, "uniform sampler2D samp%d;\n", i);
554 }
555 else
556 {
557 // Declare samplers
558 if (ApiType != API_D3D11)
559 {
560 WRITEp+=sprintf(p, "uniform sampler2D ");
561 }
562 else
563 {
564 WRITEp+=sprintf(p, "sampler ");
565 }
566
567 bool bfirst = true;
568 for (int i = 0; i < 8; ++i)
569 {
570 WRITEp+=sprintf(p, "%s samp%d %s", bfirst?"":",", i, WriteRegister(ApiType, "s", i));
571 bfirst = false;
572 }
573 WRITEp+=sprintf(p, ";\n");
574 if (ApiType == API_D3D11)
575 {
576 WRITEp+=sprintf(p, "Texture2D ");
577 bfirst = true;
578 for (int i = 0; i < 8; ++i)
579 {
580 WRITEp+=sprintf(p, "%s Tex%d : register(t%d)", bfirst?"":",", i, i);
581 bfirst = false;
582 }
583 WRITEp+=sprintf(p, ";\n");
584 }
585 }
586
587 WRITEp+=sprintf(p, "\n");
588 if (g_ActiveConfig.backend_info.bSupportsGLSLUBO)
589 WRITEp+=sprintf(p, "layout(std140) uniform PSBlock {\n");
590
591 WRITEp+=sprintf(p, "\t%sfloat4 " I_COLORS"color""[4] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_COLORS0));
592 WRITEp+=sprintf(p, "\t%sfloat4 " I_KCOLORS"k""[4] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_KCOLORS(0 + 4)));
593 WRITEp+=sprintf(p, "\t%sfloat4 " I_ALPHA"alphaRef""[1] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_ALPHA((0 + 4) + 4)));
594 WRITEp+=sprintf(p, "\t%sfloat4 " I_TEXDIMS"texdim""[8] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_TEXDIMS(((0 + 4) + 4) + 1)));
595 WRITEp+=sprintf(p, "\t%sfloat4 " I_ZBIAS"czbias""[2] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_ZBIAS((((0 + 4) + 4) + 1) + 8)));
596 WRITEp+=sprintf(p, "\t%sfloat4 " I_INDTEXSCALE"cindscale""[2] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_INDTEXSCALE(((((0 + 4) + 4) + 1) + 8) + 2)));
597 WRITEp+=sprintf(p, "\t%sfloat4 " I_INDTEXMTX"cindmtx""[6] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_INDTEXMTX((((((0 + 4) + 4) + 1) + 8) + 2) + 2)));
598 WRITEp+=sprintf(p, "\t%sfloat4 " I_FOG"cfog""[3] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_FOG(((((((0 + 4) + 4) + 1) + 8) + 2) + 2) + 6)));
599
600 // For pixel lighting
601 WRITEp+=sprintf(p, "\t%sfloat4 " I_PLIGHTS"cPLights""[40] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_PLIGHTS((((((((0 + 4) + 4) + 1) + 8) + 2) + 2) + 6) + 3)));
602 WRITEp+=sprintf(p, "\t%sfloat4 " I_PMATERIALS"cPmtrl""[4] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_PMATERIALS(((((((((0 + 4) + 4) + 1) + 8) + 2) + 2) + 6) + 3) + 40)));
603
604 if (g_ActiveConfig.backend_info.bSupportsGLSLUBO)
605 WRITEp+=sprintf(p, "};\n");
606
607 if (ApiType == API_OPENGL)
608 {
609 WRITEp+=sprintf(p, "COLOROUT(ocol0)\n");
610 if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
611 WRITEp+=sprintf(p, "COLOROUT(ocol1)\n");
612
613 if (per_pixel_depth)
614 WRITEp+=sprintf(p, "#define depth gl_FragDepth\n");
615 WRITEp+=sprintf(p, "float4 rawpos = gl_FragCoord;\n");
616
617 WRITEp+=sprintf(p, "VARYIN float4 colors_02;\n");
618 WRITEp+=sprintf(p, "VARYIN float4 colors_12;\n");
619 WRITEp+=sprintf(p, "float4 colors_0 = colors_02;\n");
620 WRITEp+=sprintf(p, "float4 colors_1 = colors_12;\n");
621
622 // compute window position if needed because binding semantic WPOS is not widely supported
623 // Let's set up attributes
624 if (xfregs.numTexGen.numTexGens < 7)
625 {
626 for (int i = 0; i < 8; ++i)
627 {
628 WRITEp+=sprintf(p, "VARYIN float3 uv%d_2;\n", i);
629 WRITEp+=sprintf(p, "float3 uv%d = uv%d_2;\n", i, i);
630 }
631 WRITEp+=sprintf(p, "VARYIN float4 clipPos_2;\n");
632 WRITEp+=sprintf(p, "float4 clipPos = clipPos_2;\n");
633 if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
634 {
635 WRITEp+=sprintf(p, "VARYIN float4 Normal_2;\n");
636 WRITEp+=sprintf(p, "float4 Normal = Normal_2;\n");
637 }
638 }
639 else
640 {
641 // wpos is in w of first 4 texcoords
642 if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
643 {
644 for (int i = 0; i < 8; ++i)
645 {
646 WRITEp+=sprintf(p, "VARYIN float4 uv%d_2;\n", i);
647 WRITEp+=sprintf(p, "float4 uv%d = uv%d_2;\n", i, i);
648 }
649 }
650 else
651 {
652 for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i)
653 {
654 WRITEp+=sprintf(p, "VARYIN float%d uv%d_2;\n", i < 4 ? 4 : 3 , i);
655 WRITEp+=sprintf(p, "float%d uv%d = uv%d_2;\n", i < 4 ? 4 : 3 , i, i);
656 }
657 }
658 WRITEp+=sprintf(p, "float4 clipPos;\n");
659 }
660 WRITEp+=sprintf(p, "void main()\n{\n");
661 }
662 else
663 {
664 WRITEp+=sprintf(p, "void main(\n");
665 if (ApiType != API_D3D11)
666 {
667 WRITEp+=sprintf(p, " out float4 ocol0 : COLOR0,%s%s\n in float4 rawpos : %s,\n",
668 dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : COLOR1," : "",
669 per_pixel_depth ? "\n out float depth : DEPTH," : "",
670 ApiType & API_D3D9_SM20 ? "POSITION" : "VPOS");
671 }
672 else
673 {
674 WRITEp+=sprintf(p, " out float4 ocol0 : SV_Target0,%s%s\n in float4 rawpos : SV_Position,\n",
675 dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : SV_Target1," : "",
676 per_pixel_depth ? "\n out float depth : SV_Depth," : "");
677 }
678
679 // "centroid" attribute is only supported by D3D11
680 const char* optCentroid = (ApiType == API_D3D11 ? "centroid" : "");
681
682 WRITEp+=sprintf(p, " in %s float4 colors_0 : COLOR0,\n", optCentroid);
683 WRITEp+=sprintf(p, " in %s float4 colors_1 : COLOR1", optCentroid);
684
685 // compute window position if needed because binding semantic WPOS is not widely supported
686 if (numTexgen < 7)
687 {
688 for (int i = 0; i < numTexgen; ++i)
689 WRITEp+=sprintf(p, ",\n in %s float3 uv%d : TEXCOORD%d", optCentroid, i, i);
690 WRITEp+=sprintf(p, ",\n in %s float4 clipPos : TEXCOORD%d", optCentroid, numTexgen);
691 if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
692 WRITEp+=sprintf(p, ",\n in %s float4 Normal : TEXCOORD%d", optCentroid, numTexgen + 1);
693 WRITEp+=sprintf(p, " ) {\n");
694 }
695 else
696 {
697 // wpos is in w of first 4 texcoords
698 if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
699 {
700 for (int i = 0; i < 8; ++i)
701 WRITEp+=sprintf(p, ",\n in float4 uv%d : TEXCOORD%d", i, i);
702 }
703 else
704 {
705 for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i)
706 WRITEp+=sprintf(p, ",\n in float%d uv%d : TEXCOORD%d", i < 4 ? 4 : 3 , i, i);
707 }
708 WRITEp+=sprintf(p, " ) {\n");
709 WRITEp+=sprintf(p, "\tfloat4 clipPos = float4(0.0f, 0.0f, 0.0f, 0.0f);");
710 }
711 }
712
713 WRITEp+=sprintf(p, " float4 c0 = " I_COLORS"color""[1], c1 = " I_COLORS"color""[2], c2 = " I_COLORS"color""[3], prev = float4(0.0f, 0.0f, 0.0f, 0.0f), textemp = float4(0.0f, 0.0f, 0.0f, 0.0f), rastemp = float4(0.0f, 0.0f, 0.0f, 0.0f), konsttemp = float4(0.0f, 0.0f, 0.0f, 0.0f);\n"
714 " float3 comp16 = float3(1.0f, 255.0f, 0.0f), comp24 = float3(1.0f, 255.0f, 255.0f*255.0f);\n"
715 " float alphabump=0.0f;\n"
716 " float3 tevcoord=float3(0.0f, 0.0f, 0.0f);\n"
717 " float2 wrappedcoord=float2(0.0f,0.0f), tempcoord=float2(0.0f,0.0f);\n"
718 " float4 cc0=float4(0.0f,0.0f,0.0f,0.0f), cc1=float4(0.0f,0.0f,0.0f,0.0f);\n"
719 " float4 cc2=float4(0.0f,0.0f,0.0f,0.0f), cprev=float4(0.0f,0.0f,0.0f,0.0f);\n"
720 " float4 crastemp=float4(0.0f,0.0f,0.0f,0.0f),ckonsttemp=float4(0.0f,0.0f,0.0f,0.0f);\n\n");
721
722 if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
723 {
724 if (xfregs.numTexGen.numTexGens < 7)
725 {
726 WRITEp+=sprintf(p,"\tfloat3 _norm0 = normalize(Normal.xyz);\n\n");
727 WRITEp+=sprintf(p,"\tfloat3 pos = float3(clipPos.x,clipPos.y,Normal.w);\n");
728 }
729 else
730 {
731 WRITEp+=sprintf(p,"\tfloat3 _norm0 = normalize(float3(uv4.w,uv5.w,uv6.w));\n\n");
732 WRITEp+=sprintf(p,"\tfloat3 pos = float3(uv0.w,uv1.w,uv7.w);\n");
733 }
734
735
736 WRITEp+=sprintf(p, "\tfloat4 mat, lacc;\n"
737 "\tfloat3 ldir, h;\n"
738 "\tfloat dist, dist2, attn;\n");
739
740 p = GenerateLightingShader(p, components, I_PMATERIALS"cPmtrl", I_PLIGHTS"cPLights", "colors_", "colors_");
741 }
742
743 if (numTexgen < 7)
744 WRITEp+=sprintf(p, "\tclipPos = float4(rawpos.x, rawpos.y, clipPos.z, clipPos.w);\n");
745 else
746 WRITEp+=sprintf(p, "\tclipPos = float4(rawpos.x, rawpos.y, uv2.w, uv3.w);\n");
747
748 // HACK to handle cases where the tex gen is not enabled
749 if (numTexgen == 0)
750 {
751 WRITEp+=sprintf(p, "\tfloat3 uv0 = float3(0.0f, 0.0f, 0.0f);\n");
752 }
753 else
754 {
755 for (int i = 0; i < numTexgen; ++i)
756 {
757 // optional perspective divides
758 if (xfregs.texMtxInfo[i].projection == XF_TEXPROJ_STQ1)
759 {
760 WRITEp+=sprintf(p, "\tif (uv%d.z != 0.0f)", i);
761 WRITEp+=sprintf(p, "\t\tuv%d.xy = uv%d.xy / uv%d.z;\n", i, i, i);
762 }
763
764 WRITEp+=sprintf(p, "uv%d.xy = uv%d.xy * " I_TEXDIMS"texdim""[%d].zw;\n", i, i, i);
765 }
766 }
767
768 // indirect texture map lookup
769 for (u32 i = 0; i < bpmem.genMode.numindstages; ++i)
770 {
771 if (nIndirectStagesUsed & (1<<i))
772 {
773 int texcoord = bpmem.tevindref.getTexCoord(i);
774
775 if (texcoord < numTexgen)
776 WRITEp+=sprintf(p, "\ttempcoord = uv%d.xy * " I_INDTEXSCALE"cindscale""[%d].%s;\n", texcoord, i/2, (i&1)?"zw":"xy");
777 else
778 WRITEp+=sprintf(p, "\ttempcoord = float2(0.0f, 0.0f);\n");
779
780 char buffer[32];
781 sprintf(buffer, "float3 indtex%d", i);
782 SampleTexture(p, buffer, "tempcoord", "abg", bpmem.tevindref.getTexMap(i), ApiType);
783 }
784 }
785
786 RegisterStates[0].AlphaNeedOverflowControl = false;
787 RegisterStates[0].ColorNeedOverflowControl = false;
788 RegisterStates[0].AuxStored = false;
789 for(int i = 1; i < 4; i++)
790 {
791 RegisterStates[i].AlphaNeedOverflowControl = true;
792 RegisterStates[i].ColorNeedOverflowControl = true;
793 RegisterStates[i].AuxStored = false;
794 }
795
796 for (int i = 0; i < numStages; i++)
797 WriteStage(p, i, ApiType); //build the equation for this stage
798
799 if (numStages)
800 {
801 // The results of the last texenv stage are put onto the screen,
802 // regardless of the used destination register
803 if(bpmem.combiners[numStages - 1].colorC.dest != 0)
804 {
805 bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].AuxStored;
806 WRITEp+=sprintf(p, "\tprev.rgb = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevCOutputTable[bpmem.combiners[numStages - 1].colorC.dest]);
807 RegisterStates[0].ColorNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].colorC.dest].ColorNeedOverflowControl;
808 }
809 if(bpmem.combiners[numStages - 1].alphaC.dest != 0)
810 {
811 bool retrieveFromAuxRegister = !RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AlphaNeedOverflowControl && RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AuxStored;
812 WRITEp+=sprintf(p, "\tprev.a = %s%s;\n", retrieveFromAuxRegister ? "c" : "" , tevAOutputTable[bpmem.combiners[numStages - 1].alphaC.dest]);
813 RegisterStates[0].AlphaNeedOverflowControl = RegisterStates[bpmem.combiners[numStages - 1].alphaC.dest].AlphaNeedOverflowControl;
814 }
815 }
816 // emulation of unsigned 8 overflow when casting if needed
817 if(RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl)
818 WRITEp+=sprintf(p, "\tprev = %s(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC0 + bOpenGL]);
819
820 AlphaTest::TEST_RESULT Pretest = bpmem.alpha_test.TestResult();
821 if (Pretest == AlphaTest::UNDETERMINED)
822 WriteAlphaTest(p, ApiType, dstAlphaMode, per_pixel_depth);
823
824
825 // dx9 doesn't support readback of depth in pixel shader, so we always have to calculate it again
826 // shouldn't be a performance issue as the written depth is usually still from perspective division
827 // but this isn't true for z-textures, so there will be depth issues between enabled and disabled z-textures fragments
828 if((ApiType == API_OPENGL || ApiType == API_D3D11) && g_ActiveConfig.bFastDepthCalc)
829 WRITEp+=sprintf(p, "float zCoord = rawpos.z;\n");
830 else
831 // the screen space depth value = far z + (clip z / clip w) * z range
832 WRITEp+=sprintf(p, "float zCoord = " I_ZBIAS"czbias""[1].x + (clipPos.z / clipPos.w) * " I_ZBIAS"czbias""[1].y;\n");
833
834 // depth texture can safely be ignored if the result won't be written to the depth buffer (early_ztest) and isn't used for fog either
835 bool skip_ztexture = !per_pixel_depth && !bpmem.fog.c_proj_fsel.fsel;
836
837 // Note: z-textures are not written to depth buffer if early depth test is used
838 if (per_pixel_depth && bpmem.zcontrol.early_ztest)
839 WRITEp+=sprintf(p, "depth = zCoord;\n");
840
841 if (bpmem.ztex2.op != ZTEXTURE_DISABLE0 && !skip_ztexture)
842 {
843 // use the texture input of the last texture stage (textemp), hopefully this has been read and is in correct format...
844 WRITEp+=sprintf(p, "zCoord = dot(" I_ZBIAS"czbias""[0].xyzw, textemp.xyzw) + " I_ZBIAS"czbias""[1].w %s;\n",
845 (bpmem.ztex2.op == ZTEXTURE_ADD1) ? "+ zCoord" : "");
846
847 // scale to make result from frac correct
848 WRITEp+=sprintf(p, "zCoord = zCoord * (16777215.0f/16777216.0f);\n");
849 WRITEp+=sprintf(p, "zCoord = %s(zCoord);\n", GLSLConvertFunctions[FUNC_FRAC0 + bOpenGL]);
850 WRITEp+=sprintf(p, "zCoord = zCoord * (16777216.0f/16777215.0f);\n");
851 }
852
853 if (per_pixel_depth && !bpmem.zcontrol.early_ztest)
854 WRITEp+=sprintf(p, "depth = zCoord;\n");
855
856 if (dstAlphaMode == DSTALPHA_ALPHA_PASS)
857 {
858 WRITEp+=sprintf(p, "\tocol0 = float4(prev.rgb, " I_ALPHA"alphaRef""[0].a);\n");
859 }
860 else
861 {
862 WriteFog(p, ApiType);
863 WRITEp+=sprintf(p, "\tocol0 = prev;\n");
864 }
865
866 // Use dual-source color blending to perform dst alpha in a
867 // single pass
868 if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
869 {
870 if(ApiType & API_D3D9)
871 {
872 // alpha component must be 0 or the shader will not compile (Direct3D 9Ex restriction)
873 // Colors will be blended against the color from ocol1 in D3D 9...
874 WRITEp+=sprintf(p, "\tocol1 = float4(prev.a, prev.a, prev.a, 0.0f);\n");
875 }
876 else
877 {
878 // Colors will be blended against the alpha from ocol1...
879 WRITEp+=sprintf(p, "\tocol1 = prev;\n");
880 }
881 // ...and the alpha from ocol0 will be written to the framebuffer.
882 WRITEp+=sprintf(p, "\tocol0.a = " I_ALPHA"alphaRef""[0].a;\n");
883 }
884
885 WRITEp+=sprintf(p, "}\n");
886 if (text[sizeof(text) - 1] != 0x7C)
887 PanicAlert("PixelShader generator - buffer too small, canary has been eaten!")MsgAlert(false, WARNING, "PixelShader generator - buffer too small, canary has been eaten!"
)
;
888
889 setlocale(LC_NUMERIC1, ""); // restore locale
890 return text;
891}
892
893
894
895//table with the color compare operations
// printf-style templates for the colour combiner's compare mode.
// WriteStage indexes this table with (cc.shift<<1)|cc.op|8, so the index it
// produces always has bit 3 set; entries 0..7 are zero-vector fillers.
// Every entry from 8 upward consumes four %s arguments, which WriteStage passes
// in the order d, a, b, c: the first is the addend, the second and third are the
// two operands being compared, and the fourth is the value added when the
// comparison succeeds (per component for the RGB8 entries).
// comp16 and comp24 are referenced by name only; they are not defined in this
// table and must already exist in the generated shader text.
896static const char *TEVCMPColorOPTable[16] =
897{
898 "float3(0.0f, 0.0f, 0.0f)",//0
899 "float3(0.0f, 0.0f, 0.0f)",//1
900 "float3(0.0f, 0.0f, 0.0f)",//2
901 "float3(0.0f, 0.0f, 0.0f)",//3
902 "float3(0.0f, 0.0f, 0.0f)",//4
903 "float3(0.0f, 0.0f, 0.0f)",//5
904 "float3(0.0f, 0.0f, 0.0f)",//6
905 "float3(0.0f, 0.0f, 0.0f)",//7
906 " %s + ((%s.r >= %s.r + (0.25f/255.0f)) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_R8_GT 8
907 " %s + ((abs(%s.r - %s.r) < (0.5f/255.0f)) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_R8_EQ 9
908 " %s + (( dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (0.25f/255.0f))) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_GR16_GT 10
909 " %s + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5f/255.0f) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_GR16_EQ 11
910 " %s + (( dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (0.25f/255.0f))) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_BGR24_GT 12
911 " %s + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5f/255.0f) ? %s : float3(0.0f, 0.0f, 0.0f))",//#define TEVCMP_BGR24_EQ 13
912 " %s + (max(sign(%s.rgb - %s.rgb - (0.25f/255.0f)), float3(0.0f, 0.0f, 0.0f)) * %s)",//#define TEVCMP_RGB8_GT 14
913 " %s + ((float3(1.0f, 1.0f, 1.0f) - max(sign(abs(%s.rgb - %s.rgb) - (0.5f/255.0f)), float3(0.0f, 0.0f, 0.0f))) * %s)"//#define TEVCMP_RGB8_EQ 15
914};
915
916//table with the alpha compare operations
// printf-style templates for the alpha combiner's compare mode.
// WriteStage indexes this table with (ac.shift<<1)|ac.op|8, so the index it
// produces always has bit 3 set; entries 0..7 are constant "0.0f" fillers.
// Every entry from 8 upward consumes four %s arguments, which WriteStage passes
// in the order d, a, b, c. The templates append the component selectors
// themselves (.a, .r, .rgb), so the arguments must be bare register names.
// comp16 and comp24 are referenced by name only; they are not defined in this
// table and must already exist in the generated shader text.
917static const char *TEVCMPAlphaOPTable[16] =
918{
919 "0.0f",//0
920 "0.0f",//1
921 "0.0f",//2
922 "0.0f",//3
923 "0.0f",//4
924 "0.0f",//5
925 "0.0f",//6
926 "0.0f",//7
927 " %s.a + ((%s.r >= (%s.r + (0.25f/255.0f))) ? %s.a : 0.0f)",//#define TEVCMP_R8_GT 8
928 " %s.a + (abs(%s.r - %s.r) < (0.5f/255.0f) ? %s.a : 0.0f)",//#define TEVCMP_R8_EQ 9
929 " %s.a + ((dot(%s.rgb, comp16) >= (dot(%s.rgb, comp16) + (0.25f/255.0f))) ? %s.a : 0.0f)",//#define TEVCMP_GR16_GT 10
930 " %s.a + (abs(dot(%s.rgb, comp16) - dot(%s.rgb, comp16)) < (0.5f/255.0f) ? %s.a : 0.0f)",//#define TEVCMP_GR16_EQ 11
931 " %s.a + ((dot(%s.rgb, comp24) >= (dot(%s.rgb, comp24) + (0.25f/255.0f))) ? %s.a : 0.0f)",//#define TEVCMP_BGR24_GT 12
932 " %s.a + (abs(dot(%s.rgb, comp24) - dot(%s.rgb, comp24)) < (0.5f/255.0f) ? %s.a : 0.0f)",//#define TEVCMP_BGR24_EQ 13
933 " %s.a + ((%s.a >= (%s.a + (0.25f/255.0f))) ? %s.a : 0.0f)",//#define TEVCMP_A8_GT 14
934 " %s.a + (abs(%s.a - %s.a) < (0.5f/255.0f) ? %s.a : 0.0f)"//#define TEVCMP_A8_EQ 15
935};
936
// Appends the shader source text for TEV stage n to the output buffer.
//
//   p       - write cursor into the shader text buffer; taken by reference and
//             advanced by every write.
//   n       - index of the TEV stage to emit; selects the bpmem.combiners[n],
//             bpmem.tevind[n], bpmem.tevorders[n/2] and bpmem.tevksel[n/2] entries.
//   ApiType - target API; here it only decides which GLSLConvertFunctions entry is
//             used (via bOpenGL) and is forwarded to SampleTexture.
//
// Emission order: optional indirect-texture coordinate setup, rasterised colour
// fetch, texture sample (or a constant white textemp), konst fetch, aux copies of
// prev/c0/c1/c2, then the colour combine followed by the alpha combine.
// Side effect: reads and updates the file-level RegisterStates[] bookkeeping.
//
// NOTE(review): in this listing macro names appear fused with their expansions
// (e.g. WRITE followed by p+=sprintf, _assert_(...) followed by {}) - confirm
// against the real source before editing any statement.
937static void WriteStage(char *&p, int n, API_TYPE ApiType)
938{
939 int texcoord = bpmem.tevorders[n/2].getTexCoord(n&1);
940 bool bHasTexCoord = (u32)texcoord < bpmem.genMode.numtexgens;
941 bool bHasIndStage = bpmem.tevind[n].IsActive() && bpmem.tevind[n].bt < bpmem.genMode.numindstages;
942 bool bOpenGL = ApiType == API_OPENGL;
943 // HACK to handle cases where the tex gen is not enabled
944 if (!bHasTexCoord)
945 texcoord = 0;
946
947 WRITEp+=sprintf(p, "// TEV stage %d\n", n);
948
949 if (bHasIndStage)
950 {
951 WRITEp+=sprintf(p, "// indirect op\n");
952 // perform the indirect op on the incoming regular coordinates using indtex%d as the offset coords
953 if (bpmem.tevind[n].bs != ITBA_OFF0)
954 {
955 WRITEp+=sprintf(p, "alphabump = indtex%d.%s %s;\n",
956 bpmem.tevind[n].bt,
957 tevIndAlphaSel[bpmem.tevind[n].bs],
958 tevIndAlphaScale[bpmem.tevind[n].fmt]);
959 }
960 // format
961 WRITEp+=sprintf(p, "float3 indtevcrd%d = indtex%d * %s;\n", n, bpmem.tevind[n].bt, tevIndFmtScale[bpmem.tevind[n].fmt]);
962
963 // bias
964 if (bpmem.tevind[n].bias != ITB_NONE0 )
965 WRITEp+=sprintf(p, "indtevcrd%d.%s += %s;\n", n, tevIndBiasField[bpmem.tevind[n].bias], tevIndBiasAdd[bpmem.tevind[n].fmt]);
966
967 // multiply by offset matrix and scale
    // mid selects the transform: 1..3 use two cindmtx rows, 4..7 and 8..11 use the
    // .ww term of a cindmtx row scaled by uv and one indtevcrd component; any other
    // case (including a missing texcoord for the last two) yields a zero offset.
968 if (bpmem.tevind[n].mid != 0)
969 {
970 if (bpmem.tevind[n].mid <= 3)
971 {
972 int mtxidx = 2*(bpmem.tevind[n].mid-1);
973 WRITEp+=sprintf(p, "float2 indtevtrans%d = float2(dot(" I_INDTEXMTX"cindmtx""[%d].xyz, indtevcrd%d), dot(" I_INDTEXMTX"cindmtx""[%d].xyz, indtevcrd%d));\n",
974 n, mtxidx, n, mtxidx+1, n);
975 }
976 else if (bpmem.tevind[n].mid <= 7 && bHasTexCoord)
977 { // s matrix
978 _assert_(bpmem.tevind[n].mid >= 5){};
979 int mtxidx = 2*(bpmem.tevind[n].mid-5);
980 WRITEp+=sprintf(p, "float2 indtevtrans%d = " I_INDTEXMTX"cindmtx""[%d].ww * uv%d.xy * indtevcrd%d.xx;\n", n, mtxidx, texcoord, n);
981 }
982 else if (bpmem.tevind[n].mid <= 11 && bHasTexCoord)
983 { // t matrix
984 _assert_(bpmem.tevind[n].mid >= 9){};
985 int mtxidx = 2*(bpmem.tevind[n].mid-9);
986 WRITEp+=sprintf(p, "float2 indtevtrans%d = " I_INDTEXMTX"cindmtx""[%d].ww * uv%d.xy * indtevcrd%d.yy;\n", n, mtxidx, texcoord, n);
987 }
988 else
989 {
990 WRITEp+=sprintf(p, "float2 indtevtrans%d = float2(0.0f, 0.0f);\n", n);
991 }
992 }
993 else
994 {
995 WRITEp+=sprintf(p, "float2 indtevtrans%d = float2(0.0f, 0.0f);\n", n);
996 }
997
998 // ---------
999 // Wrapping
1000 // ---------
1001
1002 // wrap S
1003 if (bpmem.tevind[n].sw == ITW_OFF0)
1004 WRITEp+=sprintf(p, "wrappedcoord.x = uv%d.x;\n", texcoord);
1005 else if (bpmem.tevind[n].sw == ITW_06)
1006 WRITEp+=sprintf(p, "wrappedcoord.x = 0.0f;\n");
1007 else
1008 WRITEp+=sprintf(p, "wrappedcoord.x = fmod( uv%d.x, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].sw]);
1009
1010 // wrap T
1011 if (bpmem.tevind[n].tw == ITW_OFF0)
1012 WRITEp+=sprintf(p, "wrappedcoord.y = uv%d.y;\n", texcoord);
1013 else if (bpmem.tevind[n].tw == ITW_06)
1014 WRITEp+=sprintf(p, "wrappedcoord.y = 0.0f;\n");
1015 else
1016 WRITEp+=sprintf(p, "wrappedcoord.y = fmod( uv%d.y, %s );\n", texcoord, tevIndWrapStart[bpmem.tevind[n].tw]);
1017
1018 if (bpmem.tevind[n].fb_addprev) // add previous tevcoord
1019 WRITEp+=sprintf(p, "tevcoord.xy += wrappedcoord + indtevtrans%d;\n", n);
1020 else
1021 WRITEp+=sprintf(p, "tevcoord.xy = wrappedcoord + indtevtrans%d;\n", n);
1022 }
1023
1024 TevStageCombiner::ColorCombiner &cc = bpmem.combiners[n].colorC;
1025 TevStageCombiner::AlphaCombiner &ac = bpmem.combiners[n].alphaC;
1026
    // The rasterised colour is only fetched when at least one colour or alpha
    // input selects it. crastemp is rastemp passed through the FUNC_FRAC entry
    // of GLSLConvertFunctions after a 255/256 rescale.
1027 if(cc.a == TEVCOLORARG_RASA11 || cc.a == TEVCOLORARG_RASC10
1028 || cc.b == TEVCOLORARG_RASA11 || cc.b == TEVCOLORARG_RASC10
1029 || cc.c == TEVCOLORARG_RASA11 || cc.c == TEVCOLORARG_RASC10
1030 || cc.d == TEVCOLORARG_RASA11 || cc.d == TEVCOLORARG_RASC10
1031 || ac.a == TEVALPHAARG_RASA5 || ac.b == TEVALPHAARG_RASA5
1032 || ac.c == TEVALPHAARG_RASA5 || ac.d == TEVALPHAARG_RASA5)
1033 {
1034 char *rasswap = swapModeTable[bpmem.combiners[n].alphaC.rswap];
1035 WRITEp+=sprintf(p, "rastemp = %s.%s;\n", tevRasTable[bpmem.tevorders[n / 2].getColorChan(n & 1)], rasswap);
1036 WRITEp+=sprintf(p, "crastemp = %s(rastemp * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC0 + bOpenGL]);
1037 }
1038
1039
    // Texture fetch: when the stage's texture is enabled, sample it at tevcoord
    // (set here unless the indirect path above already set it); otherwise textemp
    // is forced to opaque white.
1040 if (bpmem.tevorders[n/2].getEnable(n&1))
1041 {
1042 if (!bHasIndStage)
1043 {
1044 // calc tevcord
1045 if (bHasTexCoord)
1046 WRITEp+=sprintf(p, "tevcoord.xy = uv%d.xy;\n", texcoord);
1047 else
1048 WRITEp+=sprintf(p, "tevcoord.xy = float2(0.0f, 0.0f);\n");
1049 }
1050
1051 char *texswap = swapModeTable[bpmem.combiners[n].alphaC.tswap];
1052 int texmap = bpmem.tevorders[n/2].getTexMap(n&1);
1053 SampleTexture(p, "textemp", "tevcoord", texswap, texmap, ApiType);
1054 }
1055 else
1056 {
1057 WRITEp+=sprintf(p, "textemp = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
1058 }
1059
1060
    // Konst inputs: emitted only when some input selects KONST. The wrapped
    // ckonsttemp form is used when either selector index is above 7.
    // NOTE(review): presumably indices above 7 select register-backed values
    // rather than fixed constants - confirm against tevKSelTableC/A.
1061 if (cc.a == TEVCOLORARG_KONST14 || cc.b == TEVCOLORARG_KONST14 || cc.c == TEVCOLORARG_KONST14 || cc.d == TEVCOLORARG_KONST14
1062 || ac.a == TEVALPHAARG_KONST6 || ac.b == TEVALPHAARG_KONST6 || ac.c == TEVALPHAARG_KONST6 || ac.d == TEVALPHAARG_KONST6)
1063 {
1064 int kc = bpmem.tevksel[n / 2].getKC(n & 1);
1065 int ka = bpmem.tevksel[n / 2].getKA(n & 1);
1066 WRITEp+=sprintf(p, "konsttemp = float4(%s, %s);\n", tevKSelTableC[kc], tevKSelTableA[ka]);
1067 if (kc > 7 || ka > 7)
1068 {
1069 WRITEp+=sprintf(p, "ckonsttemp = %s(konsttemp * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC0 + bOpenGL]);
1070 }
1071 else
1072 {
1073 WRITEp+=sprintf(p, "ckonsttemp = konsttemp;\n");
1074 }
1075 }
1076
    // The next four blocks follow one pattern, for prev, c0, c1 and c2
    // (RegisterStates[0..3]): if the register is read through the a, b or c
    // input (d is not tested), emit its aux copy (cprev/cc0/cc1/cc2). The copy is
    // wrapped through FUNC_FRAC only while an overflow flag is pending, after which
    // both flags are cleared; in either case AuxStored is set.
1077 if(cc.a == TEVCOLORARG_CPREV0 || cc.a == TEVCOLORARG_APREV1
1078 || cc.b == TEVCOLORARG_CPREV0 || cc.b == TEVCOLORARG_APREV1
1079 || cc.c == TEVCOLORARG_CPREV0 || cc.c == TEVCOLORARG_APREV1
1080 || ac.a == TEVALPHAARG_APREV0 || ac.b == TEVALPHAARG_APREV0 || ac.c == TEVALPHAARG_APREV0)
1081 {
1082 if(RegisterStates[0].AlphaNeedOverflowControl || RegisterStates[0].ColorNeedOverflowControl)
1083 {
1084 WRITEp+=sprintf(p, "cprev = %s(prev * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC0 + bOpenGL]);
1085 RegisterStates[0].AlphaNeedOverflowControl = false;
1086 RegisterStates[0].ColorNeedOverflowControl = false;
1087 }
1088 else
1089 {
1090 WRITEp+=sprintf(p, "cprev = prev;\n");
1091 }
1092 RegisterStates[0].AuxStored = true;
1093 }
1094
1095 if(cc.a == TEVCOLORARG_C02 || cc.a == TEVCOLORARG_A03
1096 || cc.b == TEVCOLORARG_C02 || cc.b == TEVCOLORARG_A03
1097 || cc.c == TEVCOLORARG_C02 || cc.c == TEVCOLORARG_A03
1098 || ac.a == TEVALPHAARG_A01 || ac.b == TEVALPHAARG_A01 || ac.c == TEVALPHAARG_A01)
1099 {
1100 if(RegisterStates[1].AlphaNeedOverflowControl || RegisterStates[1].ColorNeedOverflowControl)
1101 {
1102 WRITEp+=sprintf(p, "cc0 = %s(c0 * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC0 + bOpenGL]);
1103 RegisterStates[1].AlphaNeedOverflowControl = false;
1104 RegisterStates[1].ColorNeedOverflowControl = false;
1105 }
1106 else
1107 {
1108 WRITEp+=sprintf(p, "cc0 = c0;\n");
1109 }
1110 RegisterStates[1].AuxStored = true;
1111 }
1112
1113 if(cc.a == TEVCOLORARG_C14 || cc.a == TEVCOLORARG_A15
1114 || cc.b == TEVCOLORARG_C14 || cc.b == TEVCOLORARG_A15
1115 || cc.c == TEVCOLORARG_C14 || cc.c == TEVCOLORARG_A15
1116 || ac.a == TEVALPHAARG_A12 || ac.b == TEVALPHAARG_A12 || ac.c == TEVALPHAARG_A12)
1117 {
1118 if(RegisterStates[2].AlphaNeedOverflowControl || RegisterStates[2].ColorNeedOverflowControl)
1119 {
1120 WRITEp+=sprintf(p, "cc1 = %s(c1 * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC0 + bOpenGL]);
1121 RegisterStates[2].AlphaNeedOverflowControl = false;
1122 RegisterStates[2].ColorNeedOverflowControl = false;
1123 }
1124 else
1125 {
1126 WRITEp+=sprintf(p, "cc1 = c1;\n");
1127 }
1128 RegisterStates[2].AuxStored = true;
1129 }
1130
1131 if(cc.a == TEVCOLORARG_C26 || cc.a == TEVCOLORARG_A27
1132 || cc.b == TEVCOLORARG_C26 || cc.b == TEVCOLORARG_A27
1133 || cc.c == TEVCOLORARG_C26 || cc.c == TEVCOLORARG_A27
1134 || ac.a == TEVALPHAARG_A23 || ac.b == TEVALPHAARG_A23 || ac.c == TEVALPHAARG_A23)
1135 {
1136 if(RegisterStates[3].AlphaNeedOverflowControl || RegisterStates[3].ColorNeedOverflowControl)
1137 {
1138 WRITEp+=sprintf(p, "cc2 = %s(c2 * (255.0f/256.0f)) * (256.0f/255.0f);\n", GLSLConvertFunctions[FUNC_FRAC0 + bOpenGL]);
1139 RegisterStates[3].AlphaNeedOverflowControl = false;
1140 RegisterStates[3].ColorNeedOverflowControl = false;
1141 }
1142 else
1143 {
1144 WRITEp+=sprintf(p, "cc2 = c2;\n");
1145 }
1146 RegisterStates[3].AuxStored = true;
1147 }
1148
    // An unclamped colour write flags the destination as needing overflow
    // control later; any aux copy of that destination is now out of date.
1149 RegisterStates[cc.dest].ColorNeedOverflowControl = (cc.clamp == 0);
1150 RegisterStates[cc.dest].AuxStored = false;
1151
1152 // combine the color channel
1153 WRITEp+=sprintf(p, "// color combine\n");
1154 if (cc.clamp)
1155 WRITEp+=sprintf(p, "%s = clamp(", tevCOutputTable[cc.dest]);
1156 else
1157 WRITEp+=sprintf(p, "%s = ", tevCOutputTable[cc.dest]);
1158
1159 // combine the color channel
1160 if (cc.bias != TevBias_COMPARE3) // if not compare
1161 {
1162 //normal color combiner goes here
1163 if (cc.shift > TEVSCALE_10)
1164 WRITEp+=sprintf(p, "%s*(", tevScaleTable[cc.shift]);
1165
      // "d op" is omitted when d is ZERO and the op is ADD
1166 if (!(cc.d == TEVCOLORARG_ZERO15 && cc.op == TEVOP_ADD0))
1167 WRITEp+=sprintf(p, "%s%s", tevCInputTable[cc.d], tevOpTable[cc.op]);
1168
      // Simplified forms of the a/b/c blend for trivial operands; the general
      // case uses the FUNC_LERP entry. a, b and c are looked up at offset +16 in
      // tevCInputTable, d at offset 0.
1169 if (cc.a == cc.b)
1170 WRITEp+=sprintf(p, "%s", tevCInputTable[cc.a + 16]);
1171 else if (cc.c == TEVCOLORARG_ZERO15)
1172 WRITEp+=sprintf(p, "%s", tevCInputTable[cc.a + 16]);
1173 else if (cc.c == TEVCOLORARG_ONE12)
1174 WRITEp+=sprintf(p, "%s", tevCInputTable[cc.b + 16]);
1175 else if (cc.a == TEVCOLORARG_ZERO15)
1176 WRITEp+=sprintf(p, "%s*%s", tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]);
1177 else if (cc.b == TEVCOLORARG_ZERO15)
1178 WRITEp+=sprintf(p, "%s*(float3(1.0f, 1.0f, 1.0f)-%s)", tevCInputTable[cc.a + 16], tevCInputTable[cc.c + 16]);
1179 else
1180 WRITEp+=sprintf(p, "%s(%s, %s, %s)", GLSLConvertFunctions[FUNC_LERP2 + bOpenGL], tevCInputTable[cc.a + 16], tevCInputTable[cc.b + 16], tevCInputTable[cc.c + 16]);
1181
1182 WRITEp+=sprintf(p, "%s", tevBiasTable[cc.bias]);
1183
1184 if (cc.shift > TEVSCALE_10)
1185 WRITEp+=sprintf(p, ")");
1186 }
1187 else
1188 {
1189 int cmp = (cc.shift<<1)|cc.op|8; // comparemode stored here
1190 WRITEp+=sprintf(p, TEVCMPColorOPTable[cmp],//lookup the function from the op table
1191 tevCInputTable[cc.d],
1192 tevCInputTable[cc.a + 16],
1193 tevCInputTable[cc.b + 16],
1194 tevCInputTable[cc.c + 16]);
1195 }
1196 if (cc.clamp)
1197 WRITEp+=sprintf(p, ", 0.0, 1.0)");
1198 WRITEp+=sprintf(p,";\n");
1199
    // Same bookkeeping as for colour, now for the alpha destination.
1200 RegisterStates[ac.dest].AlphaNeedOverflowControl = (ac.clamp == 0);
1201 RegisterStates[ac.dest].AuxStored = false;
1202
1203 // combine the alpha channel
1204 WRITEp+=sprintf(p, "// alpha combine\n");
1205 if (ac.clamp)
1206 WRITEp+=sprintf(p, "%s = clamp(", tevAOutputTable[ac.dest]);
1207 else
1208 WRITEp+=sprintf(p, "%s = ", tevAOutputTable[ac.dest]);
1209
1210 if (ac.bias != TevBias_COMPARE3) // if not compare
1211 {
1212 //normal alpha combiner goes here
1213 if (ac.shift > TEVSCALE_10)
1214 WRITEp+=sprintf(p, "%s*(", tevScaleTable[ac.shift]);
1215
1216 if (!(ac.d == TEVALPHAARG_ZERO7 && ac.op == TEVOP_ADD0))
1217 WRITEp+=sprintf(p, "%s.a%s", tevAInputTable[ac.d], tevOpTable[ac.op]);
1218
      // a, b and c are looked up at offset +8 in tevAInputTable, d at offset 0.
1219 if (ac.a == ac.b)
1220 WRITEp+=sprintf(p, "%s.a", tevAInputTable[ac.a + 8]);
1221 else if (ac.c == TEVALPHAARG_ZERO7)
1222 WRITEp+=sprintf(p, "%s.a", tevAInputTable[ac.a + 8]);
1223 else if (ac.a == TEVALPHAARG_ZERO7)
1224 WRITEp+=sprintf(p, "%s.a*%s.a", tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]);
1225 else if (ac.b == TEVALPHAARG_ZERO7)
1226 WRITEp+=sprintf(p, "%s.a*(1.0f-%s.a)", tevAInputTable[ac.a + 8], tevAInputTable[ac.c + 8]);
1227 else
1228 WRITEp+=sprintf(p, "%s(%s.a, %s.a, %s.a)", GLSLConvertFunctions[FUNC_LERP2 + bOpenGL], tevAInputTable[ac.a + 8], tevAInputTable[ac.b + 8], tevAInputTable[ac.c + 8]);
1229
1230 WRITEp+=sprintf(p, "%s",tevBiasTable[ac.bias]);
1231
      // Closes the parenthesis opened by the shift test above.
      // NOTE(review): the opening test compares against TEVSCALE_1, this one
      // against a literal 0 - presumably the same value; confirm.
1232 if (ac.shift > 0)
1233 WRITEp+=sprintf(p, ")");
1234
1235 }
1236 else
1237 {
1238 //compare alpha combiner goes here
1239 int cmp = (ac.shift<<1)|ac.op|8; // comparemode stored here
1240 WRITEp+=sprintf(p, TEVCMPAlphaOPTable[cmp],
1241 tevAInputTable[ac.d],
1242 tevAInputTable[ac.a + 8],
1243 tevAInputTable[ac.b + 8],
1244 tevAInputTable[ac.c + 8]);
1245 }
1246 if (ac.clamp)
1247 WRITEp+=sprintf(p, ", 0.0, 1.0)");
1248 WRITEp+=sprintf(p, ";\n\n");
1249 WRITEp+=sprintf(p, "// TEV done\n");
1250}
1251
// Appends one texture-sampling assignment to the shader text.
//
//   p           - write cursor into the shader text buffer (advanced by the write).
//   destination - name of the shader variable that receives the sample.
//   texcoords   - name of the shader variable holding the coordinates; its .xy is
//                 multiplied by the texdim entry for texmap before sampling.
//   texswap     - swizzle string appended to the sampled value.
//   texmap      - index used for the sampler (samp%d), the texdim entry and,
//                 on D3D11, the texture object (Tex%d).
//   ApiType     - D3D11 emits "Tex<N>.Sample(samp<N>, ...)"; every other API emits
//                 a function call, "texture" for OpenGL and "tex2D" otherwise.
1252void SampleTexture(char *&p, const char *destination, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType)
1253{
1254 if (ApiType == API_D3D11)
1255 WRITEp+=sprintf(p, "%s=Tex%d.Sample(samp%d,%s.xy * " I_TEXDIMS"texdim""[%d].xy).%s;\n", destination, texmap,texmap, texcoords, texmap, texswap);
1256 else
1257 WRITEp+=sprintf(p, "%s=%s(samp%d,%s.xy * " I_TEXDIMS"texdim""[%d].xy).%s;\n", destination, ApiType == API_OPENGL ? "texture" : "tex2D", texmap, texcoords, texmap, texswap);
1258}
1259
// printf-style templates for one alpha-test comparison against prev.a.
// WriteAlphaTest indexes this table with bpmem.alpha_test.comp0 / comp1 and passes
// the reference-value expression as the single %s argument. The first and last
// entries contain no %s, so the argument is simply unused for them.
// The comparisons carry a fixed tolerance (0.25/255 or 0.5/255) rather than
// testing for exact equality or ordering.
1260static const char *tevAlphaFuncsTable[] =
1261{
1262 "(false)", //ALPHACMP_NEVER 0
1263 "(prev.a <= %s - (0.25f/255.0f))", //ALPHACMP_LESS 1
1264 "(abs( prev.a - %s ) < (0.5f/255.0f))", //ALPHACMP_EQUAL 2
1265 "(prev.a < %s + (0.25f/255.0f))", //ALPHACMP_LEQUAL 3
1266 "(prev.a >= %s + (0.25f/255.0f))", //ALPHACMP_GREATER 4
1267 "(abs( prev.a - %s ) >= (0.5f/255.0f))", //ALPHACMP_NEQUAL 5
1268 "(prev.a > %s - (0.25f/255.0f))", //ALPHACMP_GEQUAL 6
1269 "(true)" //ALPHACMP_ALWAYS 7
1270};
1271
// Shader operators that join the two alpha-test comparisons.
// WriteAlphaTest indexes this table with bpmem.alpha_test.logic; xor and xnor are
// expressed as inequality / equality of the two boolean sub-expressions.
1272static const char *tevAlphaFunclogicTable[] =
1273{
1274 " && ", // and
1275 " || ", // or
1276 " != ", // xor
1277 " == " // xnor
1278};
1279
// Appends the alpha-test block to the shader text: an
// "if(!( <comp0> <logic> <comp1> )) { ... }" statement whose body runs when the
// test fails.
//
//   p               - write cursor into the shader text buffer (advanced by each write).
//   ApiType         - for every API except D3D11 a "return;" is emitted after "discard;".
//   dstAlphaMode    - when DSTALPHA_DUAL_SOURCE_BLEND, ocol1 is zeroed as well as ocol0.
//   per_pixel_depth - when true, the failing path also writes "depth = 1.f;".
//
// The two comparisons come from tevAlphaFuncsTable (selected by
// bpmem.alpha_test.comp0 / comp1) and are joined by the operator that
// tevAlphaFunclogicTable gives for bpmem.alpha_test.logic.
1280static void WriteAlphaTest(char *&p, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth)
1281{
    // Reference-value expressions: [0] is used for comp0, [1] for comp1.
1282 static const char *alphaRef[2] =
1283 {
1284 I_ALPHA"alphaRef""[0].r",
1285 I_ALPHA"alphaRef""[0].g"
1286 };
1287
1288
1289 // using discard then return works the same in cg and dx9 but not in dx11
1290 WRITEp+=sprintf(p, "\tif(!( ");
1291
1292 int compindex = bpmem.alpha_test.comp0;
1293 WRITEp+=sprintf(p, tevAlphaFuncsTable[compindex],alphaRef[0]);//lookup the first component from the alpha function table
1294
1295 WRITEp+=sprintf(p, "%s", tevAlphaFunclogicTable[bpmem.alpha_test.logic]);//lookup the logic op
1296
1297 compindex = bpmem.alpha_test.comp1;
1298 WRITEp+=sprintf(p, tevAlphaFuncsTable[compindex],alphaRef[1]);//lookup the second component from the alpha function table
1299 WRITEp+=sprintf(p, ")) {\n");
1300
    // Outputs are zeroed on the failing path before the optional discard.
1301 WRITEp+=sprintf(p, "\t\tocol0 = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
1302 if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
1303 WRITEp+=sprintf(p, "\t\tocol1 = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
1304 if(per_pixel_depth)
1305 WRITEp+=sprintf(p, "depth = 1.f;\n");
1306
1307 // HAXX: zcomploc (aka early_ztest) is a way to control whether depth test is done before
1308 // or after texturing and alpha test. PC GPUs have no way to support this
1309 // feature properly as of 2012: depth buffer and depth test are not
1310 // programmable and the depth test is always done after texturing.
1311 // Most importantly, PC GPUs do not allow writing to the z-buffer without
1312 // writing a color value (unless color writing is disabled altogether).
1313 // We implement "depth test before texturing" by discarding the fragment
1314 // when the alpha test fail. This is not a correct implementation because
1315 // even if the depth test fails the fragment could be alpha blended, but
1316 // we don't have a choice.
    // As written, the discard is skipped only when early_ztest and depth
    // updates are both enabled.
1317 if (!(bpmem.zcontrol.early_ztest && bpmem.zmode.updateenable))
1318 {
1319 WRITEp+=sprintf(p, "\t\tdiscard;\n");
1320 if (ApiType != API_D3D11)
1321 WRITEp+=sprintf(p, "\t\treturn;\n");
1322 }
1323
1324 WRITEp+=sprintf(p, "}\n");
1325}
1326
// Shader statements that reshape the linear "fog" factor, indexed by
// bpmem.fog.c_proj_fsel.fsel. WriteFog only reads this table when fsel is greater
// than 3, so the four leading empty strings are never emitted from there.
// Each non-empty entry reads and overwrites a shader variable named "fog".
1327static const char *tevFogFuncsTable[] =
1328{
1329 "", // No Fog
1330 "", // ?
1331 "", // Linear
1332 "", // ?
1333 "\tfog = 1.0f - pow(2.0f, -8.0f * fog);\n", // exp
1334 "\tfog = 1.0f - pow(2.0f, -8.0f * fog * fog);\n", // exp2
1335 "\tfog = pow(2.0f, -8.0f * (1.0f - fog));\n", // backward exp
1336 "\tfog = 1.0f - fog;\n fog = pow(2.0f, -8.0f * fog * fog);\n" // backward exp2
1337};
1338
// Appends the fog computation to the shader text, ending with a blend of
// prev.rgb towards the fog colour. Emits nothing when fog is disabled
// (bpmem.fog.c_proj_fsel.fsel == 0).
//
//   p       - write cursor into the shader text buffer (advanced by each write).
//   ApiType - only selects which FUNC_LERP entry of GLSLConvertFunctions is used.
//
// The emitted code reads the shader variables zCoord, clipPos and prev, and
// declares ze, fog and (when range adjustment is enabled) x_adjust.
1339static void WriteFog(char *&p, API_TYPE ApiType)
1340{
1341 bool bOpenGL = ApiType == API_OPENGL;
1342
1343 if (bpmem.fog.c_proj_fsel.fsel == 0)
1344 return; // no Fog
1345
1346 if (bpmem.fog.c_proj_fsel.proj == 0)
1347 {
1348 // perspective
1349 // ze = A/(B - (Zs >> B_SHF)
1350 WRITEp+=sprintf (p, "\tfloat ze = " I_FOG"cfog""[1].x / (" I_FOG"cfog""[1].y - (zCoord / " I_FOG"cfog""[1].w));\n");
1351 }
1352 else
1353 {
1354 // orthographic
1355 // ze = a*Zs (here, no B_SHF)
1356 WRITEp+=sprintf (p, "\tfloat ze = " I_FOG"cfog""[1].x * zCoord;\n");
1357 }
1358
1359 // x_adjust = sqrt((x-center)^2 + k^2)/k
1360 // ze *= x_adjust
1361 //this is completely theoretical as the real hardware seems to use a table instead of calculating the values.
1362 if (bpmem.fogRange.Base.Enabled)
1363 {
1364 WRITEp+=sprintf (p, "\tfloat x_adjust = (2.0f * (clipPos.x / " I_FOG"cfog""[2].y)) - 1.0f - " I_FOG"cfog""[2].x;\n");
1365 WRITEp+=sprintf (p, "\tx_adjust = sqrt(x_adjust * x_adjust + " I_FOG"cfog""[2].z * " I_FOG"cfog""[2].z) / " I_FOG"cfog""[2].z;\n");
1366 WRITEp+=sprintf (p, "\tze *= x_adjust;\n");
1367 }
1368
1369 WRITEp+=sprintf (p, "\tfloat fog = clamp(ze - " I_FOG"cfog""[1].z, 0.0, 1.0);\n");
1370
    // fsel above 3 applies a curve from tevFogFuncsTable; for fsel 1..3 the
    // clamped linear factor is used unchanged, with a warning logged unless
    // fsel is 2.
1371 if (bpmem.fog.c_proj_fsel.fsel > 3)
1372 {
1373 WRITEp+=sprintf(p, "%s", tevFogFuncsTable[bpmem.fog.c_proj_fsel.fsel]);
1374 }
1375 else
1376 {
1377 if (bpmem.fog.c_proj_fsel.fsel != 2)
1378 WARN_LOG(VIDEO, "Unknown Fog Type! %08x", bpmem.fog.c_proj_fsel.fsel)do { { if (LogTypes::LWARNING <= 3) GenericLog(LogTypes::LWARNING
, LogTypes::VIDEO, "/home/anal/dolphin-emu/Source/Core/VideoCommon/Src/PixelShaderGen.cpp"
, 1378, "Unknown Fog Type! %08x", bpmem.fog.c_proj_fsel.fsel)
; } } while (0)
;
1379 }
1380
1381 WRITEp+=sprintf(p, "\tprev.rgb = %s(prev.rgb, " I_FOG"cfog""[0].rgb, fog);\n", GLSLConvertFunctions[FUNC_LERP2 + bOpenGL]);
1382}