This question is related to another one I asked a few days ago. Since I have finally got to the bottom of the issue, I have preferred to open a new question with more detailed information about what is going on.
I basically have a GLSL fragment shader that gives unexpected results depending on whether it runs on an NVIDIA Quadro, where it works as expected, or an NVIDIA GeForce, where it does something weird. After a detailed evaluation of what is going on, I have realised that the issue revolves around the discard operation when using the NVIDIA GeForce 560 Ti.
Can someone explain to me why the two versions of the code below behave differently on an NVIDIA GeForce 560 Ti, but exactly the same on an NVIDIA Quadro?
EDIT #1: After some deeper evaluation I can conclude that the discard statement itself is not the problem. The problem occurs when the sumw if-else statement runs at the end of the shader, once colour has been computed. For whatever reason, if I run the sumw if-else at the end of the shader, the final render is not the expected one. On the other hand, if the sumw if-else statement sits in the middle of the shader, right before the code that computes the final value of colour, the shader works as expected.
VERSION #1 (works incorrectly on the GeForce 560; basically, it does not discard the pixel)
i = 0;
if (camw[i]>0.0)
{
vec4 texCoord = gl_TextureMatrix[i] * p;
vec4 texColour = texture2DProj(texSampler[0],texCoord);
float w = camw[i] / sumw;
if (w>1.0) w = 1.0;
colour += texColour.rgb*w;
}
i = 1;
if (camw[i]>0.0)
{
vec4 texCoord = gl_TextureMatrix[i] * p;
vec4 texColour = texture2DProj(texSampler[1],texCoord);
float w = camw[i] / sumw;
if (w>1.0) w = 1.0;
colour += texColour.rgb*w;
}
i = 2;
if (camw[i]>0.0)
{
vec4 texCoord = gl_TextureMatrix[i] * p;
vec4 texColour = texture2DProj(texSampler[2],texCoord);
float w = camw[i] / sumw;
if (w>1.0) w = 1.0;
colour += texColour.rgb*w;
}
i = 3;
if (camw[i]>0.0)
{
vec4 texCoord = gl_TextureMatrix[i] * p;
vec4 texColour = texture2DProj(texSampler[3],texCoord);
float w = camw[i] / sumw;
if (w>1.0) w = 1.0;
colour += texColour.rgb*w;
}
i = 4;
if (camw[i]>0.0)
{
vec4 texCoord = gl_TextureMatrix[i] * p;
vec4 texColour = texture2DProj(texSampler[4],texCoord);
float w = camw[i] / sumw;
if (w>1.0) w = 1.0;
colour += texColour.rgb*w;
}
i = 5;
if (camw[i]>0.0)
{
vec4 texCoord = gl_TextureMatrix[i] * p;
vec4 texColour = texture2DProj(texSampler[5],texCoord);
float w = camw[i] / sumw;
if (w>1.0) w = 1.0;
colour += texColour.rgb*w;
}
i = 6;
if (camw[i]>0.0)
{
vec4 texCoord = gl_TextureMatrix[i] * p;
vec4 texColour = texture2DProj(texSampler[6],texCoord);
float w = camw[i] / sumw;
if (w>1.0) w = 1.0;
colour += texColour.rgb*w;
}
i = 7;
if (camw[i]>0.0)
{
vec4 texCoord = gl_TextureMatrix[i] * p;
vec4 texColour = texture2DProj(texSampler[7],texCoord);
float w = camw[i] / sumw;
if (w>1.0) w = 1.0;
colour += texColour.rgb*w;
}
if (sumw<=0.0) // Evaluating this here makes the shader behave incorrectly
{
discard;
}
else
{
gl_FragColor = vec4(colour, 1);
}
VERSION #2 (works the same on both graphics cards)
if (sumw<=0.0) // Evaluating this here makes the shader perform as expected
{
discard;
}
else
{
i = 0;
if (camw[i]>0.0)
{
vec4 texCoord = gl_TextureMatrix[i] * p;
vec4 texColour = texture2DProj(texSampler[0],texCoord);
float w = camw[i] / sumw;
if (w>1.0) w = 1.0;
colour += texColour.rgb*w;
}
i = 1;
if (camw[i]>0.0)
{
vec4 texCoord = gl_TextureMatrix[i] * p;
vec4 texColour = texture2DProj(texSampler[1],texCoord);
float w = camw[i] / sumw;
if (w>1.0) w = 1.0;
colour += texColour.rgb*w;
}
i = 2;
if (camw[i]>0.0)
{
vec4 texCoord = gl_TextureMatrix[i] * p;
vec4 texColour = texture2DProj(texSampler[2],texCoord);
float w = camw[i] / sumw;
if (w>1.0) w = 1.0;
colour += texColour.rgb*w;
}
i = 3;
if (camw[i]>0.0)
{
vec4 texCoord = gl_TextureMatrix[i] * p;
vec4 texColour = texture2DProj(texSampler[3],texCoord);
float w = camw[i] / sumw;
if (w>1.0) w = 1.0;
colour += texColour.rgb*w;
}
i = 4;
if (camw[i]>0.0)
{
vec4 texCoord = gl_TextureMatrix[i] * p;
vec4 texColour = texture2DProj(texSampler[4],texCoord);
float w = camw[i] / sumw;
if (w>1.0) w = 1.0;
colour += texColour.rgb*w;
}
i = 5;
if (camw[i]>0.0)
{
vec4 texCoord = gl_TextureMatrix[i] * p;
vec4 texColour = texture2DProj(texSampler[5],texCoord);
float w = camw[i] / sumw;
if (w>1.0) w = 1.0;
colour += texColour.rgb*w;
}
i = 6;
if (camw[i]>0.0)
{
vec4 texCoord = gl_TextureMatrix[i] * p;
vec4 texColour = texture2DProj(texSampler[6],texCoord);
float w = camw[i] / sumw;
if (w>1.0) w = 1.0;
colour += texColour.rgb*w;
}
i = 7;
if (camw[i]>0.0)
{
vec4 texCoord = gl_TextureMatrix[i] * p;
vec4 texColour = texture2DProj(texSampler[7],texCoord);
float w = camw[i] / sumw;
if (w>1.0) w = 1.0;
colour += texColour.rgb*w;
}
}
gl_FragColor = vec4(colour, 1);
If I am not mistaken, the only difference between the two versions is that the first one does not discard the pixel until the rest of the operations have been completed, whereas the second one discards the pixel if necessary and otherwise carries on with the rest of the computation.
However, they should behave the same, because the computation of colour does not affect the condition that decides whether to discard, since sumw never changes its value.
Is there any explanation for this?
Answer
You are inducing undefined behavior.
The behavior of the texture functions that need implicit derivatives is undefined if the texture coordinate provided to them is not within uniform flow (i.e., it is in a conditional branch or uses the results of such a conditional branch).
The texture coordinates you use, not to mention the samplers, are all based on the results of conditional branching logic. Therefore, the results of accessing the texture with them are undefined.
To resolve this, you must either do all of your texture accesses before deciding which results to use, or you must get gradients for all of your texture coordinates and then use the texture "Grad" function(s). Or you can use a texture "Lod" function for a specific LOD of the image (but you won't get mipmapping, for obvious reasons).
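As a rough sketch of the first option, restructured around the question's own identifiers (texSampler, camw, sumw, p, colour): the uniform and varying declarations below are assumptions added only to make the snippet self-contained, so adapt them to the real shader.
#version 120
uniform sampler2D texSampler[8]; // one sampler per camera (assumed declaration)
uniform float camw[8];           // per-camera weights (assumed declaration)
uniform float sumw;              // sum of the weights (assumed declaration)
varying vec4 p;                  // point being projected (assumed declaration)
void main()
{
    vec3 colour = vec3(0.0);
    vec3 texColour[8];
    // Every texture access happens unconditionally, so the implicit
    // derivatives used for mipmap selection are well defined.
    texColour[0] = texture2DProj(texSampler[0], gl_TextureMatrix[0] * p).rgb;
    texColour[1] = texture2DProj(texSampler[1], gl_TextureMatrix[1] * p).rgb;
    texColour[2] = texture2DProj(texSampler[2], gl_TextureMatrix[2] * p).rgb;
    texColour[3] = texture2DProj(texSampler[3], gl_TextureMatrix[3] * p).rgb;
    texColour[4] = texture2DProj(texSampler[4], gl_TextureMatrix[4] * p).rgb;
    texColour[5] = texture2DProj(texSampler[5], gl_TextureMatrix[5] * p).rgb;
    texColour[6] = texture2DProj(texSampler[6], gl_TextureMatrix[6] * p).rgb;
    texColour[7] = texture2DProj(texSampler[7], gl_TextureMatrix[7] * p).rgb;
    // Only the weighting is conditional now; no texture access depends on
    // non-uniform control flow, so the discard can go anywhere without
    // affecting the sampled results.
    for (int i = 0; i < 8; ++i)
    {
        if (camw[i] > 0.0)
        {
            float w = min(camw[i] / sumw, 1.0);
            colour += texColour[i] * w;
        }
    }
    if (sumw <= 0.0)
        discard;
    gl_FragColor = vec4(colour, 1.0);
}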