So I was looking for some ways to improve quality while trying not to decrease performance too much... and I came up with some interesting stuff. This is my journey to cool shadows:
Mentioned FPS are for that camera view.
Tested with Nvidia 540m, i7, Ubuntu 14.04.
These are PSSM shadows, but it works with other shadow techniques too (I haven't tested those much).
No filtering at all:
FPS: 73
=======================
Original pbs template shadows with 2x2 PCF:
FPS: 72
=======================
PCF 5x5:
Code: Select all
// 5x5 percentage-closer filtering: one shadow-map tap per cell of a 5x5
// texel grid; every tap that passes the depth comparison adds 0.04 (1/25).
vec3 UVCoords = psPosLN.xyz / psPosLN.w;
float shadowFactor = 0.0;
float bias = 0.0000000001;
for (int y = 0; y < 5; y++) {
    for (int x = 0; x < 5; x++) {
        // Offsets run -2..+2 texels so the kernel is centred on UVCoords.
        // An (x - 2.5) offset would cover -2.5..+1.5 and shift the filtered
        // result by half a texel.
        vec2 offsets = vec2(float(x - 2) * invShadowMapSize.x, float(y - 2) * invShadowMapSize.y);
        if (psPosLN.z + bias <= texture(shadowMap, UVCoords.xy + offsets).r) {
            shadowFactor += 0.04;
        }
    }
}
return shadowFactor;
FPS: 67
=======================
PCF 5x5 with pseudo-random offset:
Code: Select all
// Hash-style pseudo-random value: projects the seed onto a fixed vector,
// scales the sine of that projection and keeps only the fractional part.
float random(in vec3 seed)
{
    float projected = dot(seed, vec3(53.1215, 21.1352, 9.1322));
    float scaled = sin(projected) * 43758.5453;
    return fract(scaled);
}
------------------------------
// 5x5 PCF where every tap is displaced by a pseudo-random amount derived
// from its own grid offset; each passing tap contributes 0.04 (1/25).
vec3 UVCoords = psPosLN.xyz / psPosLN.w;
float bias = 0.0000000001;
float receiverDepth = psPosLN.z + bias;
float shadowFactor = 0.0;
for (int row = 0; row < 5; ++row) {
    for (int col = 0; col < 5; ++col) {
        // Regular grid position of this tap, in UV units.
        vec2 gridOffset = vec2(float(col - 2) * invShadowMapSize.x,
                               float(row - 2) * invShadowMapSize.y);
        // The jitter is seeded by the grid offset itself, so it is the same
        // for every fragment that evaluates this tap.
        vec2 jitter = vec2(random(gridOffset.xyy) * invShadowMapSize.x,
                           random(gridOffset.yxy) * invShadowMapSize.y);
        vec2 tapOffset = gridOffset + jitter;
        float storedDepth = texture(shadowMap, UVCoords.xy + tapOffset).r;
        if (receiverDepth <= storedDepth) {
            shadowFactor += 0.04;
        }
    }
}
return shadowFactor;
FPS: 48
=======================
PCF 5x5 with pseudo-random poissonDisk:
Code: Select all
// Hash-style pseudo-random value: dot the seed with fixed constants, take
// the sine, scale it up and return the fractional part.
float random(in vec3 seed)
{
    const vec3 weights = vec3(53.1215, 21.1352, 9.1322);
    float phase = dot(seed, weights);
    return fract(sin(phase) * 43758.5453);
}
------------------------------
// 5x5 PCF whose whole kernel is displaced by one of four fixed disk
// offsets, picked pseudo-randomly per fragment.
vec2 poissonDisk[4] = vec2[](
    vec2( -0.94201624, -0.39906216 ),
    vec2( 0.94558609, -0.76890725 ),
    vec2( -0.094184101, -0.92938870 ),
    vec2( 0.34495938, 0.29387760 )
);
vec3 UVCoords = psPosLN.xyz / psPosLN.w;
float shadowFactor = 0.0;
float bias = 0.0000000001;
// The seed depends only on gl_FragCoord, not on x or y, so the disk entry is
// the same for all 25 taps: pick it once instead of hashing in every
// iteration. The "% 4" keeps the index in range should the product reach 4.
int index = int(4.0 * random(gl_FragCoord.xyz / gl_FragCoord.w)) % 4;
vec2 jitter = poissonDisk[index] * 0.001;
for (int y = 0; y < 5; y++) {
    for (int x = 0; x < 5; x++) {
        vec2 offsets = vec2(float(x - 2) * invShadowMapSize.x, float(y - 2) * invShadowMapSize.y);
        offsets = offsets + jitter;
        if (psPosLN.z + bias <= texture(shadowMap, UVCoords.xy + offsets).r) {
            shadowFactor += 0.04; // 1/25: equal weight per tap
        }
    }
}
return shadowFactor;
FPS: 40
=======================
Then I read that Unity5 uses a 5x5 PCF, and it looks very good/soft, so I guess that what I was lacking was some linear interpolation.
https://unity3d.com/sites/default/files ... hics-8.jpg
I couldn't find how to enable linear filtering on the shadow texture, but here I found a way:
http://codeflow.org/entries/2013/feb/15 ... w-mapping/
this is the result with just the interpolation:
FPS: 72
this is the code from that site
Code: Select all
// Depth test for a single tap: yields 1.0 when the red channel stored at
// `uv` is greater than or equal to `compare`, and 0.0 otherwise.
float texture2DCompare(sampler2D depths, vec2 uv, float compare){
    return step(compare, texture2D(depths, uv).r);
}
// Manual bilinear filtering of depth-comparison results: runs the depth test
// on four neighbouring taps and blends the four 0/1 outcomes using the
// fractional position of `uv` inside the grid. `size` is the texture
// resolution in texels.
float texture2DShadowLerp(sampler2D depths, vec2 size, vec2 uv, float compare){
    vec2 texelSize = vec2(1.0) / size;
    vec2 gridPos = uv * size + 0.5;
    vec2 weight = fract(gridPos);
    vec2 baseUV = floor(gridPos) / size;

    // Naming: s<x><y>, where 0/1 is the tap's texel offset along each axis.
    float s00 = texture2DCompare(depths, baseUV + texelSize * vec2(0.0, 0.0), compare);
    float s01 = texture2DCompare(depths, baseUV + texelSize * vec2(0.0, 1.0), compare);
    float s10 = texture2DCompare(depths, baseUV + texelSize * vec2(1.0, 0.0), compare);
    float s11 = texture2DCompare(depths, baseUV + texelSize * vec2(1.0, 1.0), compare);

    // Blend along y inside each column first, then along x between columns.
    float column0 = mix(s00, s01, weight.y);
    float column1 = mix(s10, s11, weight.y);
    return mix(column0, column1, weight.x);
}
-----------------------------------------
// Single interpolated lookup. texture2DShadowLerp expects the map size, so
// the inverse size is turned back into a resolution here.
return texture2DShadowLerp(shadowMap, vec2(1.0) / invShadowMapSize, psPosLN.xy / psPosLN.w, psPosLN.z);
=======================
PCF 5x5 + linear interpolation. Now I think we are talking serious!:
Code: Select all
// 5x5 PCF where every tap is itself a bilinear-weighted shadow lookup; the
// 25 results are averaged (0.04 = 1/25).
vec3 UVCoords = psPosLN.xyz / psPosLN.w;
// The map resolution does not change per tap, so compute it once rather than
// inside the loop.
vec2 shadowMapSize = vec2(1.0) / invShadowMapSize;
float shadowFactor = 0.0;
for (int y = 0; y < 5; y++) {
    for (int x = 0; x < 5; x++) {
        vec2 offsets = vec2(float(x - 2) * invShadowMapSize.x, float(y - 2) * invShadowMapSize.y);
        shadowFactor += texture2DShadowLerp(shadowMap, shadowMapSize, UVCoords.xy + offsets, psPosLN.z);
    }
}
return shadowFactor * 0.04;
FPS: 36
=======================
So I think maybe with linear interpolation, PCF 3x3 would be enough.
PCF 3x3 + linear interpolation:
Code: Select all
// 3x3 PCF where every tap is itself a bilinear-weighted shadow lookup; the
// nine results are averaged (0.11111111 ~= 1/9).
vec3 UVCoords = psPosLN.xyz / psPosLN.w;
// The map resolution does not change per tap, so compute it once rather than
// inside the loop.
vec2 shadowMapSize = vec2(1.0) / invShadowMapSize;
float shadowFactor = 0.0;
for (int y = 0; y < 3; y++) {
    for (int x = 0; x < 3; x++) {
        vec2 offsets = vec2(float(x - 1) * invShadowMapSize.x, float(y - 1) * invShadowMapSize.y);
        shadowFactor += texture2DShadowLerp(shadowMap, shadowMapSize, UVCoords.xy + offsets, psPosLN.z);
    }
}
return shadowFactor * 0.11111111;
FPS: 56
=======================
Then I found this post:
http://www.ogre3d.org/forums/viewtopic.php?f=1&t=78834
That post uses the same linear interpolation, and then gives a 3x3 version with fewer lookups.
PCF 3x3 + linear interpolation + optimization:
Code: Select all
// 3x3 variant that fetches each of the nine taps exactly once, averages the
// four overlapping 2x2 groups, and blends those averages with the fractional
// position of the lookup.
vec2 uv = psPosLN.xy / psPosLN.w;
vec2 texelSize = invShadowMapSize;
vec2 size = vec2(1.0) / invShadowMapSize;
vec2 gridPos = uv * size + 0.5;
vec2 centroidUV = floor(gridPos) / size;
vec2 f = fract(gridPos);
const int SIDE = 3;
vec2 topLeft = centroidUV - texelSize * 1.5;

// Depth-test results, stored as taps[column * SIDE + row].
float taps[9];
for (int col = 0; col < SIDE; ++col) {
    for (int row = 0; row < SIDE; ++row) {
        vec2 tapUV = topLeft + vec2(col, row) * texelSize;
        taps[col * SIDE + row] = texture2DCompare(shadowMap, tapUV, psPosLN.z);
    }
}

// Mean of each overlapping 2x2 group; quad<c><r> starts at column c, row r.
float quad00 = (taps[0] + taps[1] + taps[3] + taps[4]) / 4.0;
float quad01 = (taps[1] + taps[2] + taps[4] + taps[5]) / 4.0;
float quad10 = (taps[3] + taps[4] + taps[6] + taps[7]) / 4.0;
float quad11 = (taps[4] + taps[5] + taps[7] + taps[8]) / 4.0;

// Blend along y within each column pair, then along x between them.
float column0 = mix(quad00, quad01, f.y);
float column1 = mix(quad10, quad11, f.y);
return mix(column0, column1, f.x);
FPS: 69
=======================
Conclusions:
My vote is for something like "PCF 3x3 + linear interpolation + optimization", but I am going to try to make it look like "PCF 3x3 + linear interpolation", which would be almost enough for my needs.
I believe PCF can be enabled in hardware, but it's beyond my skills.
I believe that linear interpolation, or bilinear filtering can be enabled for the texture so when you do a simple PCF it would look good instantly, but I don't know how to enable it.
And the only thing that I think is a MUST is STABLE PSSM shadows, like this:
http://www.garagegames.com/community/blogs/view/21284
or this one:
http://www.ogre3d.org/forums/viewtopic.php?f=11&t=71142
but again, that is beyond my skills.
Hopefully some of these techniques could be in Ogre 2.1 by default, maybe with the ability to activate some filtering like:
Code: Select all
shadow_map 0 2048 2048 PF_FLOAT32_R light 0 split 0 pcf 3 linearFiltering on