[tbb-commits] [tor-browser] 209/311: Bug 1758736 - Support RG16 textures in SWGL. r=bradwerth a=dmeehan
gitolite role
git at cupani.torproject.org
Tue Apr 26 15:30:09 UTC 2022
This is an automated email from the git hooks/post-receive script.
pierov pushed a commit to branch geckoview-99.0.1-11.0-1
in repository tor-browser.
commit 8259e9ab200d8ce8cbe154ff3ee2bd6e4f3f3dac
Author: Lee Salzman <lsalzman at mozilla.com>
AuthorDate: Thu Mar 10 16:36:05 2022 +0000
Bug 1758736 - Support RG16 textures in SWGL. r=bradwerth a=dmeehan
Differential Revision: https://phabricator.services.mozilla.com/D140716
---
gfx/webrender_bindings/RenderTextureHostSWGL.cpp | 17 +++-
gfx/wr/swgl/src/gl.cc | 6 ++
gfx/wr/swgl/src/gl_defs.h | 1 +
gfx/wr/swgl/src/glsl.h | 2 +-
gfx/wr/swgl/src/swgl_ext.h | 17 +++-
gfx/wr/swgl/src/texture.h | 121 +++++++++++++++++++++++
gfx/wr/webrender/res/brush_yuv_image.glsl | 4 +-
gfx/wr/webrender/res/composite.glsl | 6 +-
gfx/wr/webrender/res/yuv.glsl | 65 +++++-------
9 files changed, 190 insertions(+), 49 deletions(-)
diff --git a/gfx/webrender_bindings/RenderTextureHostSWGL.cpp b/gfx/webrender_bindings/RenderTextureHostSWGL.cpp
index 17f911fd28740..0e1b869d48f5b 100644
--- a/gfx/webrender_bindings/RenderTextureHostSWGL.cpp
+++ b/gfx/webrender_bindings/RenderTextureHostSWGL.cpp
@@ -55,8 +55,20 @@ bool RenderTextureHostSWGL::UpdatePlanes(RenderCompositor* aCompositor,
}
break;
case gfx::SurfaceFormat::NV12:
- MOZ_ASSERT(colorDepth == gfx::ColorDepth::COLOR_8);
- internalFormat = i > 0 ? LOCAL_GL_RG8 : LOCAL_GL_R8;
+ switch (colorDepth) {
+ case gfx::ColorDepth::COLOR_8:
+ internalFormat = i > 0 ? LOCAL_GL_RG8 : LOCAL_GL_R8;
+ break;
+ case gfx::ColorDepth::COLOR_10:
+ case gfx::ColorDepth::COLOR_12:
+ case gfx::ColorDepth::COLOR_16:
+ internalFormat = i > 0 ? LOCAL_GL_RG16 : LOCAL_GL_R16;
+ break;
+ }
+ break;
+ case gfx::SurfaceFormat::P010:
+ MOZ_ASSERT(colorDepth == gfx::ColorDepth::COLOR_10);
+ internalFormat = i > 0 ? LOCAL_GL_RG16 : LOCAL_GL_R16;
break;
case gfx::SurfaceFormat::YUV422:
MOZ_ASSERT(colorDepth == gfx::ColorDepth::COLOR_8);
@@ -164,6 +176,7 @@ bool RenderTextureHostSWGL::LockSWGLCompositeSurface(
switch (GetFormat()) {
case gfx::SurfaceFormat::YUV:
case gfx::SurfaceFormat::NV12:
+ case gfx::SurfaceFormat::P010:
case gfx::SurfaceFormat::YUV422: {
aInfo->yuv_planes = mPlanes.size();
auto colorSpace = GetYUVColorSpace();
diff --git a/gfx/wr/swgl/src/gl.cc b/gfx/wr/swgl/src/gl.cc
index dcf2df547c9eb..2a06d3af5dcc1 100644
--- a/gfx/wr/swgl/src/gl.cc
+++ b/gfx/wr/swgl/src/gl.cc
@@ -256,6 +256,8 @@ static int bytes_for_internal_format(GLenum internal_format) {
return 2;
case GL_R16:
return 2;
+ case GL_RG16:
+ return 4;
default:
debugf("internal format: %x\n", internal_format);
assert(0);
@@ -279,6 +281,8 @@ static TextureFormat gl_format_to_texture_format(int type) {
return TextureFormat::RG8;
case GL_R16:
return TextureFormat::R16;
+ case GL_RG16:
+ return TextureFormat::RG16;
case GL_RGB_RAW_422_APPLE:
return TextureFormat::YUV422;
default:
@@ -1745,6 +1749,8 @@ GLenum internal_format_for_data(GLenum format, GLenum ty) {
return GL_RGB_RAW_422_APPLE;
} else if (format == GL_RED && ty == GL_UNSIGNED_SHORT) {
return GL_R16;
+ } else if (format == GL_RG && ty == GL_UNSIGNED_SHORT) {
+ return GL_RG16;
} else {
debugf("unknown internal format for format %x, type %x\n", format, ty);
assert(false);
diff --git a/gfx/wr/swgl/src/gl_defs.h b/gfx/wr/swgl/src/gl_defs.h
index 75eb6ca35981e..b60eaad0ece78 100644
--- a/gfx/wr/swgl/src/gl_defs.h
+++ b/gfx/wr/swgl/src/gl_defs.h
@@ -34,6 +34,7 @@ typedef intptr_t GLintptr;
#define GL_RGBA8 0x8058
#define GL_R8 0x8229
#define GL_R16 0x822A
+#define GL_RG16 0x822C
#define GL_RGBA32I 0x8D82
#define GL_BGRA8 0x93A1
#define GL_RG8 0x822B
diff --git a/gfx/wr/swgl/src/glsl.h b/gfx/wr/swgl/src/glsl.h
index 3be1e49d1b2a1..3f6a59a0beacc 100644
--- a/gfx/wr/swgl/src/glsl.h
+++ b/gfx/wr/swgl/src/glsl.h
@@ -8,7 +8,7 @@
namespace glsl {
-enum TextureFormat { RGBA32F, RGBA32I, RGBA8, R8, RG8, R16, YUV422 };
+enum TextureFormat { RGBA32F, RGBA32I, RGBA8, R8, RG8, R16, RG16, YUV422 };
enum TextureFilter { NEAREST, LINEAR };
diff --git a/gfx/wr/swgl/src/swgl_ext.h b/gfx/wr/swgl/src/swgl_ext.h
index d3dee3cb88bea..3c686dab262a4 100644
--- a/gfx/wr/swgl/src/swgl_ext.h
+++ b/gfx/wr/swgl/src/swgl_ext.h
@@ -1035,7 +1035,7 @@ template <typename S0, typename S1>
static ALWAYS_INLINE PackedRGBA8 sampleYUV(S0 sampler0, ivec2 uv0, S1 sampler1,
ivec2 uv1,
const YUVMatrix& rgb_from_ycbcr,
- UNUSED int rescaleFactor) {
+ int rescaleFactor) {
switch (sampler1->format) {
case TextureFormat::RG8: {
assert(sampler0->format == TextureFormat::R8);
@@ -1051,6 +1051,21 @@ static ALWAYS_INLINE PackedRGBA8 sampleYUV(S0 sampler0, ivec2 uv0, S1 sampler1,
return convertYUV(rgb_from_ycbcr, y, lowHalf(planar.ba),
highHalf(planar.rg));
}
+ case TextureFormat::RG16: {
+ assert(sampler0->format == TextureFormat::R16);
+ // The rescaling factor represents how many bits to add to renormalize the
+ // texture to 16 bits, and so the color depth is actually 16 minus the
+ // rescaling factor.
+ // Need to right shift the sample by the amount of bits over 8 it
+ // occupies. On output from textureLinearUnpackedR16, we have lost 1 bit
+ // of precision at the low end already, hence 1 is subtracted from the
+ // color depth.
+ int colorDepth = 16 - rescaleFactor;
+ int rescaleBits = (colorDepth - 1) - 8;
+ auto y = textureLinearUnpackedR16(sampler0, uv0) >> rescaleBits;
+ auto uv = textureLinearUnpackedRG16(sampler1, uv1) >> rescaleBits;
+ return rgb_from_ycbcr.convert(zip(y, y), uv);
+ }
default:
assert(false);
return PackedRGBA8(0);
diff --git a/gfx/wr/swgl/src/texture.h b/gfx/wr/swgl/src/texture.h
index 8f6988887da48..3f7ed4a518e61 100644
--- a/gfx/wr/swgl/src/texture.h
+++ b/gfx/wr/swgl/src/texture.h
@@ -160,6 +160,21 @@ vec4 texelFetchR16(S sampler, ivec2 P) {
return vec4(fetchOffsetsR16(sampler, offset), 0.0f, 0.0f, 1.0f);
}
+template <typename S>
+SI vec4 fetchOffsetsRG16(S sampler, I32 offset) {
+ U32 pixels = {sampler->buf[offset.x], sampler->buf[offset.y],
+ sampler->buf[offset.z], sampler->buf[offset.w]};
+ Float r = cast(pixels & 0xFFFF) * (1.0f / 65535.0f);
+ Float g = cast(pixels >> 16) * (1.0f / 65535.0f);
+ return vec4(r, g, 0.0f, 1.0f);
+}
+
+template <typename S>
+vec4 texelFetchRG16(S sampler, ivec2 P) {
+ I32 offset = P.x + P.y * sampler->stride;
+ return fetchOffsetsRG16(sampler, offset);
+}
+
SI vec4 fetchOffsetsFloat(const uint32_t* buf, I32 offset) {
return pixel_float_to_vec4(*(Float*)&buf[offset.x], *(Float*)&buf[offset.y],
*(Float*)&buf[offset.z], *(Float*)&buf[offset.w]);
@@ -212,6 +227,8 @@ vec4 texelFetch(sampler2D sampler, ivec2 P, int lod) {
return texelFetchRG8(sampler, P);
case TextureFormat::R16:
return texelFetchR16(sampler, P);
+ case TextureFormat::RG16:
+ return texelFetchRG16(sampler, P);
case TextureFormat::YUV422:
return texelFetchYUV422(sampler, P);
default:
@@ -301,6 +318,8 @@ vec4 texelFetch(sampler2DRect sampler, ivec2 P) {
return texelFetchRG8(sampler, P);
case TextureFormat::R16:
return texelFetchR16(sampler, P);
+ case TextureFormat::RG16:
+ return texelFetchRG16(sampler, P);
case TextureFormat::YUV422:
return texelFetchYUV422(sampler, P);
default:
@@ -710,6 +729,104 @@ vec4 textureLinearR16(S sampler, vec2 P) {
return vec4(r * (1.0f / 32767.0f), 0.0f, 0.0f, 1.0f);
}
+// Samples an RG16 texture with linear filtering and returns results packed as
+// signed I16. One bit of precision is shifted away from the bottom end to
+// accommodate the sign bit, so only 15 bits of precision are left.
+template <typename S>
+static inline V8<int16_t> textureLinearUnpackedRG16(S sampler, ivec2 i) {
+ assert(sampler->format == TextureFormat::RG16);
+
+ ivec2 frac = i;
+ i >>= 7;
+
+ I32 row0 = computeRow(sampler, i);
+ I32 row1 = row0 + computeNextRowOffset(sampler, i);
+
+ I16 fracx =
+ CONVERT(
+ ((frac.x & (i.x >= 0)) | (i.x > int32_t(sampler->width) - 2)) & 0x7F,
+ I16)
<< 8;
+ I16 fracy = computeFracY(frac) << 8;
+
+ // Sample the 2x16 bit data for both rows
+ auto a0 = unaligned_load<V4<uint16_t>>(&sampler->buf[row0.x]);
+ auto b0 = unaligned_load<V4<uint16_t>>(&sampler->buf[row0.y]);
+ auto ab0 = CONVERT(combine(a0, b0) >> 1, V8<int16_t>);
+ auto c0 = unaligned_load<V4<uint16_t>>(&sampler->buf[row0.z]);
+ auto d0 = unaligned_load<V4<uint16_t>>(&sampler->buf[row0.w]);
+ auto cd0 = CONVERT(combine(c0, d0) >> 1, V8<int16_t>);
+
+ auto a1 = unaligned_load<V4<uint16_t>>(&sampler->buf[row1.x]);
+ auto b1 = unaligned_load<V4<uint16_t>>(&sampler->buf[row1.y]);
+ auto ab1 = CONVERT(combine(a1, b1) >> 1, V8<int16_t>);
+ auto c1 = unaligned_load<V4<uint16_t>>(&sampler->buf[row1.z]);
+ auto d1 = unaligned_load<V4<uint16_t>>(&sampler->buf[row1.w]);
+ auto cd1 = CONVERT(combine(c1, d1) >> 1, V8<int16_t>);
+
+ // The samples occupy 15 bits and the fraction occupies 15 bits, so that when
+ // they are multiplied together, the new scaled sample will fit in the high
+ // 14 bits of the result. It is left shifted once to make it 15 bits again
+ // for the final multiply.
#if USE_SSE2
+ ab0 += bit_cast<V8<int16_t>>(_mm_mulhi_epi16(ab1 - ab0, fracy.xxxxyyyy)) << 1;
+ cd0 += bit_cast<V8<int16_t>>(_mm_mulhi_epi16(cd1 - cd0, fracy.zzzzwwww)) << 1;
#elif USE_NEON
+ // NEON has a convenient instruction that does both the multiply and the
+ // doubling, so doesn't need an extra shift.
+ ab0 += bit_cast<V8<int16_t>>(vqrdmulhq_s16(ab1 - ab0, fracy.xxxxyyyy));
+ cd0 += bit_cast<V8<int16_t>>(vqrdmulhq_s16(cd1 - cd0, fracy.zzzzwwww));
#else
+ ab0 += CONVERT((CONVERT(ab1 - ab0, V8<int32_t>) *
+ CONVERT(fracy.xxxxyyyy, V8<int32_t>)) >>
16,
+ V8<int16_t>)
<< 1;
+ cd0 += CONVERT((CONVERT(cd1 - cd0, V8<int32_t>) *
+ CONVERT(fracy.zzzzwwww, V8<int32_t>)) >>
16,
+ V8<int16_t>)
<< 1;
#endif
+
+ // ab = a.rgRG,b.rgRG
+ // cd = c.rgRG,d.rgRG
+ // ... ac = a.rg,c.rg,a.RG,c.RG
+ // ... bd = b.rg,d.rg,b.RG,d.RG
+ auto ac = zip2Low(ab0, cd0);
+ auto bd = zip2High(ab0, cd0);
+ // a.rg,b.rg,c.rg,d.rg
+ // a.RG,b.RG,c.RG,d.RG
+ auto abcdl = zip2Low(ac, bd);
+ auto abcdh = zip2High(ac, bd);
+ // Blend columns
#if USE_SSE2
+ abcdl += bit_cast<V8<int16_t>>(_mm_mulhi_epi16(abcdh - abcdl, fracx.xxyyzzww))
<< 1;
#elif USE_NEON
+ abcdl += bit_cast<V8<int16_t>>(vqrdmulhq_s16(abcdh - abcdl, fracx.xxyyzzww));
#else
+ abcdl += CONVERT((CONVERT(abcdh - abcdl, V8<int32_t>) *
+ CONVERT(fracx.xxyyzzww, V8<int32_t>)) >>
16,
+ V8<int16_t>)
<< 1;
#endif
+
+ return abcdl;
+}
+
+template <typename S>
+vec4 textureLinearRG16(S sampler, vec2 P) {
+ assert(sampler->format == TextureFormat::RG16);
+
+ ivec2 i(linearQuantize(P, 128, sampler));
+ auto rg = bit_cast<V4<int32_t>>(textureLinearUnpackedRG16(sampler, i));
+ auto r = cast(rg & 0xFFFF) * (1.0f / 32767.0f);
+ auto g = cast(rg >> 16) * (1.0f / 32767.0f);
+ return vec4(r, g, 0.0f, 1.0f);
+}
+
using PackedRGBA32F = V16<float>;
using WideRGBA32F = V16<float>;
@@ -854,6 +971,8 @@ SI vec4 texture(sampler2D sampler, vec2 P) {
return textureLinearRG8(sampler, P);
case TextureFormat::R16:
return textureLinearR16(sampler, P);
+ case TextureFormat::RG16:
+ return textureLinearRG16(sampler, P);
case TextureFormat::YUV422:
return textureLinearYUV422(sampler, P);
default:
@@ -878,6 +997,8 @@ vec4 texture(sampler2DRect sampler, vec2 P) {
return textureLinearRG8(sampler, P);
case TextureFormat::R16:
return textureLinearR16(sampler, P);
+ case TextureFormat::RG16:
+ return textureLinearRG16(sampler, P);
case TextureFormat::YUV422:
return textureLinearYUV422(sampler, P);
default:
diff --git a/gfx/wr/webrender/res/brush_yuv_image.glsl b/gfx/wr/webrender/res/brush_yuv_image.glsl
index 9d4446320f35a..eb41ecb490fbc 100644
--- a/gfx/wr/webrender/res/brush_yuv_image.glsl
+++ b/gfx/wr/webrender/res/brush_yuv_image.glsl
@@ -55,9 +55,9 @@ void brush_vs(
#ifdef SWGL_DRAW_SPAN
// swgl_commitTextureLinearYUV needs to know the color space specifier and
// also needs to know how many bits of scaling are required to normalize
- // HDR textures.
+ // HDR textures. Note that MSB HDR formats don't need renormalization.
vRescaleFactor = 0;
- if (prim.channel_bit_depth > 8) {
+ if (prim.channel_bit_depth > 8 && prim.yuv_format != YUV_FORMAT_P010) {
vRescaleFactor = 16 - prim.channel_bit_depth;
}
// Since SWGL rescales filtered YUV values to 8bpc before yuv->rgb
diff --git a/gfx/wr/webrender/res/composite.glsl b/gfx/wr/webrender/res/composite.glsl
index 3cfb36de51c83..576199fe5c46e 100644
--- a/gfx/wr/webrender/res/composite.glsl
+++ b/gfx/wr/webrender/res/composite.glsl
@@ -92,9 +92,9 @@ void main(void) {
#ifdef SWGL_DRAW_SPAN
// swgl_commitTextureLinearYUV needs to know the color space specifier and
// also needs to know how many bits of scaling are required to normalize
- // HDR textures.
+ // HDR textures. Note that MSB HDR formats don't need renormalization.
vRescaleFactor = 0;
- if (prim.channel_bit_depth > 8) {
+ if (prim.channel_bit_depth > 8 && prim.yuv_format != YUV_FORMAT_P010) {
vRescaleFactor = 16 - prim.channel_bit_depth;
}
// Since SWGL rescales filtered YUV values to 8bpc before yuv->rgb
@@ -208,7 +208,7 @@ void swgl_drawSpanRGBA8() {
vYcbcrBias,
vRgbFromDebiasedYcbcr,
vRescaleFactor);
- } else if (vYuvFormat.x == YUV_FORMAT_NV12) {
+ } else if (vYuvFormat.x == YUV_FORMAT_NV12 || vYuvFormat.x == YUV_FORMAT_P010) {
swgl_commitTextureLinearYUV(sColor0, vUV_y, vUVBounds_y,
sColor1, vUV_u, vUVBounds_u,
vYcbcrBias,
diff --git a/gfx/wr/webrender/res/yuv.glsl b/gfx/wr/webrender/res/yuv.glsl
index 9fd3af875ef74..064ba3b8afaa4 100644
--- a/gfx/wr/webrender/res/yuv.glsl
+++ b/gfx/wr/webrender/res/yuv.glsl
@@ -77,75 +77,60 @@ struct YuvColorMatrixInfo {
// -
-vec4 yuv_channel_zero_one_identity(int bit_depth, int format) {
- int channel_depth = 8;
- if (bit_depth > 8) {
- if (format == YUV_FORMAT_P010) {
- // This is an msb format.
- channel_depth = min(bit_depth, 16);
- } else {
- // For >8bpc, we get the low bits, not the high bits:
- // 10bpc(1.0): 0b0000_0011_1111_1111
- channel_depth = 16;
- }
- }
-
- float all_ones_normalized = float((1 << bit_depth) - 1) / float((1 << channel_depth) - 1);
+vec4 yuv_channel_zero_one_identity(int bit_depth, float channel_max) {
+ float all_ones_normalized = float((1 << bit_depth) - 1) / channel_max;
return vec4(0.0, 0.0, all_ones_normalized, all_ones_normalized);
}
-vec4 yuv_channel_zero_one_narrow_range(int bit_depth, int format) {
+vec4 yuv_channel_zero_one_narrow_range(int bit_depth, float channel_max) {
// Note: 512/1023 != 128/255
ivec4 zero_one_ints = ivec4(16, 128, 235, 240) << (bit_depth - 8);
-
- int channel_depth = 8;
- if (bit_depth > 8) {
- if (format == YUV_FORMAT_P010) {
- // This is an msb format.
- channel_depth = min(bit_depth, 16);
- } else {
- // For >8bpc, we get the low bits, not the high bits:
- // 10bpc(1.0): 0b0000_0011_1111_1111
- channel_depth = 16;
- }
- }
-
- return vec4(zero_one_ints) / float((1 << channel_depth) - 1);
+ return vec4(zero_one_ints) / channel_max;
}
-vec4 yuv_channel_zero_one_full_range(int bit_depth, int format) {
- vec4 narrow = yuv_channel_zero_one_narrow_range(bit_depth, format);
- vec4 identity = yuv_channel_zero_one_identity(bit_depth, format);
-
+vec4 yuv_channel_zero_one_full_range(int bit_depth, float channel_max) {
+ vec4 narrow = yuv_channel_zero_one_narrow_range(bit_depth, channel_max);
+ vec4 identity = yuv_channel_zero_one_identity(bit_depth, channel_max);
return vec4(0.0, narrow.y, identity.z, identity.w);
}
YuvColorSamplingInfo get_yuv_color_info(YuvPrimitive prim) {
+ float channel_max = 255.0;
+ if (prim.channel_bit_depth > 8) {
+ if (prim.yuv_format == YUV_FORMAT_P010) {
+ // This is an MSB format.
+ channel_max = float((1 << prim.channel_bit_depth) - 1);
+ } else {
+ // For >8bpc, we get the low bits, not the high bits:
+ // 10bpc(1.0): 0b0000_0011_1111_1111
+ channel_max = 65535.0;
+ }
+ }
if (prim.color_space == YUV_COLOR_SPACE_REC601_NARROW) {
return YuvColorSamplingInfo(RgbFromYuv_Rec601,
- yuv_channel_zero_one_narrow_range(prim.channel_bit_depth, prim.yuv_format));
+ yuv_channel_zero_one_narrow_range(prim.channel_bit_depth, channel_max));
} else if (prim.color_space == YUV_COLOR_SPACE_REC601_FULL) {
return YuvColorSamplingInfo(RgbFromYuv_Rec601,
- yuv_channel_zero_one_full_range(prim.channel_bit_depth, prim.yuv_format));
+ yuv_channel_zero_one_full_range(prim.channel_bit_depth, channel_max));
} else if (prim.color_space == YUV_COLOR_SPACE_REC709_NARROW) {
return YuvColorSamplingInfo(RgbFromYuv_Rec709,
- yuv_channel_zero_one_narrow_range(prim.channel_bit_depth, prim.yuv_format));
+ yuv_channel_zero_one_narrow_range(prim.channel_bit_depth, channel_max));
} else if (prim.color_space == YUV_COLOR_SPACE_REC709_FULL) {
return YuvColorSamplingInfo(RgbFromYuv_Rec709,
- yuv_channel_zero_one_full_range(prim.channel_bit_depth, prim.yuv_format));
+ yuv_channel_zero_one_full_range(prim.channel_bit_depth, channel_max));
} else if (prim.color_space == YUV_COLOR_SPACE_REC2020_NARROW) {
return YuvColorSamplingInfo(RgbFromYuv_Rec2020,
- yuv_channel_zero_one_narrow_range(prim.channel_bit_depth, prim.yuv_format));
+ yuv_channel_zero_one_narrow_range(prim.channel_bit_depth, channel_max));
} else if (prim.color_space == YUV_COLOR_SPACE_REC2020_FULL) {
return YuvColorSamplingInfo(RgbFromYuv_Rec2020,
- yuv_channel_zero_one_full_range(prim.channel_bit_depth, prim.yuv_format));
+ yuv_channel_zero_one_full_range(prim.channel_bit_depth, channel_max));
} else {
// Identity
return YuvColorSamplingInfo(RgbFromYuv_GbrIdentity,
- yuv_channel_zero_one_identity(prim.channel_bit_depth, prim.yuv_format));
+ yuv_channel_zero_one_identity(prim.channel_bit_depth, channel_max));
}
}
--
To stop receiving notification emails like this one, please contact
the administrator of this repository.
More information about the tbb-commits
mailing list