Samplin

Overhead Migration

The goal in this post is to migrate a truckload block of code I wrote to handle sampler updating out of zink and into Gallium, thereby creating several days worth of rebase work for myself but also removing a costly codepath from the driver thread.

The first step in getting sampler creation to work right in zink is getting Gallium to create samplers with the correct filters in accordance with Chapter 42 of the Vulkan Spec:

VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT specifies that if VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT is also set, an image view can be used with a sampler that has either of magFilter or minFilter set to VK_FILTER_LINEAR, or mipmapMode set to VK_SAMPLER_MIPMAP_MODE_LINEAR. If VK_FORMAT_FEATURE_BLIT_SRC_BIT is also set, an image can be used as the srcImage to vkCmdBlitImage2KHR and vkCmdBlitImage with a filter of VK_FILTER_LINEAR. This bit must only be exposed for formats that also support the VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT or VK_FORMAT_FEATURE_BLIT_SRC_BIT.

If the format being queried is a depth/stencil format, this bit only specifies that the depth aspect (not the stencil aspect) of an image of this format supports linear filtering, and that linear filtering of the depth aspect is supported whether depth compare is enabled in the sampler or not. If this bit is not present, linear filtering with depth compare disabled is unsupported and linear filtering with depth compare enabled is supported, but may compute the filtered value in an implementation-dependent manner which differs from the normal rules of linear filtering. The resulting value must be in the range [0,1] and should be proportional to, or a weighted average of, the number of comparison passes or failures.

Here’s the (primary) function that I’ll be modifying to get everything working:

/**
 * Fill a Gallium sampler-state template from a GL sampler object.
 *
 * \param st                 state tracker context; read here for
 *                           ctx->Const.ForceIntegerTexNearest and
 *                           apply_texture_swizzle_to_border_color
 * \param texobj             the texture being sampled
 * \param msamp              the GL sampler object supplying the attributes
 * \param tex_unit_lod_bias  bias added on top of msamp->Attrib.LodBias
 * \param sampler            output template; zeroed first, then every field
 *                           that applies is written
 */
void
st_convert_sampler(const struct st_context *st,
                   const struct gl_texture_object *texobj,
                   const struct gl_sampler_object *msamp,
                   float tex_unit_lod_bias,
                   struct pipe_sampler_state *sampler)
{
   /* Start from an all-zero template so that any field not written below
    * (border color, compare mode/func, ...) keeps the value 0.
    */
   memset(sampler, 0, sizeof(*sampler));
   sampler->wrap_s = gl_wrap_xlate(msamp->Attrib.WrapS);
   sampler->wrap_t = gl_wrap_xlate(msamp->Attrib.WrapT);
   sampler->wrap_r = gl_wrap_xlate(msamp->Attrib.WrapR);

   /* Integer textures are forced to NEAREST min/mag filtering when the
    * context constant asks for it; otherwise the GL filters are translated
    * as-is. The mip filter is always derived from the GL min filter.
    */
   if (texobj->_IsIntegerFormat && st->ctx->Const.ForceIntegerTexNearest) {
      sampler->min_img_filter = gl_filter_to_img_filter(GL_NEAREST);
      sampler->mag_img_filter = gl_filter_to_img_filter(GL_NEAREST);
   } else {
      sampler->min_img_filter = gl_filter_to_img_filter(msamp->Attrib.MinFilter);
      sampler->mag_img_filter = gl_filter_to_img_filter(msamp->Attrib.MagFilter);
   }
   sampler->min_mip_filter = gl_filter_to_mip_filter(msamp->Attrib.MinFilter);

   /* Every target except rectangle textures uses normalized coordinates;
    * for rectangle textures the field stays 0 from the memset.
    */
   if (texobj->Target != GL_TEXTURE_RECTANGLE_ARB)
      sampler->normalized_coords = 1;

   sampler->lod_bias = msamp->Attrib.LodBias + tex_unit_lod_bias;
   /* Reduce the number of states by allowing only the values that AMD GCN
    * can represent. Apps use lod_bias for smooth transitions to bigger mipmap
    * levels.
    *
    * Concretely: clamp to [-16, 16], then round to a multiple of 1/256.
    */
   sampler->lod_bias = CLAMP(sampler->lod_bias, -16, 16);
   sampler->lod_bias = roundf(sampler->lod_bias * 256) / 256;

   /* min_lod is clamped to be non-negative; max_lod is taken unchanged. */
   sampler->min_lod = MAX2(msamp->Attrib.MinLod, 0.0f);
   sampler->max_lod = msamp->Attrib.MaxLod;
   if (sampler->max_lod < sampler->min_lod) {
      /* The GL spec doesn't seem to specify what to do in this case.
       * Swap the values.
       */
      float tmp = sampler->max_lod;
      sampler->max_lod = sampler->min_lod;
      sampler->min_lod = tmp;
      assert(sampler->min_lod <= sampler->max_lod);
   }

   /* Check that only wrap modes using the border color have the first bit
    * set.
    *
    * The border-color test below relies on this: it ORs the three wrap
    * modes together and looks at bit 0 only.
    */
   STATIC_ASSERT(PIPE_TEX_WRAP_CLAMP & 0x1);
   STATIC_ASSERT(PIPE_TEX_WRAP_CLAMP_TO_BORDER & 0x1);
   STATIC_ASSERT(PIPE_TEX_WRAP_MIRROR_CLAMP & 0x1);
   STATIC_ASSERT(PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER & 0x1);
   STATIC_ASSERT(((PIPE_TEX_WRAP_REPEAT |
                   PIPE_TEX_WRAP_CLAMP_TO_EDGE |
                   PIPE_TEX_WRAP_MIRROR_REPEAT |
                   PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE) & 0x1) == 0);

   /* For non-black borders...
    *
    * An all-zero border color needs no work because the template was
    * zeroed above, so the translation is skipped in that case.
    */
   if (/* This is true if wrap modes are using the border color: */
       (sampler->wrap_s | sampler->wrap_t | sampler->wrap_r) & 0x1 &&
       (msamp->Attrib.BorderColor.ui[0] ||
        msamp->Attrib.BorderColor.ui[1] ||
        msamp->Attrib.BorderColor.ui[2] ||
        msamp->Attrib.BorderColor.ui[3])) {
      const GLboolean is_integer = texobj->_IsIntegerFormat;
      GLenum texBaseFormat = _mesa_base_tex_image(texobj)->_BaseFormat;

      /* When stencil sampling is enabled the border color is translated
       * as for a stencil-index texture, whatever the image's base format.
       */
      if (texobj->Attrib.StencilSampling)
         texBaseFormat = GL_STENCIL_INDEX;

      if (st->apply_texture_swizzle_to_border_color) {
         const struct st_texture_object *stobj = st_texture_object_const(texobj);
         /* XXX: clean that up to not use the sampler view at all */
         const struct st_sampler_view *sv = st_texture_get_current_sampler_view(st, stobj);

         if (sv) {
            /* Translate into a temporary first, then apply the current
             * sampler view's swizzle while writing the final border color.
             */
            struct pipe_sampler_view *view = sv->view;
            union pipe_color_union tmp;
            const unsigned char swz[4] =
            {
               view->swizzle_r,
               view->swizzle_g,
               view->swizzle_b,
               view->swizzle_a,
            };

            st_translate_color(&msamp->Attrib.BorderColor, &tmp,
                               texBaseFormat, is_integer);

            util_format_apply_color_swizzle(&sampler->border_color,
                                            &tmp, swz, is_integer);
         } else {
            /* No current sampler view, so there is no swizzle to apply. */
            st_translate_color(&msamp->Attrib.BorderColor,
                               &sampler->border_color,
                               texBaseFormat, is_integer);
         }
      } else {
         st_translate_color(&msamp->Attrib.BorderColor,
                            &sampler->border_color,
                            texBaseFormat, is_integer);
      }
   }

   /* A GL anisotropy of exactly 1.0 is stored as 0; any other value is
    * converted to an unsigned integer by the cast.
    */
   sampler->max_anisotropy = (msamp->Attrib.MaxAnisotropy == 1.0 ?
                              0 : (GLuint) msamp->Attrib.MaxAnisotropy);

   /* If sampling a depth texture and using shadow comparison.
    *
    * The compare fields are only written for depth base formats, or for
    * depth-stencil when stencil sampling is off; otherwise they stay 0.
    */
   if (msamp->Attrib.CompareMode == GL_COMPARE_R_TO_TEXTURE) {
      GLenum texBaseFormat = _mesa_base_tex_image(texobj)->_BaseFormat;

      if (texBaseFormat == GL_DEPTH_COMPONENT ||
          (texBaseFormat == GL_DEPTH_STENCIL && !texobj->Attrib.StencilSampling)) {
         sampler->compare_mode = PIPE_TEX_COMPARE_R_TO_TEXTURE;
         sampler->compare_func = st_compare_func_to_pipe(msamp->Attrib.CompareFunc);
      }
   }

   /* Only set the seamless cube map texture parameter because the per-context
    * enable should be ignored and treated as disabled when using texture
    * handles, as specified by ARB_bindless_texture.
    */
   sampler->seamless_cube_map = msamp->Attrib.CubeMapSeamless;
}

texobj here is the texture being sampled, msamp is the GL sampler object, and sampler is the template for the driver-backed sampler object that will be created with the pipe_context::create_sampler_state hook. The first half of the function deals with setting up filtering and wrap modes. The second half is mostly for border color pre-swizzling (i.e., what the Vulkan spec claims is the way that drivers should be handling border colors).

First: LINEAR Availability

First, if a driver doesn’t provide the format feature for linear filtering, linear filtering can’t be used.

I added a struct pipe_screen hook for this:

/**
 * Check if the given pipe_format and resource are supported for linear
 * filtering as a sampler view.
 *
 * This hook is optional: st_convert_sampler() only calls it when the
 * pointer is non-NULL, and assumes linear filtering is supported otherwise.
 *
 * \param format The format to check.
 * \param pres The resource to check.
 * \return true if linear filtering can be used, false if the caller must
 *         fall back to nearest filtering.
 */
bool (*is_linear_filtering_supported)( struct pipe_screen *,
                                       enum pipe_format format,
                                       struct pipe_resource *pres );

This gets called in st_convert_sampler(), which is the path that all user-managed samplers go through:

/**
 * Fill a Gallium sampler-state template from a GL sampler object, falling
 * back to nearest filtering when the driver reports that linear filtering
 * is not supported for the texture's format.
 *
 * (Excerpt: only the filter-selection part of the function is shown.)
 */
void
st_convert_sampler(const struct st_context *st,
                   const struct gl_texture_object *texobj,
                   const struct gl_sampler_object *msamp,
                   float tex_unit_lod_bias,
                   struct pipe_sampler_state *sampler)
{
   const struct st_texture_object *stobj = NULL;
   const struct st_sampler_view *sv = NULL;

   memset(sampler, 0, sizeof(*sampler));
   sampler->wrap_s = gl_wrap_xlate(msamp->Attrib.WrapS);
   sampler->wrap_t = gl_wrap_xlate(msamp->Attrib.WrapT);
   sampler->wrap_r = gl_wrap_xlate(msamp->Attrib.WrapR);

   /* Linear filtering is assumed to work unless the driver provides the
    * hook and the hook says otherwise.
    */
   bool is_linear_filtering_supported = true;

   if (st->pipe->screen->is_linear_filtering_supported) {
      /* Pick the format to query: the surface format for surface-based
       * textures, else the current sampler view's format, else the format
       * of the underlying resource.
       */
      enum pipe_format fmt = PIPE_FORMAT_NONE;
      stobj = st_texture_object_const(texobj);
      if (stobj->surface_based)
         fmt = stobj->surface_format;
      else {
         sv = st_texture_get_current_sampler_view(st, stobj);
         if (sv)
            fmt = sv->view->format;
         else
            fmt = stobj->pt->format;
      }
      assert(fmt != PIPE_FORMAT_NONE);
      is_linear_filtering_supported =
         st->pipe->screen->is_linear_filtering_supported(st->pipe->screen, fmt, stobj->pt);
   }

   /* Force NEAREST min/mag filters when linear is unavailable, or for
    * integer textures when the context constant asks for it.
    */
   if (!is_linear_filtering_supported ||
       (texobj->_IsIntegerFormat && st->ctx->Const.ForceIntegerTexNearest)) {
      sampler->min_img_filter = gl_filter_to_img_filter(GL_NEAREST);
      sampler->mag_img_filter = gl_filter_to_img_filter(GL_NEAREST);
   } else {
      sampler->min_img_filter = gl_filter_to_img_filter(msamp->Attrib.MinFilter);
      sampler->mag_img_filter = gl_filter_to_img_filter(msamp->Attrib.MagFilter);
   }

   /* NOTE(review): the fallback below stores the result of the *image*
    * filter translation in the *mip* filter field, while the supported
    * branch uses gl_filter_to_mip_filter(). Confirm that the two enums
    * agree for NEAREST, and whether a GL min filter that requests no
    * mipmapping should stay un-mipmapped on this path.
    */
   if (is_linear_filtering_supported)
      sampler->min_mip_filter = gl_filter_to_mip_filter(msamp->Attrib.MinFilter);
   else
      sampler->min_mip_filter = gl_filter_to_img_filter(GL_NEAREST);

The code automatically assumes that linear filtering is available for all formats, utilizing the new is_linear_filtering_supported method if it’s available in order to override that value. The filtering modes are then updated based on the (Vulkan) driver’s capabilities.

Easy.

Second: LINEAR Depth Filtering

The spec allows linear filtering unconditionally for formats containing a depth aspect so long as depth compare is enabled:

If this bit is not present, linear filtering with depth compare disabled is unsupported and linear filtering with depth compare enabled is supported

For this, I added PIPE_CAP_LINEAR_DEPTH_FILTERING and some interaction with the pipe_screen::is_linear_filtering_supported hook:

...
   bool is_linear_filtering_supported = true;
   bool has_depth = false;

   if (st->pipe->screen->is_linear_filtering_supported) {
      enum pipe_format fmt = PIPE_FORMAT_NONE;
      stobj = st_texture_object_const(texobj);
      if (stobj->surface_based)
         fmt = stobj->surface_format;
      else {
         sv = st_texture_get_current_sampler_view(st, stobj);
         if (sv)
            fmt = sv->view->format;
         else
            fmt = stobj->pt->format;
      }
      assert(fmt != PIPE_FORMAT_NONE);
      is_linear_filtering_supported =
         st->pipe->screen->is_linear_filtering_supported(st->pipe->screen, fmt, stobj->pt);
      if (st->linear_depth_filtering_semantics)
         has_depth = util_format_has_depth(util_format_description(fmt));
   }

   /* PIPE_CAP_LINEAR_DEPTH_FILTERING */
   if (has_depth &&
       !is_linear_filtering_supported) {
      /* this conditional has the same result as the one after it,
       * but its complexity makes splitting it more readable
       */
      sampler->min_img_filter = gl_filter_to_img_filter(GL_NEAREST);
      sampler->mag_img_filter = gl_filter_to_img_filter(GL_NEAREST);
   } else if ((!is_linear_filtering_supported && !has_depth) ||
       (texobj->_IsIntegerFormat && st->ctx->Const.ForceIntegerTexNearest)) {
      sampler->min_img_filter = gl_filter_to_img_filter(GL_NEAREST);
      sampler->mag_img_filter = gl_filter_to_img_filter(GL_NEAREST);
   } else {
      sampler->min_img_filter = gl_filter_to_img_filter(msamp->Attrib.MinFilter);
      sampler->mag_img_filter = gl_filter_to_img_filter(msamp->Attrib.MagFilter);
   }

   if (is_linear_filtering_supported || has_depth)
      sampler->min_mip_filter = gl_filter_to_mip_filter(msamp->Attrib.MinFilter);
   else
      sampler->min_mip_filter = gl_filter_to_img_filter(GL_NEAREST);
...
   /* PIPE_CAP_LINEAR_DEPTH_FILTERING */
   if (sampler->compare_mode == PIPE_TEX_COMPARE_NONE &&
       has_depth && !is_linear_filtering_supported &&
       (sampler->mag_img_filter == PIPE_TEX_FILTER_LINEAR ||
        sampler->min_img_filter == PIPE_TEX_FILTER_LINEAR ||
        sampler->min_mip_filter == PIPE_TEX_FILTER_LINEAR)) {
      sampler->compare_mode = PIPE_TEX_COMPARE_R_TO_TEXTURE;
      sampler->compare_func = PIPE_FUNC_ALWAYS;
   }

If the format has a depth component, allow linear filtering and, if necessary, enable depth compare.

Nothing too complex here either.

Third: Handle Int-based Border Colors

Now it’s time to start getting grimy. Vulkan’s custom border color extension is, at best, functional. One component of the spec for this is that border colors can be specified as either integer or float values, and this is actually significant, so ideally this should just be passed through using the same value the user specified.

I made PIPE_CAP_NEED_BORDER_COLOR_TYPE for this. When specified, the border_color_is_integer member that I added to struct pipe_sampler_state will be treated as a disambiguating value for sampler states, and thus zink can use it to set the right type of border color values. It affects this glorious micro-optimization in the CSO state cache:

/**
 * Set a single sampler state for a shader stage from a template.
 *
 * (Excerpt: only the hash-key computation is shown.)
 */
void
cso_single_sampler(struct cso_context *ctx, enum pipe_shader_type shader_stage,
                   unsigned idx, const struct pipe_sampler_state *templ)
{
   if (templ) {
      /* Hash the whole template only when the context needs the border
       * color type; otherwise hash just the bytes that precede
       * border_color_is_integer so that member does not split otherwise
       * identical states.
       * NOTE(review): this presumes border_color_is_integer is the last
       * member of struct pipe_sampler_state - confirm against the struct.
       */
      unsigned key_size = ctx-> needs_sampler_border_color_type ?
                             sizeof(struct pipe_sampler_state) :
                             offsetof(struct pipe_sampler_state, border_color_is_integer);
      unsigned hash_key = cso_construct_key((void*)templ, key_size);

Finally: GL_CLAMP

Get ready to roll around in some mud.

I added PIPE_CAP_EMULATE_GL_CLAMP_PLZ (named by Kayden, not up for discussion) to handle this awfulness. Functionally, it comprises three components:

- handling in Mesa core to flag shader and sampler state updates any time a sampler sets or unsets GL_CLAMP (or GL_MIRROR_CLAMP)
- handling in Gallium to force the sampler to either CLAMP_TO_BORDER or CLAMP_TO_EDGE depending on linear filtering availability for a given resource
- shader rewrites in Gallium to run nir_lower_tex with bitfield info set for the GL_CLAMP samplers (henceforth gl_clamplers) that need to be rewritten

Since I’m already going deep into this function, let’s go once more into st_convert_sampler():

...
   /* PIPE_CAP_LINEAR_DEPTH_FILTERING */
   if (has_depth &&
       !is_linear_filtering_supported &&
       (!st->emulate_gl_clamp || (
       sampler->wrap_s != PIPE_TEX_WRAP_CLAMP &&
       sampler->wrap_s != PIPE_TEX_WRAP_MIRROR_CLAMP &&
       sampler->wrap_t != PIPE_TEX_WRAP_CLAMP &&
       sampler->wrap_t != PIPE_TEX_WRAP_MIRROR_CLAMP &&
       sampler->wrap_r != PIPE_TEX_WRAP_CLAMP &&
       sampler->wrap_r != PIPE_TEX_WRAP_MIRROR_CLAMP))) {
      /* this conditional has the same result as the one after it,
       * but its complexity makes splitting it more readable
       */
      sampler->min_img_filter = gl_filter_to_img_filter(GL_NEAREST);
      sampler->mag_img_filter = gl_filter_to_img_filter(GL_NEAREST);
   } else if ((!is_linear_filtering_supported && !has_depth) ||
       (texobj->_IsIntegerFormat && st->ctx->Const.ForceIntegerTexNearest)) {
      sampler->min_img_filter = gl_filter_to_img_filter(GL_NEAREST);
      sampler->mag_img_filter = gl_filter_to_img_filter(GL_NEAREST);
   } else {
      sampler->min_img_filter = min_img_filter;
      sampler->mag_img_filter = gl_filter_to_img_filter(msamp->Attrib.MagFilter);
   }
...
   if (st->emulate_gl_clamp) {
      bool clamp_to_border = (is_linear_filtering_supported || has_depth) &&
                             min_img_filter != PIPE_TEX_FILTER_NEAREST;
      if (sampler->wrap_s == PIPE_TEX_WRAP_CLAMP)
         sampler->wrap_s = clamp_to_border ? PIPE_TEX_WRAP_CLAMP_TO_BORDER :
                                             PIPE_TEX_WRAP_CLAMP_TO_EDGE;
      else if (sampler->wrap_s == PIPE_TEX_WRAP_MIRROR_CLAMP)
         sampler->wrap_s = clamp_to_border ? PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER :
                                             PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;

      if (sampler->wrap_t == PIPE_TEX_WRAP_CLAMP)
         sampler->wrap_t = clamp_to_border ? PIPE_TEX_WRAP_CLAMP_TO_BORDER :
                                             PIPE_TEX_WRAP_CLAMP_TO_EDGE;
      else if (sampler->wrap_t == PIPE_TEX_WRAP_MIRROR_CLAMP)
         sampler->wrap_t = clamp_to_border ? PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER :
                                             PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;

      if (sampler->wrap_r == PIPE_TEX_WRAP_CLAMP)
         sampler->wrap_r = clamp_to_border ? PIPE_TEX_WRAP_CLAMP_TO_BORDER :
                                             PIPE_TEX_WRAP_CLAMP_TO_EDGE;
      else if (sampler->wrap_r == PIPE_TEX_WRAP_MIRROR_CLAMP)
         sampler->wrap_r = clamp_to_border ? PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER :
                                             PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE;
   }
...

The depth component codepath is a little trickier, so I’ve split it off for readability even though it’s able to be collapsed into the conditional after. In short, this is only allowing linear filtering for unsupported depth formats when GL_CLAMP is also used.

With the filtering and wrap modes set, the next step here is to adjust the wrap modes based on whether linear filtering is available and the min filter mode. If linear is available and the min filter is linear, GL_CLAMP becomes CLAMP_TO_BORDER, otherwise it’s CLAMP_TO_EDGE. In conjunction with the NIR pass, this ends up replicating the expected behavior.

And to get that info to the NIR pass, more awfulness is required:

/**
 * Tell whether a GL wrap-mode value is one of the legacy clamp modes.
 *
 * \param param  GL texture wrap mode enum value
 * \return non-zero for GL_CLAMP or GL_MIRROR_CLAMP_EXT, zero otherwise
 */
static inline GLboolean
is_wrap_gl_clamp(GLint param)
{
   switch (param) {
   case GL_CLAMP:
   case GL_MIRROR_CLAMP_EXT:
      return 1;
   default:
      return 0;
   }
}

static void
update_gl_clamplers(struct st_context *st, struct gl_program *prog, uint32_t *gl_clamplers)
{
   if (!st->emulate_gl_clamp)
      return;

   gl_clamplers[0] = gl_clamplers[1] = gl_clamplers[2] = 0;
   GLbitfield samplers_used = prog->SamplersUsed;
   unsigned unit;
   /* same as st_atom_sampler.c */
   for (unit = 0; samplers_used; unit++, samplers_used >>= 1) {
      unsigned tex_unit = prog->SamplerUnits[unit];
      if (samplers_used & 1 &&
          (st->ctx->Texture.Unit[tex_unit]._Current->Target != GL_TEXTURE_BUFFER ||
           st->texture_buffer_sampler)) {
         const struct gl_texture_object *texobj;
         struct gl_context *ctx = st->ctx;
         const struct gl_sampler_object *msamp;

         texobj = ctx->Texture.Unit[tex_unit]._Current;
         assert(texobj);

         msamp = _mesa_get_samplerobj(ctx, tex_unit);
         if (is_wrap_gl_clamp(msamp->Attrib.WrapS))
            gl_clamplers[0] |= BITFIELD64_BIT(unit);
         if (is_wrap_gl_clamp(msamp->Attrib.WrapT))
            gl_clamplers[1] |= BITFIELD64_BIT(unit);
         if (is_wrap_gl_clamp(msamp->Attrib.WrapR))
            gl_clamplers[2] |= BITFIELD64_BIT(unit);
      }
   }
}

This function iterates over all the samplers used by a given shader (struct gl_program), checking the wrap modes for GL_CLAMP and then updating the bitfields which correspond to struct nir_lower_tex_options::saturate_{s,t,r} when one is found. Each Gallium shader key is updated to use the values for comparisons, though I’ve helpfully reduced the key size used for comparisons for drivers which don’t set the pipe cap as well as those which do but have yet to see a gl_clampler.

The Result

By setting all these pipe caps and adding a trivial function, zink no longer needs to internally create and track sampler variants based on the above factors in order to support various sampler modes. Additionally, all Gallium-based drivers which emulate GL_CLAMP (there are several) can switch over to this and delete a bunch of code.

Hooray.

Written on January 26, 2021