From 871671c4ab8ff00e85b434865e8855fc356efa8f Mon Sep 17 00:00:00 2001
From: Cody Northrop <cody@lunarg.com>
Date: Thu, 12 Jun 2014 09:07:18 -0600
Subject: [PATCH] i965/fs: Update discard jump to preserve uniform loads via
 sampler.

The series that implemented this optimization was done before
the changes to use samplers for uniform loads.  Uniform sampler
loads use special execution masks and only populate four
channels, so we can't jump over those or corruption ensues.
Use a more conservative jump mask which only jumps to the end
if all relevant channels are disabled.

No change was observed in GLBenchmark 2.7, so the optimization
is preserved.

Signed-off-by: Cody Northrop <cody@lunarg.com>
Reviewed-by: Mike Stroyan <mike@lunarg.com>
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=79948
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 8858852..fe05715 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1907,7 +1907,15 @@ fs_visitor::visit(ir_discard *ir)
        */
       fs_inst *discard_jump = emit(FS_OPCODE_DISCARD_JUMP);
       discard_jump->flag_subreg = 1;
-      discard_jump->predicate = BRW_PREDICATE_ALIGN1_ANY4H;
+
+      /* Uniforms are now loaded using samplers with a routine that has
+       * its own execution mask, so we can only jump if all relevant
+       * channels are dead.  This is more conservative than the previous
+       * four channel checking, but still preserves speedups.
+       */
+      discard_jump->predicate = (8 == dispatch_width)
+                                ? BRW_PREDICATE_ALIGN1_ANY8H
+                                : BRW_PREDICATE_ALIGN1_ANY16H;
       discard_jump->predicate_inverse = true;
    }
 }
-- 
1.8.3.2