From 1457e8659f02b9c402e00c58e35c487b85e0c04b Mon Sep 17 00:00:00 2001
From: Chris Wilson
Date: Thu, 17 Mar 2011 15:23:22 +0000
Subject: [PATCH] drm/i915: Fix tiling corruption from pipelined fencing

... even though it was disabled. A mistake in the handling of fence reuse
caused us to skip the vital delay of waiting for the object to finish
rendering before changing the register.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=34584
Cc: Andy Whitcroft
Cc: Daniel Vetter
Cc: stable@kernel.org # 2.6.38
Signed-off-by: Chris Wilson
---
 drivers/gpu/drm/i915/i915_drv.h            |    2 -
 drivers/gpu/drm/i915/i915_gem.c            |  116 ++++++++++------------
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   12 +--
 3 files changed, 44 insertions(+), 86 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4496505..d5abd2c 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -129,7 +129,6 @@ struct drm_i915_master_private {
 struct drm_i915_fence_reg {
         struct list_head lru_list;
         struct drm_i915_gem_object *obj;
-        uint32_t setup_seqno;
 };
 
 struct sdvo_device_mapping {
@@ -818,7 +817,6 @@ struct drm_i915_gem_object {
 
         /** Breadcrumb of last fenced GPU access to the buffer. */
         uint32_t last_fenced_seqno;
-        struct intel_ring_buffer *last_fenced_ring;
 
         /** Current tiling stride for the object, if it's tiled. */
         uint32_t stride;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d4bf061..6d92b8b8 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1638,9 +1638,7 @@ i915_gem_object_move_to_active(struct drm_i915_gem_object *obj,
                 struct drm_i915_fence_reg *reg;
 
                 BUG_ON(obj->fence_reg == I915_FENCE_REG_NONE);
-
                 obj->last_fenced_seqno = seqno;
-                obj->last_fenced_ring = ring;
 
                 reg = &dev_priv->fence_regs[obj->fence_reg];
                 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
@@ -1682,10 +1680,11 @@ i915_gem_object_move_to_inactive(struct drm_i915_gem_object *obj)
         obj->ring = NULL;
 
         i915_gem_object_move_off_active(obj);
-        obj->fenced_gpu_access = false;
 
         obj->active = 0;
         obj->pending_gpu_write = false;
+        obj->fenced_gpu_access = false;
+        obj->last_fenced_seqno = 0;
         drm_gem_object_unreference(&obj->base);
 
         WARN_ON(i915_verify_lists(dev));
@@ -1836,6 +1835,8 @@ static void i915_gem_reset_fences(struct drm_device *dev)
         struct drm_i915_private *dev_priv = dev->dev_private;
         int i;
 
+        mb();
+
         for (i = 0; i < 16; i++) {
                 struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
                 struct drm_i915_gem_object *obj = reg->obj;
@@ -1849,9 +1850,10 @@ static void i915_gem_reset_fences(struct drm_device *dev)
                 reg->obj->fence_reg = I915_FENCE_REG_NONE;
                 reg->obj->fenced_gpu_access = false;
                 reg->obj->last_fenced_seqno = 0;
-                reg->obj->last_fenced_ring = NULL;
                 i915_gem_clear_fence_reg(dev, reg);
         }
+
+        mb();
 }
 
 void i915_gem_reset(struct drm_device *dev)
@@ -2450,26 +2452,28 @@ i915_gem_object_flush_fence(struct drm_i915_gem_object *obj,
 
         if (obj->fenced_gpu_access) {
                 if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
-                        ret = i915_gem_flush_ring(obj->last_fenced_ring,
+                        ret = i915_gem_flush_ring(obj->ring,
                                                   0, obj->base.write_domain);
                         if (ret)
                                 return ret;
                 }
 
+                /* Invalidate the GPU TLBs for any future reads */
+                obj->base.read_domains &= ~I915_GEM_GPU_DOMAINS;
                 obj->fenced_gpu_access = false;
         }
 
-        if (obj->last_fenced_seqno && pipelined != obj->last_fenced_ring) {
-                if (!ring_passed_seqno(obj->last_fenced_ring,
-                                       obj->last_fenced_seqno)) {
-                        ret = i915_wait_request(obj->last_fenced_ring,
+        if (obj->last_fenced_seqno) {
+                if (ring_passed_seqno(obj->ring, obj->last_fenced_seqno)) {
+                        obj->last_fenced_seqno = 0;
+                } else if (pipelined != obj->ring) {
+                        ret = i915_wait_request(obj->ring,
                                                 obj->last_fenced_seqno);
                         if (ret)
                                 return ret;
-                }
 
-                obj->last_fenced_seqno = 0;
-                obj->last_fenced_ring = NULL;
+                        obj->last_fenced_seqno = 0;
+                }
         }
 
         /* Ensure that all CPU reads are completed before installing a fence
@@ -2495,9 +2499,9 @@ i915_gem_object_put_fence(struct drm_i915_gem_object *obj)
 
         if (obj->fence_reg != I915_FENCE_REG_NONE) {
                 struct drm_i915_private *dev_priv = obj->base.dev->dev_private;
+
                 i915_gem_clear_fence_reg(obj->base.dev,
                                          &dev_priv->fence_regs[obj->fence_reg]);
-
                 obj->fence_reg = I915_FENCE_REG_NONE;
         }
 
@@ -2529,15 +2533,15 @@ i915_find_fence_reg(struct drm_device *dev,
         /* None available, try to steal one or wait for a user to finish */
         avail = first = NULL;
         list_for_each_entry(reg, &dev_priv->mm.fence_list, lru_list) {
-                if (reg->obj->pin_count)
+                struct drm_i915_gem_object *obj = reg->obj;
+
+                if (obj->pin_count)
                         continue;
 
                 if (first == NULL)
                         first = reg;
 
-                if (!pipelined ||
-                    !reg->obj->last_fenced_ring ||
-                    reg->obj->last_fenced_ring == pipelined) {
+                if (!pipelined || !obj->ring || obj->ring == pipelined) {
                         avail = reg;
                         break;
                 }
@@ -2573,58 +2577,16 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
         struct drm_i915_fence_reg *reg;
         int ret;
 
-        /* XXX disable pipelining. There are bugs. Shocking. */
-        pipelined = NULL;
-
         /* Just update our place in the LRU if our fence is getting reused. */
         if (obj->fence_reg != I915_FENCE_REG_NONE) {
                 reg = &dev_priv->fence_regs[obj->fence_reg];
                 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
 
-                if (!obj->fenced_gpu_access && !obj->last_fenced_seqno)
-                        pipelined = NULL;
-
-                if (!pipelined) {
-                        if (reg->setup_seqno) {
-                                if (!ring_passed_seqno(obj->last_fenced_ring,
-                                                       reg->setup_seqno)) {
-                                        ret = i915_wait_request(obj->last_fenced_ring,
-                                                                reg->setup_seqno);
-                                        if (ret)
-                                                return ret;
-                                }
-
-                                reg->setup_seqno = 0;
-                        }
-                } else if (obj->last_fenced_ring &&
-                           obj->last_fenced_ring != pipelined) {
+                if (obj->tiling_changed) {
                         ret = i915_gem_object_flush_fence(obj, pipelined);
                         if (ret)
                                 return ret;
-                } else if (obj->tiling_changed) {
-                        if (obj->fenced_gpu_access) {
-                                if (obj->base.write_domain & I915_GEM_GPU_DOMAINS) {
-                                        ret = i915_gem_flush_ring(obj->ring,
-                                                                  0, obj->base.write_domain);
-                                        if (ret)
-                                                return ret;
-                                }
-
-                                obj->fenced_gpu_access = false;
-                        }
-                }
-
-                if (!obj->fenced_gpu_access && !obj->last_fenced_seqno)
-                        pipelined = NULL;
-                BUG_ON(!pipelined && reg->setup_seqno);
-
-                if (obj->tiling_changed) {
-                        if (pipelined) {
-                                reg->setup_seqno =
-                                        i915_gem_next_request_seqno(pipelined);
-                                obj->last_fenced_seqno = reg->setup_seqno;
-                                obj->last_fenced_ring = pipelined;
-                        }
                         goto update;
                 }
@@ -2647,35 +2609,35 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
                 if (old->tiling_mode)
                         i915_gem_release_mmap(old);
 
-                ret = i915_gem_object_flush_fence(old, pipelined);
+                ret = i915_gem_object_flush_fence(old, NULL);
                 if (ret) {
                         drm_gem_object_unreference(&old->base);
                         return ret;
                 }
 
-                if (old->last_fenced_seqno == 0 && obj->last_fenced_seqno == 0)
-                        pipelined = NULL;
-
                 old->fence_reg = I915_FENCE_REG_NONE;
-                old->last_fenced_ring = pipelined;
-                old->last_fenced_seqno =
-                        pipelined ? i915_gem_next_request_seqno(pipelined) : 0;
-
+                if (pipelined && old->last_fenced_seqno) {
+                        old->last_fenced_seqno =
+                                i915_gem_next_request_seqno(pipelined);
+                        i915_gem_object_move_to_active(old, pipelined,
+                                                       old->last_fenced_seqno);
+                }
+                obj->last_fenced_seqno = old->last_fenced_seqno;
 
                 drm_gem_object_unreference(&old->base);
-        } else if (obj->last_fenced_seqno == 0)
-                pipelined = NULL;
+        }
 
         reg->obj = obj;
         list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
         obj->fence_reg = reg - dev_priv->fence_regs;
-        obj->last_fenced_ring = pipelined;
-
-        reg->setup_seqno =
-                pipelined ? i915_gem_next_request_seqno(pipelined) : 0;
-        obj->last_fenced_seqno = reg->setup_seqno;
 
 update:
+        if (obj->last_fenced_seqno == 0)
+                pipelined = NULL;
+
+        obj->last_fenced_seqno =
+                pipelined ? i915_gem_next_request_seqno(pipelined) : 0;
         obj->tiling_changed = false;
+
         switch (INTEL_INFO(dev)->gen) {
         case 6:
                 ret = sandybridge_write_fence_reg(obj, pipelined);
@@ -2692,6 +2654,9 @@ update:
                 break;
         }
 
+        if (!pipelined)
+                mb();
+
         return ret;
 }
 
@@ -2730,7 +2695,6 @@ i915_gem_clear_fence_reg(struct drm_device *dev,
 
         list_del_init(&reg->lru_list);
         reg->obj = NULL;
-        reg->setup_seqno = 0;
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 7ff7f93..82b24d5 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -172,23 +172,19 @@ i915_gem_object_set_to_gpu_domain(struct drm_i915_gem_object *obj,
          * write domain
          */
         if (obj->base.write_domain &&
-            (((obj->base.write_domain != obj->base.pending_read_domains ||
-               obj->ring != ring)) ||
-             (obj->fenced_gpu_access && !obj->pending_fenced_gpu_access))) {
+            (obj->base.write_domain != obj->base.pending_read_domains ||
+             obj->ring != ring))
                 flush_domains |= obj->base.write_domain;
-                invalidate_domains |=
-                        obj->base.pending_read_domains & ~obj->base.write_domain;
-        }
 
         /*
          * Invalidate any read caches which may have
          * stale data. That is, any new read domains.
          */
         invalidate_domains |= obj->base.pending_read_domains & ~obj->base.read_domains;
-        if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU)
+        if (flush_domains & I915_GEM_DOMAIN_CPU)
                 i915_gem_clflush_object(obj);
 
         /* blow away mappings if mapped through GTT */
-        if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_GTT)
+        if (flush_domains & I915_GEM_DOMAIN_GTT)
                 i915_gem_release_mmap(obj);
         if (obj->base.pending_write_domain)
-- 
1.7.2.3
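
For readers outside the i915 codebase: the ordering requirement the patch
restores, namely waiting for the old object's last fenced rendering to
retire before the fence register is rewritten, can be sketched in
isolation. The following is a minimal stand-alone model, not driver code;
toy_object, toy_fence_reg, ring_passed_seqno(), wait_request() and
steal_fence() are all hypothetical stand-ins for the corresponding i915
internals, and the "GPU" here is simulated by a counter.

/* Toy model of the fixed fence-stealing path. Everything here is a
 * hypothetical stand-in for i915 internals; only the ordering matters:
 * wait for the old object's last fenced rendering to retire *before*
 * the fence register is rewritten for the new object.
 */
#include <stdio.h>
#include <stdint.h>

struct toy_object {
        uint32_t last_fenced_seqno;     /* 0 means no outstanding fenced rendering */
};

struct toy_fence_reg {
        struct toy_object *obj;         /* object currently fenced by this register */
};

static uint32_t completed_seqno;        /* highest seqno the "GPU" has retired */

static int ring_passed_seqno(uint32_t seqno)
{
        return completed_seqno >= seqno;
}

static void wait_request(uint32_t seqno)
{
        /* Stand-in for i915_wait_request(): block until the seqno retires.
         * In this model the "GPU" trivially makes progress each iteration. */
        while (!ring_passed_seqno(seqno))
                completed_seqno++;
}

static void steal_fence(struct toy_fence_reg *reg, struct toy_object *new_obj)
{
        struct toy_object *old = reg->obj;

        /* The bug: the fence-reuse path skipped this wait, so the register
         * was rewritten while the old object could still be rendering
         * through it, corrupting its tiled accesses. */
        if (old && old->last_fenced_seqno &&
            !ring_passed_seqno(old->last_fenced_seqno)) {
                wait_request(old->last_fenced_seqno);
                old->last_fenced_seqno = 0;
        }

        reg->obj = new_obj;     /* only now is the register write safe */
}

int main(void)
{
        struct toy_object busy = { .last_fenced_seqno = 42 };
        struct toy_object fresh = { 0 };
        struct toy_fence_reg reg = { .obj = &busy };

        steal_fence(&reg, &fresh);
        printf("register reassigned safely: %s\n",
               reg.obj == &fresh ? "yes" : "no");
        return 0;
}

The point of the model is the order of operations in steal_fence(): the
reg->obj assignment, standing in for the hardware fence-register write,
must not happen until the wait has retired the old object's seqno, which
is exactly the delay the buggy reuse path skipped.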