From 1995f249db8b0a370604891f6faa37b31ff7c7af Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 1 Oct 2012 09:56:53 +0100 Subject: [PATCH] drm/i915: Avoid forcing relocations through the mappable GTT or CPU If the object lies outside of the mappable GTT aperture, do not force it through the CPU domain for relocations, but simply flush the writes as we perform them and then queue a chipset flush. Signed-off-by: Chris Wilson --- drivers/gpu/drm/i915/i915_gem_execbuffer.c | 87 ++++++++++++++++------------ 1 file changed, 51 insertions(+), 36 deletions(-) diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 3eea143..33b015f 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -33,6 +33,9 @@ #include "intel_drv.h" #include +#define __EXEC_OBJECT_HAS_PIN (1<<31) +#define __EXEC_OBJECT_HAS_FENCE (1<<30) + struct eb_objects { int and; struct hlist_head buckets[0]; @@ -95,10 +98,16 @@ eb_destroy(struct eb_objects *eb) static inline int use_cpu_reloc(struct drm_i915_gem_object *obj) { return (obj->base.write_domain == I915_GEM_DOMAIN_CPU || - !obj->map_and_fenceable || obj->cache_level != I915_CACHE_NONE); } +static inline struct page * +gtt_offset_to_page(struct drm_i915_gem_object *obj, u32 offset) +{ + offset -= obj->gtt_space->start; + return i915_gem_object_get_page(obj, offset >> PAGE_SHIFT); +} + static int i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, struct eb_objects *eb, @@ -202,22 +211,20 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, return -EFAULT; reloc->delta += target_offset; + reloc->offset += obj->gtt_offset; if (use_cpu_reloc(obj)) { - uint32_t page_offset = reloc->offset & ~PAGE_MASK; char *vaddr; - ret = i915_gem_object_set_to_cpu_domain(obj, 1); + ret = i915_gem_object_set_to_cpu_domain(obj, true); if (ret) return ret; - vaddr = kmap_atomic(i915_gem_object_get_page(obj, - reloc->offset >> PAGE_SHIFT)); - *(uint32_t *)(vaddr + page_offset) = reloc->delta; + vaddr = kmap_atomic(gtt_offset_to_page(obj, reloc->offset)); + *(uint32_t *)(vaddr + offset_in_page(reloc->offset)) = reloc->delta; kunmap_atomic(vaddr); } else { struct drm_i915_private *dev_priv = dev->dev_private; - uint32_t __iomem *reloc_entry; - void __iomem *reloc_page; + unsigned page_offset; ret = i915_gem_object_set_to_gtt_domain(obj, true); if (ret) @@ -228,13 +235,28 @@ i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj, return ret; /* Map the page containing the relocation we're going to perform. */ - reloc->offset += obj->gtt_offset; - reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping, - reloc->offset & PAGE_MASK); - reloc_entry = (uint32_t __iomem *) - (reloc_page + (reloc->offset & ~PAGE_MASK)); - iowrite32(reloc->delta, reloc_entry); - io_mapping_unmap_atomic(reloc_page); + page_offset = offset_in_page(reloc->offset); + + if (reloc->offset < dev_priv->mm.gtt_mappable_end) { + void __iomem *reloc_page; + + reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping, + reloc->offset & PAGE_MASK); + iowrite32(reloc->delta, reloc_page + page_offset); + io_mapping_unmap_atomic(reloc_page); + } else { + char *vaddr; + + vaddr = kmap_atomic(gtt_offset_to_page(obj, reloc->offset)); + + drm_clflush_virt_range(vaddr + page_offset, 4); + *(uint32_t *)(vaddr + page_offset) = reloc->delta; + drm_clflush_virt_range(vaddr + page_offset, 4); + + kunmap_atomic(vaddr); + + obj->base.pending_write_domain |= I915_GEM_DOMAIN_CPU; + } } /* and update the user's relocation entry */ @@ -332,16 +354,6 @@ i915_gem_execbuffer_relocate(struct drm_device *dev, return ret; } -#define __EXEC_OBJECT_HAS_PIN (1<<31) -#define __EXEC_OBJECT_HAS_FENCE (1<<30) - -static int -need_reloc_mappable(struct drm_i915_gem_object *obj) -{ - struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; - return entry->relocation_count && !use_cpu_reloc(obj); -} - static int i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, struct intel_ring_buffer *ring) @@ -349,16 +361,15 @@ i915_gem_execbuffer_reserve_object(struct drm_i915_gem_object *obj, struct drm_i915_private *dev_priv = obj->base.dev->dev_private; struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4; - bool need_fence, need_mappable; + bool need_fence; int ret; need_fence = has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; - need_mappable = need_fence || need_reloc_mappable(obj); - ret = i915_gem_object_pin(obj, entry->alignment, need_mappable, false); + ret = i915_gem_object_pin(obj, entry->alignment, need_fence, false); if (ret) return ret; @@ -421,7 +432,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, INIT_LIST_HEAD(&ordered_objects); while (!list_empty(objects)) { struct drm_i915_gem_exec_object2 *entry; - bool need_fence, need_mappable; + bool need_fence; obj = list_first_entry(objects, struct drm_i915_gem_object, @@ -432,9 +443,8 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; - need_mappable = need_fence || need_reloc_mappable(obj); - if (need_mappable) + if (need_fence) list_move(&obj->exec_list, &ordered_objects); else list_move_tail(&obj->exec_list, &ordered_objects); @@ -464,7 +474,7 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, /* Unbind any ill-fitting objects or pin. */ list_for_each_entry(obj, objects, exec_list) { struct drm_i915_gem_exec_object2 *entry = obj->exec_entry; - bool need_fence, need_mappable; + bool need_fence; if (!obj->gtt_space) continue; @@ -473,10 +483,9 @@ i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring, has_fenced_gpu_access && entry->flags & EXEC_OBJECT_NEEDS_FENCE && obj->tiling_mode != I915_TILING_NONE; - need_mappable = need_fence || need_reloc_mappable(obj); if ((entry->alignment && obj->gtt_offset & (entry->alignment - 1)) || - (need_mappable && !obj->map_and_fenceable)) + (need_fence && !obj->map_and_fenceable)) ret = i915_gem_object_unbind(obj); else ret = i915_gem_execbuffer_reserve_object(obj, ring); @@ -656,13 +665,19 @@ i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring, if (ret) return ret; + flush_domains |= obj->base.write_domain; + if (obj->base.write_domain & I915_GEM_DOMAIN_CPU) i915_gem_clflush_object(obj); + /* Used as an internal marker during relocation processing */ + if (obj->base.pending_write_domain & ~I915_GEM_GPU_DOMAINS) { + flush_domains |= obj->base.pending_write_domain & ~I915_GEM_GPU_DOMAINS; + obj->base.pending_write_domain &= I915_GEM_GPU_DOMAINS; + } + if (obj->base.pending_write_domain) flips |= atomic_read(&obj->pending_flip); - - flush_domains |= obj->base.write_domain; } if (flips) { -- 1.7.10.4