2 * Copyright © 2008 Intel Corporation
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
* Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*
* Authors:
24 * Eric Anholt <eric@anholt.net>
#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
32 #include "i915_trace.h"
33 #include "intel_drv.h"
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <linux/pci.h>
38 static uint32_t i915_gem_get_gtt_alignment(struct drm_gem_object *obj);
39 static int i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj);
40 static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
41 static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
42 static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
int write);
44 static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
uint64_t offset,
uint64_t size);
47 static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
48 static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
49 static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
unsigned alignment);
51 static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
52 static int i915_gem_evict_something(struct drm_device *dev, int min_size,
unsigned alignment);
54 static int i915_gem_evict_from_inactive_list(struct drm_device *dev);
55 static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
56 struct drm_i915_gem_pwrite *args,
57 struct drm_file *file_priv);
58 static void i915_gem_free_object_tail(struct drm_gem_object *obj);
60 static LIST_HEAD(shrink_list);
61 static DEFINE_SPINLOCK(shrink_list_lock);
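/* Each device's GEM state is linked onto shrink_list under shrink_list_lock
 * so that the i915 memory shrinker (later in this file) can release
 * inactive backing pages when the system runs low on memory. */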
63 int i915_gem_do_init(struct drm_device *dev, unsigned long start,
unsigned long end)
{
66 drm_i915_private_t *dev_priv = dev->dev_private;
if (start >= end ||
69 (start & (PAGE_SIZE - 1)) != 0 ||
70 (end & (PAGE_SIZE - 1)) != 0) {
return -EINVAL;
}
74 drm_mm_init(&dev_priv->mm.gtt_space, start, end - start);
77 dev->gtt_total = (uint32_t) (end - start);
return 0;
}
83 i915_gem_init_ioctl(struct drm_device *dev, void *data,
84 struct drm_file *file_priv)
86 struct drm_i915_gem_init *args = data;
89 mutex_lock(&dev->struct_mutex);
90 ret = i915_gem_do_init(dev, args->gtt_start, args->gtt_end);
91 mutex_unlock(&dev->struct_mutex);
97 i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
98 struct drm_file *file_priv)
100 struct drm_i915_gem_get_aperture *args = data;
102 if (!(dev->driver->driver_features & DRIVER_GEM))
105 args->aper_size = dev->gtt_total;
106 args->aper_available_size = (args->aper_size -
107 atomic_read(&dev->pin_memory));
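/* "Available" is simply the total aperture minus memory currently pinned
 * (e.g. scanout buffers); it is a point-in-time hint, not a reservation. */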
114 * Creates a new mm object and returns a handle to it.
117 i915_gem_create_ioctl(struct drm_device *dev, void *data,
118 struct drm_file *file_priv)
120 struct drm_i915_gem_create *args = data;
121 struct drm_gem_object *obj;
125 args->size = roundup(args->size, PAGE_SIZE);
127 /* Allocate the new object */
128 obj = i915_gem_alloc_object(dev, args->size);
132 ret = drm_gem_handle_create(file_priv, obj, &handle);
133 drm_gem_object_unreference_unlocked(obj);
137 args->handle = handle;
143 fast_shmem_read(struct page **pages,
144 loff_t page_base, int page_offset,
151 vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
154 unwritten = __copy_to_user_inatomic(data, vaddr + page_offset, length);
155 kunmap_atomic(vaddr, KM_USER0);
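/* The atomic kmap and __copy_to_user_inatomic() never sleep; if the user's
 * destination page is not resident the copy comes up short and the caller
 * falls back to the faulting slow path. */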
163 static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj)
165 drm_i915_private_t *dev_priv = obj->dev->dev_private;
166 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
168 return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
169 obj_priv->tiling_mode != I915_TILING_NONE;
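/* With 9_10_17 swizzling the GPU's tiled layout depends on physical address
 * bit 17, which the CPU's linear view of the pages cannot express, so
 * copies to or from tiled objects must be fixed up per page by
 * slow_shmem_bit17_copy() below. */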
173 slow_shmem_copy(struct page *dst_page,
175 struct page *src_page,
179 char *dst_vaddr, *src_vaddr;
181 dst_vaddr = kmap(dst_page);
182 src_vaddr = kmap(src_page);
184 memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);
191 slow_shmem_bit17_copy(struct page *gpu_page,
193 struct page *cpu_page,
198 char *gpu_vaddr, *cpu_vaddr;
200 /* Use the unswizzled path if this page isn't affected. */
201 if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
203 return slow_shmem_copy(cpu_page, cpu_offset,
204 gpu_page, gpu_offset, length);
206 return slow_shmem_copy(gpu_page, gpu_offset,
207 cpu_page, cpu_offset, length);
210 gpu_vaddr = kmap(gpu_page);
211 cpu_vaddr = kmap(cpu_page);
213 /* Copy the data, XORing A6 with A17 (1). The user already knows he's
214 * XORing with the other bits (A9 for Y, A9 and A10 for X)
*/
while (length > 0) {
217 int cacheline_end = ALIGN(gpu_offset + 1, 64);
218 int this_length = min(cacheline_end - gpu_offset, length);
219 int swizzled_gpu_offset = gpu_offset ^ 64;
222 memcpy(cpu_vaddr + cpu_offset,
223 gpu_vaddr + swizzled_gpu_offset,
226 memcpy(gpu_vaddr + swizzled_gpu_offset,
227 cpu_vaddr + cpu_offset,
230 cpu_offset += this_length;
231 gpu_offset += this_length;
232 length -= this_length;
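/* Example: within an affected page the data userspace expects at offset 0
 * actually lives at offset 64 and vice versa; each adjacent pair of 64-byte
 * cache lines is swapped, hence gpu_offset ^ 64 above. */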
240 * This is the fast shmem pread path, which attempts to copy_to_user directly
241 * from the backing pages of the object to the user's address space. On a
242 * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
*/
static int
245 i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj,
246 struct drm_i915_gem_pread *args,
247 struct drm_file *file_priv)
249 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
251 loff_t offset, page_base;
252 char __user *user_data;
253 int page_offset, page_length;
256 user_data = (char __user *) (uintptr_t) args->data_ptr;
259 mutex_lock(&dev->struct_mutex);
261 ret = i915_gem_object_get_pages(obj, 0);
265 ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
270 obj_priv = to_intel_bo(obj);
271 offset = args->offset;
274 /* Operation in this page
276 * page_base = page offset within aperture
277 * page_offset = offset within page
278 * page_length = bytes to copy for this page
280 page_base = (offset & ~(PAGE_SIZE-1));
281 page_offset = offset & (PAGE_SIZE-1);
282 page_length = remain;
283 if ((page_offset + remain) > PAGE_SIZE)
284 page_length = PAGE_SIZE - page_offset;
286 ret = fast_shmem_read(obj_priv->pages,
287 page_base, page_offset,
288 user_data, page_length);
292 remain -= page_length;
293 user_data += page_length;
294 offset += page_length;
298 i915_gem_object_put_pages(obj);
300 mutex_unlock(&dev->struct_mutex);
306 i915_gem_object_get_pages_or_evict(struct drm_gem_object *obj)
310 ret = i915_gem_object_get_pages(obj, __GFP_NORETRY | __GFP_NOWARN);
312 /* If we've insufficient memory to map in the pages, attempt
313 * to make some space by throwing out some old buffers.
315 if (ret == -ENOMEM) {
316 struct drm_device *dev = obj->dev;
318 ret = i915_gem_evict_something(dev, obj->size,
319 i915_gem_get_gtt_alignment(obj));
323 ret = i915_gem_object_get_pages(obj, 0);
330 * This is the fallback shmem pread path, which uses get_user_pages to pin
331 * the destination pages and kmap to copy into them, so we can read from the
332 * object's backing pages while holding the struct_mutex and never take a
333 * page fault on the user's address.
*/
static int
336 i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
337 struct drm_i915_gem_pread *args,
338 struct drm_file *file_priv)
340 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
341 struct mm_struct *mm = current->mm;
342 struct page **user_pages;
344 loff_t offset, pinned_pages, i;
345 loff_t first_data_page, last_data_page, num_pages;
346 int shmem_page_index, shmem_page_offset;
347 int data_page_index, data_page_offset;
350 uint64_t data_ptr = args->data_ptr;
351 int do_bit17_swizzling;
355 /* Pin the user pages containing the data. We can't fault while
356 * holding the struct mutex, yet we want to hold it while
357 * dereferencing the user data.
359 first_data_page = data_ptr / PAGE_SIZE;
360 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
361 num_pages = last_data_page - first_data_page + 1;
363 user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
364 if (user_pages == NULL)
367 down_read(&mm->mmap_sem);
368 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
369 num_pages, 1, 0, user_pages, NULL);
370 up_read(&mm->mmap_sem);
371 if (pinned_pages < num_pages) {
373 goto fail_put_user_pages;
376 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
378 mutex_lock(&dev->struct_mutex);
380 ret = i915_gem_object_get_pages_or_evict(obj);
384 ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
389 obj_priv = to_intel_bo(obj);
390 offset = args->offset;
393 /* Operation in this page
395 * shmem_page_index = page number within shmem file
396 * shmem_page_offset = offset within page in shmem file
397 * data_page_index = page number in get_user_pages return
398 * data_page_offset = offset within data_page_index page.
399 * page_length = bytes to copy for this page
401 shmem_page_index = offset / PAGE_SIZE;
402 shmem_page_offset = offset & ~PAGE_MASK;
403 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
404 data_page_offset = data_ptr & ~PAGE_MASK;
406 page_length = remain;
407 if ((shmem_page_offset + page_length) > PAGE_SIZE)
408 page_length = PAGE_SIZE - shmem_page_offset;
409 if ((data_page_offset + page_length) > PAGE_SIZE)
410 page_length = PAGE_SIZE - data_page_offset;
412 if (do_bit17_swizzling) {
413 slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
415 user_pages[data_page_index],
420 slow_shmem_copy(user_pages[data_page_index],
422 obj_priv->pages[shmem_page_index],
427 remain -= page_length;
428 data_ptr += page_length;
429 offset += page_length;
433 i915_gem_object_put_pages(obj);
435 mutex_unlock(&dev->struct_mutex);
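/* The destination pages were written through kmap(), not through the user
 * mapping, so they must be marked dirty before release or the data could be
 * dropped under memory pressure. */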
437 for (i = 0; i < pinned_pages; i++) {
438 SetPageDirty(user_pages[i]);
439 page_cache_release(user_pages[i]);
441 drm_free_large(user_pages);
447 * Reads data from the object referenced by handle.
449 * On error, the contents of *data are undefined.
452 i915_gem_pread_ioctl(struct drm_device *dev, void *data,
453 struct drm_file *file_priv)
455 struct drm_i915_gem_pread *args = data;
456 struct drm_gem_object *obj;
457 struct drm_i915_gem_object *obj_priv;
460 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
463 obj_priv = to_intel_bo(obj);
465 /* Bounds check source.
467 * XXX: This could use review for overflow issues...
469 if (args->offset > obj->size || args->size > obj->size ||
470 args->offset + args->size > obj->size) {
471 drm_gem_object_unreference_unlocked(obj);
475 if (i915_gem_object_needs_bit17_swizzle(obj)) {
476 ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv);
} else {
478 ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
if (ret != 0)
480 ret = i915_gem_shmem_pread_slow(dev, obj, args,
file_priv);
}
484 drm_gem_object_unreference_unlocked(obj);
489 /* This is the fast write path which cannot handle
490 * page faults in the source data
*/
static inline int
494 fast_user_write(struct io_mapping *mapping,
495 loff_t page_base, int page_offset,
496 char __user *user_data,
500 unsigned long unwritten;
502 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base, KM_USER0);
503 unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
505 io_mapping_unmap_atomic(vaddr_atomic, KM_USER0);
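/* The destination is an atomic WC mapping of the aperture and the source is
 * copied with an in-atomic user copy, so nothing here can sleep; a short
 * copy means the source page was not resident and the caller retries via
 * the page-pinning slow path. */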
511 /* Here's the write path which can sleep for
* an arbitrary amount of time, copying through non-atomic
* kernel mappings of the already-pinned user pages.
*/
static inline void
516 slow_kernel_write(struct io_mapping *mapping,
517 loff_t gtt_base, int gtt_offset,
518 struct page *user_page, int user_offset,
521 char __iomem *dst_vaddr;
524 dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
525 src_vaddr = kmap(user_page);
527 memcpy_toio(dst_vaddr + gtt_offset,
528 src_vaddr + user_offset,
532 io_mapping_unmap(dst_vaddr);
536 fast_shmem_write(struct page **pages,
537 loff_t page_base, int page_offset,
542 unsigned long unwritten;
544 vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
547 unwritten = __copy_from_user_inatomic(vaddr + page_offset, data, length);
548 kunmap_atomic(vaddr, KM_USER0);
556 * This is the fast pwrite path, where we copy the data directly from the
557 * user into the GTT, uncached.
*/
static int
560 i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
561 struct drm_i915_gem_pwrite *args,
562 struct drm_file *file_priv)
564 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
565 drm_i915_private_t *dev_priv = dev->dev_private;
567 loff_t offset, page_base;
568 char __user *user_data;
569 int page_offset, page_length;
572 user_data = (char __user *) (uintptr_t) args->data_ptr;
574 if (!access_ok(VERIFY_READ, user_data, remain))
578 mutex_lock(&dev->struct_mutex);
579 ret = i915_gem_object_pin(obj, 0);
581 mutex_unlock(&dev->struct_mutex);
584 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
588 obj_priv = to_intel_bo(obj);
589 offset = obj_priv->gtt_offset + args->offset;
592 /* Operation in this page
594 * page_base = page offset within aperture
595 * page_offset = offset within page
596 * page_length = bytes to copy for this page
598 page_base = (offset & ~(PAGE_SIZE-1));
599 page_offset = offset & (PAGE_SIZE-1);
600 page_length = remain;
601 if ((page_offset + remain) > PAGE_SIZE)
602 page_length = PAGE_SIZE - page_offset;
604 ret = fast_user_write (dev_priv->mm.gtt_mapping, page_base,
605 page_offset, user_data, page_length);
607 /* If we get a fault while copying data, then (presumably) our
608 * source page isn't available. Return the error and we'll
609 * retry in the slow path.
614 remain -= page_length;
615 user_data += page_length;
616 offset += page_length;
620 i915_gem_object_unpin(obj);
621 mutex_unlock(&dev->struct_mutex);
627 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
628 * the source memory and copies it into the aperture through a non-atomic
* kernel mapping (see slow_kernel_write()).
*
630 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
631 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
*/
static int
634 i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
635 struct drm_i915_gem_pwrite *args,
636 struct drm_file *file_priv)
638 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
639 drm_i915_private_t *dev_priv = dev->dev_private;
641 loff_t gtt_page_base, offset;
642 loff_t first_data_page, last_data_page, num_pages;
643 loff_t pinned_pages, i;
644 struct page **user_pages;
645 struct mm_struct *mm = current->mm;
646 int gtt_page_offset, data_page_offset, data_page_index, page_length;
648 uint64_t data_ptr = args->data_ptr;
652 /* Pin the user pages containing the data. We can't fault while
653 * holding the struct mutex, and all of the pwrite implementations
654 * want to hold it while dereferencing the user data.
656 first_data_page = data_ptr / PAGE_SIZE;
657 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
658 num_pages = last_data_page - first_data_page + 1;
660 user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
661 if (user_pages == NULL)
664 down_read(&mm->mmap_sem);
665 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
666 num_pages, 0, 0, user_pages, NULL);
667 up_read(&mm->mmap_sem);
668 if (pinned_pages < num_pages) {
670 goto out_unpin_pages;
673 mutex_lock(&dev->struct_mutex);
674 ret = i915_gem_object_pin(obj, 0);
678 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
680 goto out_unpin_object;
682 obj_priv = to_intel_bo(obj);
683 offset = obj_priv->gtt_offset + args->offset;
686 /* Operation in this page
688 * gtt_page_base = page offset within aperture
689 * gtt_page_offset = offset within page in aperture
690 * data_page_index = page number in get_user_pages return
691 * data_page_offset = offset within data_page_index page.
692 * page_length = bytes to copy for this page
694 gtt_page_base = offset & PAGE_MASK;
695 gtt_page_offset = offset & ~PAGE_MASK;
696 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
697 data_page_offset = data_ptr & ~PAGE_MASK;
699 page_length = remain;
700 if ((gtt_page_offset + page_length) > PAGE_SIZE)
701 page_length = PAGE_SIZE - gtt_page_offset;
702 if ((data_page_offset + page_length) > PAGE_SIZE)
703 page_length = PAGE_SIZE - data_page_offset;
705 slow_kernel_write(dev_priv->mm.gtt_mapping,
706 gtt_page_base, gtt_page_offset,
707 user_pages[data_page_index],
711 remain -= page_length;
712 offset += page_length;
713 data_ptr += page_length;
717 i915_gem_object_unpin(obj);
719 mutex_unlock(&dev->struct_mutex);
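/* The pinned user pages were only read from, so unlike the pread slow path
 * there is no need to mark them dirty before releasing them. */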
721 for (i = 0; i < pinned_pages; i++)
722 page_cache_release(user_pages[i]);
723 drm_free_large(user_pages);
729 * This is the fast shmem pwrite path, which attempts to directly
730 * copy_from_user into the kmapped pages backing the object.
733 i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
734 struct drm_i915_gem_pwrite *args,
735 struct drm_file *file_priv)
737 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
739 loff_t offset, page_base;
740 char __user *user_data;
741 int page_offset, page_length;
744 user_data = (char __user *) (uintptr_t) args->data_ptr;
747 mutex_lock(&dev->struct_mutex);
749 ret = i915_gem_object_get_pages(obj, 0);
753 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
757 obj_priv = to_intel_bo(obj);
758 offset = args->offset;
762 /* Operation in this page
764 * page_base = page offset within aperture
765 * page_offset = offset within page
766 * page_length = bytes to copy for this page
768 page_base = (offset & ~(PAGE_SIZE-1));
769 page_offset = offset & (PAGE_SIZE-1);
770 page_length = remain;
771 if ((page_offset + remain) > PAGE_SIZE)
772 page_length = PAGE_SIZE - page_offset;
774 ret = fast_shmem_write(obj_priv->pages,
775 page_base, page_offset,
776 user_data, page_length);
780 remain -= page_length;
781 user_data += page_length;
782 offset += page_length;
786 i915_gem_object_put_pages(obj);
788 mutex_unlock(&dev->struct_mutex);
794 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
795 * the source memory and kmap to copy from it.
*
797 * This avoids taking mmap_sem for faulting on the user's address while the
798 * struct_mutex is held.
*/
static int
801 i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
802 struct drm_i915_gem_pwrite *args,
803 struct drm_file *file_priv)
805 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
806 struct mm_struct *mm = current->mm;
807 struct page **user_pages;
809 loff_t offset, pinned_pages, i;
810 loff_t first_data_page, last_data_page, num_pages;
811 int shmem_page_index, shmem_page_offset;
812 int data_page_index, data_page_offset;
815 uint64_t data_ptr = args->data_ptr;
816 int do_bit17_swizzling;
820 /* Pin the user pages containing the data. We can't fault while
821 * holding the struct mutex, and all of the pwrite implementations
822 * want to hold it while dereferencing the user data.
824 first_data_page = data_ptr / PAGE_SIZE;
825 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
826 num_pages = last_data_page - first_data_page + 1;
828 user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
829 if (user_pages == NULL)
832 down_read(&mm->mmap_sem);
833 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
834 num_pages, 0, 0, user_pages, NULL);
835 up_read(&mm->mmap_sem);
836 if (pinned_pages < num_pages) {
838 goto fail_put_user_pages;
841 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
843 mutex_lock(&dev->struct_mutex);
845 ret = i915_gem_object_get_pages_or_evict(obj);
849 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
853 obj_priv = to_intel_bo(obj);
854 offset = args->offset;
858 /* Operation in this page
860 * shmem_page_index = page number within shmem file
861 * shmem_page_offset = offset within page in shmem file
862 * data_page_index = page number in get_user_pages return
863 * data_page_offset = offset within data_page_index page.
864 * page_length = bytes to copy for this page
866 shmem_page_index = offset / PAGE_SIZE;
867 shmem_page_offset = offset & ~PAGE_MASK;
868 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
869 data_page_offset = data_ptr & ~PAGE_MASK;
871 page_length = remain;
872 if ((shmem_page_offset + page_length) > PAGE_SIZE)
873 page_length = PAGE_SIZE - shmem_page_offset;
874 if ((data_page_offset + page_length) > PAGE_SIZE)
875 page_length = PAGE_SIZE - data_page_offset;
877 if (do_bit17_swizzling) {
878 slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
880 user_pages[data_page_index],
885 slow_shmem_copy(obj_priv->pages[shmem_page_index],
887 user_pages[data_page_index],
892 remain -= page_length;
893 data_ptr += page_length;
894 offset += page_length;
898 i915_gem_object_put_pages(obj);
900 mutex_unlock(&dev->struct_mutex);
902 for (i = 0; i < pinned_pages; i++)
903 page_cache_release(user_pages[i]);
904 drm_free_large(user_pages);
910 * Writes data to the object referenced by handle.
912 * On error, the contents of the buffer that were to be modified are undefined.
915 i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
916 struct drm_file *file_priv)
918 struct drm_i915_gem_pwrite *args = data;
919 struct drm_gem_object *obj;
920 struct drm_i915_gem_object *obj_priv;
923 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
926 obj_priv = to_intel_bo(obj);
928 /* Bounds check destination.
930 * XXX: This could use review for overflow issues...
932 if (args->offset > obj->size || args->size > obj->size ||
933 args->offset + args->size > obj->size) {
934 drm_gem_object_unreference_unlocked(obj);
938 /* We can only do the GTT pwrite on untiled buffers, as otherwise
939 * it would end up going through the fenced access, and we'll get
940 * different detiling behavior between reading and writing.
941 * pread/pwrite currently are reading and writing from the CPU
942 * perspective, requiring manual detiling by the client.
944 if (obj_priv->phys_obj)
945 ret = i915_gem_phys_pwrite(dev, obj, args, file_priv);
946 else if (obj_priv->tiling_mode == I915_TILING_NONE &&
947 dev->gtt_total != 0 &&
948 obj->write_domain != I915_GEM_DOMAIN_CPU) {
949 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file_priv);
950 if (ret == -EFAULT) {
951 ret = i915_gem_gtt_pwrite_slow(dev, obj, args,
file_priv);
}
954 } else if (i915_gem_object_needs_bit17_swizzle(obj)) {
955 ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv);
} else {
957 ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv);
958 if (ret == -EFAULT) {
959 ret = i915_gem_shmem_pwrite_slow(dev, obj, args,
file_priv);
}
}
966 DRM_INFO("pwrite failed %d\n", ret);
969 drm_gem_object_unreference_unlocked(obj);
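/* The set_domain ioctl below is how userspace fences its CPU access to an
 * object: read_domains names where it will read (I915_GEM_DOMAIN_CPU or
 * I915_GEM_DOMAIN_GTT) and write_domain, if non-zero, must name the same
 * domain. */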
975 * Called when user space prepares to use an object with the CPU, either
976 * through the mmap ioctl's mapping or a GTT mapping.
979 i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
980 struct drm_file *file_priv)
982 struct drm_i915_private *dev_priv = dev->dev_private;
983 struct drm_i915_gem_set_domain *args = data;
984 struct drm_gem_object *obj;
985 struct drm_i915_gem_object *obj_priv;
986 uint32_t read_domains = args->read_domains;
987 uint32_t write_domain = args->write_domain;
990 if (!(dev->driver->driver_features & DRIVER_GEM))
993 /* Only handle setting domains to types used by the CPU. */
994 if (write_domain & I915_GEM_GPU_DOMAINS)
997 if (read_domains & I915_GEM_GPU_DOMAINS)
1000 /* Having something in the write domain implies it's in the read
1001 * domain, and only that read domain. Enforce that in the request.
1003 if (write_domain != 0 && read_domains != write_domain)
1006 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1009 obj_priv = to_intel_bo(obj);
1011 mutex_lock(&dev->struct_mutex);
1013 intel_mark_busy(dev, obj);
1016 DRM_INFO("set_domain_ioctl %p(%zd), %08x %08x\n",
1017 obj, obj->size, read_domains, write_domain);
1019 if (read_domains & I915_GEM_DOMAIN_GTT) {
1020 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
1022 /* Update the LRU on the fence for the CPU access that's
1025 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
1026 struct drm_i915_fence_reg *reg =
1027 &dev_priv->fence_regs[obj_priv->fence_reg];
1028 list_move_tail(&reg->lru_list,
1029 &dev_priv->mm.fence_list);
1032 /* Silently promote "you're not bound, there was nothing to do"
1033 * to success, since the client was just asking us to
1034 * make sure everything was done.
1039 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
1042 drm_gem_object_unreference(obj);
1043 mutex_unlock(&dev->struct_mutex);
1048 * Called when user space has done writes to this buffer
1051 i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1052 struct drm_file *file_priv)
1054 struct drm_i915_gem_sw_finish *args = data;
1055 struct drm_gem_object *obj;
1056 struct drm_i915_gem_object *obj_priv;
1059 if (!(dev->driver->driver_features & DRIVER_GEM))
1062 mutex_lock(&dev->struct_mutex);
1063 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1065 mutex_unlock(&dev->struct_mutex);
1070 DRM_INFO("%s: sw_finish %d (%p %zd)\n",
1071 __func__, args->handle, obj, obj->size);
1073 obj_priv = to_intel_bo(obj);
1075 /* Pinned buffers may be scanout, so flush the cache */
1076 if (obj_priv->pin_count)
1077 i915_gem_object_flush_cpu_write_domain(obj);
1079 drm_gem_object_unreference(obj);
1080 mutex_unlock(&dev->struct_mutex);
1085 * Maps the contents of an object, returning the address it is mapped
1088 * While the mapping holds a reference on the contents of the object, it doesn't
1089 * imply a ref on the object itself.
1092 i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1093 struct drm_file *file_priv)
1095 struct drm_i915_gem_mmap *args = data;
1096 struct drm_gem_object *obj;
1100 if (!(dev->driver->driver_features & DRIVER_GEM))
1103 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1107 offset = args->offset;
1109 down_write(&current->mm->mmap_sem);
1110 addr = do_mmap(obj->filp, 0, args->size,
1111 PROT_READ | PROT_WRITE, MAP_SHARED,
1113 up_write(&current->mm->mmap_sem);
1114 drm_gem_object_unreference_unlocked(obj);
1115 if (IS_ERR((void *)addr))
1118 args->addr_ptr = (uint64_t) addr;
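/* Note this maps the object's shmem backing store into the CPU's address
 * space; GTT mappings are set up separately via i915_gem_mmap_gtt_ioctl()
 * and serviced by i915_gem_fault() below. */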
1124 * i915_gem_fault - fault a page into the GTT
1125 * @vma: VMA in question
* @vmf: fault info
*
1128 * The fault handler is set up by drm_gem_mmap() when an object is GTT mapped
1129 * from userspace. The fault handler takes care of binding the object to
1130 * the GTT (if needed), allocating and programming a fence register (again,
1131 * only if needed based on whether the old reg is still valid or the object
1132 * is tiled) and inserting a new PTE into the faulting process.
1134 * Note that the faulting process may involve evicting existing objects
1135 * from the GTT and/or fence registers to make room. So performance may
1136 * suffer if the GTT working set is large or there are few fence registers
1139 int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1141 struct drm_gem_object *obj = vma->vm_private_data;
1142 struct drm_device *dev = obj->dev;
1143 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1144 pgoff_t page_offset;
1147 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
1149 /* We don't use vmf->pgoff since that has the fake offset */
1150 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1153 /* Now bind it into the GTT if needed */
1154 mutex_lock(&dev->struct_mutex);
1155 if (!obj_priv->gtt_space) {
1156 ret = i915_gem_object_bind_to_gtt(obj, 0);
1160 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1165 /* Need a new fence register? */
1166 if (obj_priv->tiling_mode != I915_TILING_NONE) {
1167 ret = i915_gem_object_get_fence_reg(obj);
1172 pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
1175 /* Finally, remap it using the new GTT offset */
1176 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1178 mutex_unlock(&dev->struct_mutex);
1183 return VM_FAULT_NOPAGE;
1186 return VM_FAULT_OOM;
1188 return VM_FAULT_SIGBUS;
1193 * i915_gem_create_mmap_offset - create a fake mmap offset for an object
1194 * @obj: obj in question
1196 * GEM memory mapping works by handing back to userspace a fake mmap offset
1197 * it can use in a subsequent mmap(2) call. The DRM core code then looks
1198 * up the object based on the offset and sets up the various memory mapping
* structures.
*
1201 * This routine allocates and attaches a fake offset for @obj.
1204 i915_gem_create_mmap_offset(struct drm_gem_object *obj)
1206 struct drm_device *dev = obj->dev;
1207 struct drm_gem_mm *mm = dev->mm_private;
1208 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1209 struct drm_map_list *list;
1210 struct drm_local_map *map;
1213 /* Set the object up for mmap'ing */
1214 list = &obj->map_list;
1215 list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
1220 map->type = _DRM_GEM;
1221 map->size = obj->size;
1224 /* Get a DRM GEM mmap offset allocated... */
1225 list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
1226 obj->size / PAGE_SIZE, 0, 0);
1227 if (!list->file_offset_node) {
1228 DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
1233 list->file_offset_node = drm_mm_get_block(list->file_offset_node,
1234 obj->size / PAGE_SIZE, 0);
1235 if (!list->file_offset_node) {
1240 list->hash.key = list->file_offset_node->start;
1241 if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) {
1242 DRM_ERROR("failed to add to map hash\n");
1247 /* By now we should be all set, any drm_mmap request on the offset
1248 * below will get to our mmap & fault handler */
1249 obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;
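/* The offset manager allocates in units of pages, so the byte offset that
 * userspace passes to mmap(2) is the hash key shifted by PAGE_SHIFT, as
 * computed above. */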
1254 drm_mm_put_block(list->file_offset_node);
1262 * i915_gem_release_mmap - remove physical page mappings
1263 * @obj: obj in question
1265 * Preserve the reservation of the mmapping with the DRM core code, but
1266 * relinquish ownership of the pages back to the system.
1268 * It is vital that we remove the page mapping if we have mapped a tiled
1269 * object through the GTT and then lose the fence register due to
1270 * resource pressure. Similarly if the object has been moved out of the
1271 * aperture, then pages mapped into userspace must be revoked. Removing the
1272 * mapping will then trigger a page fault on the next user access, allowing
1273 * fixup by i915_gem_fault().
1276 i915_gem_release_mmap(struct drm_gem_object *obj)
1278 struct drm_device *dev = obj->dev;
1279 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1281 if (dev->dev_mapping)
1282 unmap_mapping_range(dev->dev_mapping,
1283 obj_priv->mmap_offset, obj->size, 1);
1287 i915_gem_free_mmap_offset(struct drm_gem_object *obj)
1289 struct drm_device *dev = obj->dev;
1290 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1291 struct drm_gem_mm *mm = dev->mm_private;
1292 struct drm_map_list *list;
1294 list = &obj->map_list;
1295 drm_ht_remove_item(&mm->offset_hash, &list->hash);
1297 if (list->file_offset_node) {
1298 drm_mm_put_block(list->file_offset_node);
1299 list->file_offset_node = NULL;
1307 obj_priv->mmap_offset = 0;
1311 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1312 * @obj: object to check
1314 * Return the required GTT alignment for an object, taking into account
1315 * potential fence register mapping if needed.
1318 i915_gem_get_gtt_alignment(struct drm_gem_object *obj)
1320 struct drm_device *dev = obj->dev;
1321 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1325 * Minimum alignment is 4k (GTT page size), but might be greater
1326 * if a fence register is needed for the object.
1328 if (IS_I965G(dev) || obj_priv->tiling_mode == I915_TILING_NONE)
1332 * Previous chips need to be aligned to the size of the smallest
1333 * fence register that can contain the object.
1340 for (i = start; i < obj->size; i <<= 1)
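/* The loop leaves i as the smallest power-of-two fence size that can hold
 * the object (fences start at 512KB or 1MB depending on the chip), so e.g.
 * a 3MB tiled object ends up with 4MB alignment. */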
1347 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1349 * @data: GTT mapping ioctl data
1350 * @file_priv: GEM object info
1352 * Simply returns the fake offset to userspace so it can mmap it.
1353 * The mmap call will end up in drm_gem_mmap(), which will set things
1354 * up so we can get faults in the handler above.
1356 * The fault handler will take care of binding the object into the GTT
1357 * (since it may have been evicted to make room for something), allocating
1358 * a fence register, and mapping the appropriate aperture address into
1362 i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1363 struct drm_file *file_priv)
1365 struct drm_i915_gem_mmap_gtt *args = data;
1366 struct drm_gem_object *obj;
1367 struct drm_i915_gem_object *obj_priv;
1370 if (!(dev->driver->driver_features & DRIVER_GEM))
1373 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1377 mutex_lock(&dev->struct_mutex);
1379 obj_priv = to_intel_bo(obj);
1381 if (obj_priv->madv != I915_MADV_WILLNEED) {
1382 DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1383 drm_gem_object_unreference(obj);
1384 mutex_unlock(&dev->struct_mutex);
1389 if (!obj_priv->mmap_offset) {
1390 ret = i915_gem_create_mmap_offset(obj);
1392 drm_gem_object_unreference(obj);
1393 mutex_unlock(&dev->struct_mutex);
1398 args->offset = obj_priv->mmap_offset;
1401 * Pull it into the GTT so that we have a page list (makes the
1402 * initial fault faster and any subsequent flushing possible).
1404 if (!obj_priv->agp_mem) {
1405 ret = i915_gem_object_bind_to_gtt(obj, 0);
1407 drm_gem_object_unreference(obj);
1408 mutex_unlock(&dev->struct_mutex);
1413 drm_gem_object_unreference(obj);
1414 mutex_unlock(&dev->struct_mutex);
1420 i915_gem_object_put_pages(struct drm_gem_object *obj)
1422 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1423 int page_count = obj->size / PAGE_SIZE;
1426 BUG_ON(obj_priv->pages_refcount == 0);
1427 BUG_ON(obj_priv->madv == __I915_MADV_PURGED);
1429 if (--obj_priv->pages_refcount != 0)
1432 if (obj_priv->tiling_mode != I915_TILING_NONE)
1433 i915_gem_object_save_bit_17_swizzle(obj);
1435 if (obj_priv->madv == I915_MADV_DONTNEED)
1436 obj_priv->dirty = 0;
1438 for (i = 0; i < page_count; i++) {
1439 if (obj_priv->dirty)
1440 set_page_dirty(obj_priv->pages[i]);
1442 if (obj_priv->madv == I915_MADV_WILLNEED)
1443 mark_page_accessed(obj_priv->pages[i]);
1445 page_cache_release(obj_priv->pages[i]);
1447 obj_priv->dirty = 0;
1449 drm_free_large(obj_priv->pages);
1450 obj_priv->pages = NULL;
1454 i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno,
1455 struct intel_ring_buffer *ring)
1457 struct drm_device *dev = obj->dev;
1458 drm_i915_private_t *dev_priv = dev->dev_private;
1459 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1460 BUG_ON(ring == NULL);
1461 obj_priv->ring = ring;
1463 /* Add a reference if we're newly entering the active list. */
1464 if (!obj_priv->active) {
1465 drm_gem_object_reference(obj);
1466 obj_priv->active = 1;
1468 /* Move from whatever list we were on to the tail of execution. */
1469 spin_lock(&dev_priv->mm.active_list_lock);
1470 list_move_tail(&obj_priv->list, &ring->active_list);
1471 spin_unlock(&dev_priv->mm.active_list_lock);
1472 obj_priv->last_rendering_seqno = seqno;
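/* Requests retire in seqno order, so keeping the active list in submission
 * order lets i915_gem_retire_request() walk from the head and stop at the
 * first object whose rendering has not yet completed. */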
1476 i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
1478 struct drm_device *dev = obj->dev;
1479 drm_i915_private_t *dev_priv = dev->dev_private;
1480 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1482 BUG_ON(!obj_priv->active);
1483 list_move_tail(&obj_priv->list, &dev_priv->mm.flushing_list);
1484 obj_priv->last_rendering_seqno = 0;
1487 /* Immediately discard the backing storage */
1489 i915_gem_object_truncate(struct drm_gem_object *obj)
1491 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1492 struct inode *inode;
1494 inode = obj->filp->f_path.dentry->d_inode;
1495 if (inode->i_op->truncate)
1496 inode->i_op->truncate (inode);
1498 obj_priv->madv = __I915_MADV_PURGED;
1502 i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv)
1504 return obj_priv->madv == I915_MADV_DONTNEED;
1508 i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
1510 struct drm_device *dev = obj->dev;
1511 drm_i915_private_t *dev_priv = dev->dev_private;
1512 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1514 i915_verify_inactive(dev, __FILE__, __LINE__);
1515 if (obj_priv->pin_count != 0)
1516 list_del_init(&obj_priv->list);
1518 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1520 BUG_ON(!list_empty(&obj_priv->gpu_write_list));
1522 obj_priv->last_rendering_seqno = 0;
1523 obj_priv->ring = NULL;
1524 if (obj_priv->active) {
1525 obj_priv->active = 0;
1526 drm_gem_object_unreference(obj);
1528 i915_verify_inactive(dev, __FILE__, __LINE__);
1532 i915_gem_process_flushing_list(struct drm_device *dev,
1533 uint32_t flush_domains, uint32_t seqno,
1534 struct intel_ring_buffer *ring)
1536 drm_i915_private_t *dev_priv = dev->dev_private;
1537 struct drm_i915_gem_object *obj_priv, *next;
1539 list_for_each_entry_safe(obj_priv, next,
1540 &dev_priv->mm.gpu_write_list,
1542 struct drm_gem_object *obj = &obj_priv->base;
1544 if ((obj->write_domain & flush_domains) ==
1545 obj->write_domain &&
1546 obj_priv->ring->ring_flag == ring->ring_flag) {
1547 uint32_t old_write_domain = obj->write_domain;
1549 obj->write_domain = 0;
1550 list_del_init(&obj_priv->gpu_write_list);
1551 i915_gem_object_move_to_active(obj, seqno, ring);
1553 /* update the fence lru list */
1554 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
1555 struct drm_i915_fence_reg *reg =
1556 &dev_priv->fence_regs[obj_priv->fence_reg];
1557 list_move_tail(&reg->lru_list,
1558 &dev_priv->mm.fence_list);
1561 trace_i915_gem_object_change_domain(obj,
1569 i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
1570 uint32_t flush_domains, struct intel_ring_buffer *ring)
1572 drm_i915_private_t *dev_priv = dev->dev_private;
1573 struct drm_i915_file_private *i915_file_priv = NULL;
1574 struct drm_i915_gem_request *request;
1578 if (file_priv != NULL)
1579 i915_file_priv = file_priv->driver_priv;
1581 request = kzalloc(sizeof(*request), GFP_KERNEL);
1582 if (request == NULL)
1585 seqno = ring->add_request(dev, ring, file_priv, flush_domains);
1587 request->seqno = seqno;
1588 request->ring = ring;
1589 request->emitted_jiffies = jiffies;
1590 was_empty = list_empty(&ring->request_list);
1591 list_add_tail(&request->list, &ring->request_list);
1593 if (i915_file_priv) {
1594 list_add_tail(&request->client_list,
1595 &i915_file_priv->mm.request_list);
1597 INIT_LIST_HEAD(&request->client_list);
1600 /* Associate any objects on the flushing list matching the write
1601 * domain we're flushing with our flush.
1603 if (flush_domains != 0)
1604 i915_gem_process_flushing_list(dev, flush_domains, seqno, ring);
1606 if (!dev_priv->mm.suspended) {
1607 mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
1609 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
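/* The retire work handler re-queues itself roughly once a second (HZ), so
 * completed requests are reaped even when nobody is explicitly waiting. */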
1615 * Command execution barrier
1617 * Ensures that all commands in the ring are finished
1618 * before signalling the CPU
1621 i915_retire_commands(struct drm_device *dev, struct intel_ring_buffer *ring)
1623 uint32_t flush_domains = 0;
1625 /* The sampler always gets flushed on i965 (sigh) */
1627 flush_domains |= I915_GEM_DOMAIN_SAMPLER;
1629 ring->flush(dev, ring,
1630 I915_GEM_DOMAIN_COMMAND, flush_domains);
1631 return flush_domains;
1635 * Moves buffers associated only with the given active seqno from the active
1636 * to inactive list, potentially freeing them.
1639 i915_gem_retire_request(struct drm_device *dev,
1640 struct drm_i915_gem_request *request)
1642 drm_i915_private_t *dev_priv = dev->dev_private;
1644 trace_i915_gem_request_retire(dev, request->seqno);
1646 /* Move any buffers on the active list that are no longer referenced
1647 * by the ringbuffer to the flushing/inactive lists as appropriate.
1649 spin_lock(&dev_priv->mm.active_list_lock);
1650 while (!list_empty(&request->ring->active_list)) {
1651 struct drm_gem_object *obj;
1652 struct drm_i915_gem_object *obj_priv;
1654 obj_priv = list_first_entry(&request->ring->active_list,
1655 struct drm_i915_gem_object,
1657 obj = &obj_priv->base;
1659 /* If the seqno being retired doesn't match the oldest in the
1660 * list, then the oldest in the list must still be newer than
1663 if (obj_priv->last_rendering_seqno != request->seqno)
1667 DRM_INFO("%s: retire %d moves to inactive list %p\n",
1668 __func__, request->seqno, obj);
1671 if (obj->write_domain != 0)
1672 i915_gem_object_move_to_flushing(obj);
1674 /* Take a reference on the object so it won't be
1675 * freed while the spinlock is held. The list
1676 * protection for this spinlock is safe when breaking
1677 * the lock like this since the next thing we do
1678 * is just get the head of the list again.
1680 drm_gem_object_reference(obj);
1681 i915_gem_object_move_to_inactive(obj);
1682 spin_unlock(&dev_priv->mm.active_list_lock);
1683 drm_gem_object_unreference(obj);
1684 spin_lock(&dev_priv->mm.active_list_lock);
1688 spin_unlock(&dev_priv->mm.active_list_lock);
1692 * Returns true if seq1 is later than seq2.
1695 i915_seqno_passed(uint32_t seq1, uint32_t seq2)
1697 return (int32_t)(seq1 - seq2) >= 0;
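/* The comparison is a signed 32-bit difference, so it stays correct across
 * seqno wrap-around as long as the two values are within 2^31 of each
 * other. */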
1701 i915_get_gem_seqno(struct drm_device *dev,
1702 struct intel_ring_buffer *ring)
1704 return ring->get_gem_seqno(dev, ring);
1708 * This function clears the request list as sequence numbers are passed.
1711 i915_gem_retire_requests_ring(struct drm_device *dev,
1712 struct intel_ring_buffer *ring)
1714 drm_i915_private_t *dev_priv = dev->dev_private;
1717 if (!ring->status_page.page_addr
1718 || list_empty(&ring->request_list))
1721 seqno = i915_get_gem_seqno(dev, ring);
1723 while (!list_empty(&ring->request_list)) {
1724 struct drm_i915_gem_request *request;
1725 uint32_t retiring_seqno;
1727 request = list_first_entry(&ring->request_list,
1728 struct drm_i915_gem_request,
1730 retiring_seqno = request->seqno;
1732 if (i915_seqno_passed(seqno, retiring_seqno) ||
1733 atomic_read(&dev_priv->mm.wedged)) {
1734 i915_gem_retire_request(dev, request);
1736 list_del(&request->list);
1737 list_del(&request->client_list);
1743 if (unlikely (dev_priv->trace_irq_seqno &&
1744 i915_seqno_passed(dev_priv->trace_irq_seqno, seqno))) {
1746 ring->user_irq_put(dev, ring);
1747 dev_priv->trace_irq_seqno = 0;
1752 i915_gem_retire_requests(struct drm_device *dev)
1754 drm_i915_private_t *dev_priv = dev->dev_private;
1756 if (!list_empty(&dev_priv->mm.deferred_free_list)) {
1757 struct drm_i915_gem_object *obj_priv, *tmp;
1759 /* We must be careful that during unbind() we do not
1760 * accidentally infinitely recurse into retire requests.
1762 * retire -> free -> unbind -> wait -> retire_ring
1764 list_for_each_entry_safe(obj_priv, tmp,
1765 &dev_priv->mm.deferred_free_list,
1767 i915_gem_free_object_tail(&obj_priv->base);
1770 i915_gem_retire_requests_ring(dev, &dev_priv->render_ring);
1772 i915_gem_retire_requests_ring(dev, &dev_priv->bsd_ring);
1776 i915_gem_retire_work_handler(struct work_struct *work)
1778 drm_i915_private_t *dev_priv;
1779 struct drm_device *dev;
1781 dev_priv = container_of(work, drm_i915_private_t,
1782 mm.retire_work.work);
1783 dev = dev_priv->dev;
1785 mutex_lock(&dev->struct_mutex);
1786 i915_gem_retire_requests(dev);
1788 if (!dev_priv->mm.suspended &&
1789 (!list_empty(&dev_priv->render_ring.request_list) ||
(HAS_BSD(dev) &&
1791 !list_empty(&dev_priv->bsd_ring.request_list))))
1792 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1793 mutex_unlock(&dev->struct_mutex);
1797 i915_do_wait_request(struct drm_device *dev, uint32_t seqno,
1798 int interruptible, struct intel_ring_buffer *ring)
1800 drm_i915_private_t *dev_priv = dev->dev_private;
1806 if (atomic_read(&dev_priv->mm.wedged))
1809 if (!i915_seqno_passed(ring->get_gem_seqno(dev, ring), seqno)) {
1810 if (HAS_PCH_SPLIT(dev))
1811 ier = I915_READ(DEIER) | I915_READ(GTIER);
1813 ier = I915_READ(IER);
1815 DRM_ERROR("something (likely vbetool) disabled "
1816 "interrupts, re-enabling\n");
1817 i915_driver_irq_preinstall(dev);
1818 i915_driver_irq_postinstall(dev);
1821 trace_i915_gem_request_wait_begin(dev, seqno);
1823 ring->waiting_gem_seqno = seqno;
1824 ring->user_irq_get(dev, ring);
1826 ret = wait_event_interruptible(ring->irq_queue,
1828 ring->get_gem_seqno(dev, ring), seqno)
1829 || atomic_read(&dev_priv->mm.wedged));
1831 wait_event(ring->irq_queue,
1833 ring->get_gem_seqno(dev, ring), seqno)
1834 || atomic_read(&dev_priv->mm.wedged));
1836 ring->user_irq_put(dev, ring);
1837 ring->waiting_gem_seqno = 0;
1839 trace_i915_gem_request_wait_end(dev, seqno);
1841 if (atomic_read(&dev_priv->mm.wedged))
1844 if (ret && ret != -ERESTARTSYS)
1845 DRM_ERROR("%s returns %d (awaiting %d at %d)\n",
1846 __func__, ret, seqno, ring->get_gem_seqno(dev, ring));
1848 /* Directly dispatch request retiring. While we have the work queue
1849 * to handle this, the waiter on a request often wants an associated
1850 * buffer to have made it to the inactive list, and we would need
1851 * a separate wait queue to handle that.
1854 i915_gem_retire_requests_ring(dev, ring);
1860 * Waits for a sequence number to be signaled, and cleans up the
1861 * request and object lists appropriately for that event.
1864 i915_wait_request(struct drm_device *dev, uint32_t seqno,
1865 struct intel_ring_buffer *ring)
1867 return i915_do_wait_request(dev, seqno, 1, ring);
1871 i915_gem_flush(struct drm_device *dev,
1872 uint32_t invalidate_domains,
1873 uint32_t flush_domains)
1875 drm_i915_private_t *dev_priv = dev->dev_private;
1876 if (flush_domains & I915_GEM_DOMAIN_CPU)
1877 drm_agp_chipset_flush(dev);
1878 dev_priv->render_ring.flush(dev, &dev_priv->render_ring,
1883 dev_priv->bsd_ring.flush(dev, &dev_priv->bsd_ring,
1889 i915_gem_flush_ring(struct drm_device *dev,
1890 uint32_t invalidate_domains,
1891 uint32_t flush_domains,
1892 struct intel_ring_buffer *ring)
1894 if (flush_domains & I915_GEM_DOMAIN_CPU)
1895 drm_agp_chipset_flush(dev);
1896 ring->flush(dev, ring,
1902 * Ensures that all rendering to the object has completed and the object is
1903 * safe to unbind from the GTT or access from the CPU.
1906 i915_gem_object_wait_rendering(struct drm_gem_object *obj)
1908 struct drm_device *dev = obj->dev;
1909 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1912 /* This function only exists to support waiting for existing rendering,
1913 * not for emitting required flushes.
1915 BUG_ON((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0);
1917 /* If there is rendering queued on the buffer being evicted, wait for
1920 if (obj_priv->active) {
1922 DRM_INFO("%s: object %p wait for seqno %08x\n",
1923 __func__, obj, obj_priv->last_rendering_seqno);
1925 ret = i915_wait_request(dev,
1926 obj_priv->last_rendering_seqno, obj_priv->ring);
1935 * Unbinds an object from the GTT aperture.
1938 i915_gem_object_unbind(struct drm_gem_object *obj)
1940 struct drm_device *dev = obj->dev;
1941 drm_i915_private_t *dev_priv = dev->dev_private;
1942 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1946 DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj);
1947 DRM_INFO("gtt_space %p\n", obj_priv->gtt_space);
1949 if (obj_priv->gtt_space == NULL)
1952 if (obj_priv->pin_count != 0) {
1953 DRM_ERROR("Attempting to unbind pinned buffer\n");
1957 /* blow away mappings if mapped through GTT */
1958 i915_gem_release_mmap(obj);
1960 /* Move the object to the CPU domain to ensure that
1961 * any possible CPU writes while it's not in the GTT
1962 * are flushed when we go to remap it. This will
1963 * also ensure that all pending GPU writes are finished
1966 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
1967 if (ret == -ERESTARTSYS)
1969 /* Continue on if we fail due to EIO, the GPU is hung so we
1970 * should be safe and we need to cleanup or else we might
1971 * cause memory corruption through use-after-free.
1974 BUG_ON(obj_priv->active);
1976 /* release the fence reg _after_ flushing */
1977 if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
1978 i915_gem_clear_fence_reg(obj);
1980 if (obj_priv->agp_mem != NULL) {
1981 drm_unbind_agp(obj_priv->agp_mem);
1982 drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
1983 obj_priv->agp_mem = NULL;
1986 i915_gem_object_put_pages(obj);
1987 BUG_ON(obj_priv->pages_refcount);
1989 if (obj_priv->gtt_space) {
1990 atomic_dec(&dev->gtt_count);
1991 atomic_sub(obj->size, &dev->gtt_memory);
1993 drm_mm_put_block(obj_priv->gtt_space);
1994 obj_priv->gtt_space = NULL;
1997 /* Remove ourselves from the LRU list if present. */
1998 spin_lock(&dev_priv->mm.active_list_lock);
1999 if (!list_empty(&obj_priv->list))
2000 list_del_init(&obj_priv->list);
2001 spin_unlock(&dev_priv->mm.active_list_lock);
2003 if (i915_gem_object_is_purgeable(obj_priv))
2004 i915_gem_object_truncate(obj);
2006 trace_i915_gem_object_unbind(obj);
2012 i915_gem_scan_inactive_list_and_evict(struct drm_device *dev, int min_size,
2013 unsigned alignment, int *found)
2015 drm_i915_private_t *dev_priv = dev->dev_private;
2016 struct drm_gem_object *obj;
2017 struct drm_i915_gem_object *obj_priv;
2018 struct drm_gem_object *best = NULL;
2019 struct drm_gem_object *first = NULL;
2021 /* Try to find the smallest clean object */
2022 list_for_each_entry(obj_priv, &dev_priv->mm.inactive_list, list) {
2023 struct drm_gem_object *obj = &obj_priv->base;
2024 if (obj->size >= min_size) {
2025 if ((!obj_priv->dirty ||
2026 i915_gem_object_is_purgeable(obj_priv)) &&
2027 (!best || obj->size < best->size)) {
2029 if (best->size == min_size)
2037 obj = best ? best : first;
2047 DRM_INFO("%s: evicting %p\n", __func__, obj);
2049 obj_priv = to_intel_bo(obj);
2050 BUG_ON(obj_priv->pin_count != 0);
2051 BUG_ON(obj_priv->active);
2053 /* Wait on the rendering and unbind the buffer. */
2054 return i915_gem_object_unbind(obj);
2058 i915_gpu_idle(struct drm_device *dev)
2060 drm_i915_private_t *dev_priv = dev->dev_private;
2062 uint32_t seqno1, seqno2;
2065 spin_lock(&dev_priv->mm.active_list_lock);
2066 lists_empty = (list_empty(&dev_priv->mm.flushing_list) &&
2067 list_empty(&dev_priv->render_ring.active_list) &&
(!HAS_BSD(dev) ||
2069 list_empty(&dev_priv->bsd_ring.active_list)));
2070 spin_unlock(&dev_priv->mm.active_list_lock);
2075 /* Flush everything onto the inactive list. */
2076 i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
2077 seqno1 = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS,
2078 &dev_priv->render_ring);
2081 ret = i915_wait_request(dev, seqno1, &dev_priv->render_ring);
2084 seqno2 = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS,
2085 &dev_priv->bsd_ring);
2089 ret = i915_wait_request(dev, seqno2, &dev_priv->bsd_ring);
2099 i915_gem_evict_everything(struct drm_device *dev)
2101 drm_i915_private_t *dev_priv = dev->dev_private;
2105 spin_lock(&dev_priv->mm.active_list_lock);
2106 lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
2107 list_empty(&dev_priv->mm.flushing_list) &&
2108 list_empty(&dev_priv->render_ring.active_list) &&
(!HAS_BSD(dev)
2110 || list_empty(&dev_priv->bsd_ring.active_list)));
2111 spin_unlock(&dev_priv->mm.active_list_lock);
2116 /* Flush everything (on to the inactive lists) and evict */
2117 ret = i915_gpu_idle(dev);
2121 BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
2123 ret = i915_gem_evict_from_inactive_list(dev);
2127 spin_lock(&dev_priv->mm.active_list_lock);
2128 lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
2129 list_empty(&dev_priv->mm.flushing_list) &&
2130 list_empty(&dev_priv->render_ring.active_list) &&
(!HAS_BSD(dev)
2132 || list_empty(&dev_priv->bsd_ring.active_list)));
2133 spin_unlock(&dev_priv->mm.active_list_lock);
2134 BUG_ON(!lists_empty);
2140 i915_gem_evict_something(struct drm_device *dev,
2141 int min_size, unsigned alignment)
2143 drm_i915_private_t *dev_priv = dev->dev_private;
2146 struct intel_ring_buffer *render_ring = &dev_priv->render_ring;
2147 struct intel_ring_buffer *bsd_ring = &dev_priv->bsd_ring;
2149 i915_gem_retire_requests(dev);
2151 /* If there's an inactive buffer available now, grab it
2154 ret = i915_gem_scan_inactive_list_and_evict(dev, min_size,
2160 /* If we didn't get anything, but the ring is still processing
2161 * things, wait for the next to finish and hopefully leave us
2162 * a buffer to evict.
2164 if (!list_empty(&render_ring->request_list)) {
2165 struct drm_i915_gem_request *request;
2167 request = list_first_entry(&render_ring->request_list,
2168 struct drm_i915_gem_request,
2171 ret = i915_wait_request(dev,
2172 request->seqno, request->ring);
2179 if (HAS_BSD(dev) && !list_empty(&bsd_ring->request_list)) {
2180 struct drm_i915_gem_request *request;
2182 request = list_first_entry(&bsd_ring->request_list,
2183 struct drm_i915_gem_request,
2186 ret = i915_wait_request(dev,
2187 request->seqno, request->ring);
2194 /* If we didn't have anything on the request list but there
2195 * are buffers awaiting a flush, emit one and try again.
2196 * When we wait on it, those buffers waiting for that flush
2197 * will get moved to inactive.
2199 if (!list_empty(&dev_priv->mm.flushing_list)) {
2200 struct drm_gem_object *obj = NULL;
2201 struct drm_i915_gem_object *obj_priv;
2203 /* Find an object that we can immediately reuse */
2204 list_for_each_entry(obj_priv, &dev_priv->mm.flushing_list, list) {
2205 obj = &obj_priv->base;
2206 if (obj->size >= min_size)
2215 i915_gem_flush_ring(dev,
2219 seqno = i915_add_request(dev, NULL,
2228 /* If we didn't do any of the above, there's no single buffer
2229 * large enough to swap out for the new one, so just evict
2230 * everything and start again. (This should be rare.)
2232 if (!list_empty (&dev_priv->mm.inactive_list))
2233 return i915_gem_evict_from_inactive_list(dev);
2235 return i915_gem_evict_everything(dev);
2240 i915_gem_object_get_pages(struct drm_gem_object *obj,
2243 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2245 struct address_space *mapping;
2246 struct inode *inode;
2249 BUG_ON(obj_priv->pages_refcount
2250 == DRM_I915_GEM_OBJECT_MAX_PAGES_REFCOUNT);
2252 if (obj_priv->pages_refcount++ != 0)
2255 /* Get the list of pages out of our struct file. They'll be pinned
2256 * at this point until we release them.
2258 page_count = obj->size / PAGE_SIZE;
2259 BUG_ON(obj_priv->pages != NULL);
2260 obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *));
2261 if (obj_priv->pages == NULL) {
2262 obj_priv->pages_refcount--;
2266 inode = obj->filp->f_path.dentry->d_inode;
2267 mapping = inode->i_mapping;
2268 for (i = 0; i < page_count; i++) {
2269 page = read_cache_page_gfp(mapping, i,
2277 obj_priv->pages[i] = page;
2280 if (obj_priv->tiling_mode != I915_TILING_NONE)
2281 i915_gem_object_do_bit_17_swizzle(obj);
2287 page_cache_release(obj_priv->pages[i]);
2289 drm_free_large(obj_priv->pages);
2290 obj_priv->pages = NULL;
2291 obj_priv->pages_refcount--;
2292 return PTR_ERR(page);
2295 static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg)
2297 struct drm_gem_object *obj = reg->obj;
2298 struct drm_device *dev = obj->dev;
2299 drm_i915_private_t *dev_priv = dev->dev_private;
2300 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2301 int regnum = obj_priv->fence_reg;
2304 val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
0xfffff000) << 32;
2306 val |= obj_priv->gtt_offset & 0xfffff000;
2307 val |= (uint64_t)((obj_priv->stride / 128) - 1) <<
2308 SANDYBRIDGE_FENCE_PITCH_SHIFT;
2310 if (obj_priv->tiling_mode == I915_TILING_Y)
2311 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2312 val |= I965_FENCE_REG_VALID;
2314 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8), val);
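/* The 64-bit fence packs the last and first GTT page addresses of the
 * object, the pitch in 128-byte units minus one, the tiling mode and a
 * valid bit, as assembled above. */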
2317 static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
2319 struct drm_gem_object *obj = reg->obj;
2320 struct drm_device *dev = obj->dev;
2321 drm_i915_private_t *dev_priv = dev->dev_private;
2322 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2323 int regnum = obj_priv->fence_reg;
2326 val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
0xfffff000) << 32;
2328 val |= obj_priv->gtt_offset & 0xfffff000;
2329 val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2330 if (obj_priv->tiling_mode == I915_TILING_Y)
2331 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2332 val |= I965_FENCE_REG_VALID;
2334 I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val);
2337 static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
2339 struct drm_gem_object *obj = reg->obj;
2340 struct drm_device *dev = obj->dev;
2341 drm_i915_private_t *dev_priv = dev->dev_private;
2342 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2343 int regnum = obj_priv->fence_reg;
2345 uint32_t fence_reg, val;
2348 if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
2349 (obj_priv->gtt_offset & (obj->size - 1))) {
2350 WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n",
2351 __func__, obj_priv->gtt_offset, obj->size);
2355 if (obj_priv->tiling_mode == I915_TILING_Y &&
2356 HAS_128_BYTE_Y_TILING(dev))
2361 /* Note: pitch better be a power of two tile widths */
2362 pitch_val = obj_priv->stride / tile_width;
2363 pitch_val = ffs(pitch_val) - 1;
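/* Worked example (illustrative only): with X tiling and the 512-byte
* X-tile width assumed here, a 2048-byte stride gives
*
*	pitch_val = ffs(2048 / 512) - 1 = ffs(4) - 1 = 2
*
* which is the value shifted into I830_FENCE_PITCH_SHIFT below.
*/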
2365 if (obj_priv->tiling_mode == I915_TILING_Y &&
2366 HAS_128_BYTE_Y_TILING(dev))
2367 WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2369 WARN_ON(pitch_val > I915_FENCE_MAX_PITCH_VAL);
2371 val = obj_priv->gtt_offset;
2372 if (obj_priv->tiling_mode == I915_TILING_Y)
2373 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2374 val |= I915_FENCE_SIZE_BITS(obj->size);
2375 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2376 val |= I830_FENCE_REG_VALID;
2379 fence_reg = FENCE_REG_830_0 + (regnum * 4);
2381 fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4);
2382 I915_WRITE(fence_reg, val);
2385 static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
2387 struct drm_gem_object *obj = reg->obj;
2388 struct drm_device *dev = obj->dev;
2389 drm_i915_private_t *dev_priv = dev->dev_private;
2390 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2391 int regnum = obj_priv->fence_reg;
2394 uint32_t fence_size_bits;
2396 if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) ||
2397 (obj_priv->gtt_offset & (obj->size - 1))) {
2398 WARN(1, "%s: object 0x%08x not 512K or size aligned\n",
2399 __func__, obj_priv->gtt_offset);
2403 pitch_val = obj_priv->stride / 128;
2404 pitch_val = ffs(pitch_val) - 1;
2405 WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2407 val = obj_priv->gtt_offset;
2408 if (obj_priv->tiling_mode == I915_TILING_Y)
2409 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2410 fence_size_bits = I830_FENCE_SIZE_BITS(obj->size);
2411 WARN_ON(fence_size_bits & ~0x00000f00);
2412 val |= fence_size_bits;
2413 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2414 val |= I830_FENCE_REG_VALID;
2416 I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
2419 static int i915_find_fence_reg(struct drm_device *dev)
2421 struct drm_i915_fence_reg *reg = NULL;
2422 struct drm_i915_gem_object *obj_priv = NULL;
2423 struct drm_i915_private *dev_priv = dev->dev_private;
2424 struct drm_gem_object *obj = NULL;
2427 /* First try to find a free reg */
2429 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2430 reg = &dev_priv->fence_regs[i];
2434 obj_priv = to_intel_bo(reg->obj);
2435 if (!obj_priv->pin_count)
2442 /* None available, try to steal one or wait for a user to finish */
2443 i = I915_FENCE_REG_NONE;
2444 list_for_each_entry(reg, &dev_priv->mm.fence_list,
2447 obj_priv = to_intel_bo(obj);
2449 if (obj_priv->pin_count)
2453 i = obj_priv->fence_reg;
2457 BUG_ON(i == I915_FENCE_REG_NONE);
2459 /* We only have a reference on obj from the active list. put_fence_reg
2460 * might drop that one, causing a use-after-free in it. So hold a
2461 * private reference to obj like the other callers of put_fence_reg
2462 * (set_tiling ioctl) do. */
2463 drm_gem_object_reference(obj);
2464 ret = i915_gem_object_put_fence_reg(obj);
2465 drm_gem_object_unreference(obj);
2473 * i915_gem_object_get_fence_reg - set up a fence reg for an object
2474 * @obj: object to map through a fence reg
2476 * When mapping objects through the GTT, userspace wants to be able to write
2477 * to them without having to worry about swizzling if the object is tiled.
2479 * This function walks the fence regs looking for a free one for @obj,
2480 * stealing one if it can't find any.
2482 * It then sets up the reg based on the object's properties: address, pitch
2483 * and tiling format.
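*
* Illustrative call pattern (a sketch mirroring the pin-and-relocate
* path later in this file, not a separate API):
*
*	ret = i915_gem_object_pin(obj, alignment);
*	if (ret == 0) {
*		ret = i915_gem_object_get_fence_reg(obj);
*		if (ret)
*			i915_gem_object_unpin(obj);
*	}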
2486 i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
2488 struct drm_device *dev = obj->dev;
2489 struct drm_i915_private *dev_priv = dev->dev_private;
2490 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2491 struct drm_i915_fence_reg *reg = NULL;
2494 /* Just update our place in the LRU if our fence is getting used. */
2495 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
2496 reg = &dev_priv->fence_regs[obj_priv->fence_reg];
2497 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2501 switch (obj_priv->tiling_mode) {
2502 case I915_TILING_NONE:
2503 WARN(1, "allocating a fence for non-tiled object?\n");
2506 if (!obj_priv->stride)
2508 WARN((obj_priv->stride & (512 - 1)),
2509 "object 0x%08x is X tiled but has non-512B pitch\n",
2510 obj_priv->gtt_offset);
2513 if (!obj_priv->stride)
2515 WARN((obj_priv->stride & (128 - 1)),
2516 "object 0x%08x is Y tiled but has non-128B pitch\n",
2517 obj_priv->gtt_offset);
2521 ret = i915_find_fence_reg(dev);
2525 obj_priv->fence_reg = ret;
2526 reg = &dev_priv->fence_regs[obj_priv->fence_reg];
2527 list_add_tail(&reg->lru_list, &dev_priv->mm.fence_list);
2532 sandybridge_write_fence_reg(reg);
2533 else if (IS_I965G(dev))
2534 i965_write_fence_reg(reg);
2535 else if (IS_I9XX(dev))
2536 i915_write_fence_reg(reg);
2538 i830_write_fence_reg(reg);
2540 trace_i915_gem_object_get_fence(obj, obj_priv->fence_reg,
2541 obj_priv->tiling_mode);
2547 * i915_gem_clear_fence_reg - clear out fence register info
2548 * @obj: object to clear
2550 * Zeroes out the fence register itself and clears out the associated
2551 * data structures in dev_priv and obj_priv.
2554 i915_gem_clear_fence_reg(struct drm_gem_object *obj)
2556 struct drm_device *dev = obj->dev;
2557 drm_i915_private_t *dev_priv = dev->dev_private;
2558 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2559 struct drm_i915_fence_reg *reg =
2560 &dev_priv->fence_regs[obj_priv->fence_reg];
2563 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 +
2564 (obj_priv->fence_reg * 8), 0);
2565 } else if (IS_I965G(dev)) {
2566 I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0);
2570 if (obj_priv->fence_reg < 8)
2571 fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4;
2573 fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg -
2576 I915_WRITE(fence_reg, 0);
2580 obj_priv->fence_reg = I915_FENCE_REG_NONE;
2581 list_del_init(&reg->lru_list);
2585 * i915_gem_object_put_fence_reg - waits on outstanding fenced access
2586 * to the buffer to finish, and then resets the fence register.
2587 * @obj: tiled object holding a fence register.
2589 * Zeroes out the fence register itself and clears out the associated
2590 * data structures in dev_priv and obj_priv.
2593 i915_gem_object_put_fence_reg(struct drm_gem_object *obj)
2595 struct drm_device *dev = obj->dev;
2596 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2598 if (obj_priv->fence_reg == I915_FENCE_REG_NONE)
2601 /* If we've changed tiling, GTT-mappings of the object
2602 * need to re-fault to ensure that the correct fence register
2603 * setup is in place.
2605 i915_gem_release_mmap(obj);
2607 /* On the i915, GPU access to tiled buffers is via a fence,
2608 * therefore we must wait for any outstanding access to complete
2609 * before clearing the fence.
2611 if (!IS_I965G(dev)) {
2614 ret = i915_gem_object_flush_gpu_write_domain(obj);
2618 ret = i915_gem_object_wait_rendering(obj);
2623 i915_gem_object_flush_gtt_write_domain(obj);
2624 i915_gem_clear_fence_reg(obj);
2630 * Finds free space in the GTT aperture and binds the object there.
2633 i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
2635 struct drm_device *dev = obj->dev;
2636 drm_i915_private_t *dev_priv = dev->dev_private;
2637 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2638 struct drm_mm_node *free_space;
2639 gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
2642 if (obj_priv->madv != I915_MADV_WILLNEED) {
2643 DRM_ERROR("Attempting to bind a purgeable object\n");
2648 alignment = i915_gem_get_gtt_alignment(obj);
2649 if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) {
2650 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2654 /* If the object is bigger than the entire aperture, reject it early
2655 * before evicting everything in a vain attempt to find space.
2657 if (obj->size > dev->gtt_total) {
2658 DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2663 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
2664 obj->size, alignment, 0);
2665 if (free_space != NULL) {
2666 obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size,
2668 if (obj_priv->gtt_space != NULL)
2669 obj_priv->gtt_offset = obj_priv->gtt_space->start;
2671 if (obj_priv->gtt_space == NULL) {
2672 /* If the gtt is empty and we're still having trouble
2673 * fitting our object in, we're out of memory.
2676 DRM_INFO("%s: GTT full, evicting something\n", __func__);
2678 ret = i915_gem_evict_something(dev, obj->size, alignment);
2686 DRM_INFO("Binding object of size %zd at 0x%08x\n",
2687 obj->size, obj_priv->gtt_offset);
2689 ret = i915_gem_object_get_pages(obj, gfpmask);
2691 drm_mm_put_block(obj_priv->gtt_space);
2692 obj_priv->gtt_space = NULL;
2694 if (ret == -ENOMEM) {
2695 /* first try to clear up some space from the GTT */
2696 ret = i915_gem_evict_something(dev, obj->size,
2699 /* now try to shrink everyone else */
2714 /* Create an AGP memory structure pointing at our pages, and bind it
2717 obj_priv->agp_mem = drm_agp_bind_pages(dev,
2719 obj->size >> PAGE_SHIFT,
2720 obj_priv->gtt_offset,
2721 obj_priv->agp_type);
2722 if (obj_priv->agp_mem == NULL) {
2723 i915_gem_object_put_pages(obj);
2724 drm_mm_put_block(obj_priv->gtt_space);
2725 obj_priv->gtt_space = NULL;
2727 ret = i915_gem_evict_something(dev, obj->size, alignment);
2733 atomic_inc(&dev->gtt_count);
2734 atomic_add(obj->size, &dev->gtt_memory);
2736 /* keep track of the bound object by adding it to the inactive list */
2737 list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
2739 /* Assert that the object is not currently in any GPU domain. As it
2740 * wasn't in the GTT, there shouldn't be any way it could have been in
2743 BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2744 BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
2746 trace_i915_gem_object_bind(obj, obj_priv->gtt_offset);
2752 i915_gem_clflush_object(struct drm_gem_object *obj)
2754 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2756 /* If we don't have a page list set up, then we're not pinned
2757 * to GPU, and we can ignore the cache flush because it'll happen
2758 * again at bind time.
2760 if (obj_priv->pages == NULL)
2763 trace_i915_gem_object_clflush(obj);
2765 drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
2768 /** Flushes any GPU write domain for the object if it's dirty. */
2770 i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
2772 struct drm_device *dev = obj->dev;
2773 uint32_t old_write_domain;
2774 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2776 if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
2779 /* Queue the GPU write cache flushing we need. */
2780 old_write_domain = obj->write_domain;
2781 i915_gem_flush(dev, 0, obj->write_domain);
2782 if (i915_add_request(dev, NULL, obj->write_domain, obj_priv->ring) == 0)
2785 trace_i915_gem_object_change_domain(obj,
2791 /** Flushes the GTT write domain for the object if it's dirty. */
2793 i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
2795 uint32_t old_write_domain;
2797 if (obj->write_domain != I915_GEM_DOMAIN_GTT)
2800 /* No actual flushing is required for the GTT write domain. Writes
2801 * to it immediately go to main memory as far as we know, so there's
2802 * no chipset flush. It also doesn't land in render cache.
2804 old_write_domain = obj->write_domain;
2805 obj->write_domain = 0;
2807 trace_i915_gem_object_change_domain(obj,
2812 /** Flushes the CPU write domain for the object if it's dirty. */
2814 i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
2816 struct drm_device *dev = obj->dev;
2817 uint32_t old_write_domain;
2819 if (obj->write_domain != I915_GEM_DOMAIN_CPU)
2822 i915_gem_clflush_object(obj);
2823 drm_agp_chipset_flush(dev);
2824 old_write_domain = obj->write_domain;
2825 obj->write_domain = 0;
2827 trace_i915_gem_object_change_domain(obj,
2833 i915_gem_object_flush_write_domain(struct drm_gem_object *obj)
2837 switch (obj->write_domain) {
2838 case I915_GEM_DOMAIN_GTT:
2839 i915_gem_object_flush_gtt_write_domain(obj);
2841 case I915_GEM_DOMAIN_CPU:
2842 i915_gem_object_flush_cpu_write_domain(obj);
2845 ret = i915_gem_object_flush_gpu_write_domain(obj);
2853 * Moves a single object to the GTT read, and possibly write domain.
2855 * This function returns when the move is complete, including waiting on
2859 i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
2861 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2862 uint32_t old_write_domain, old_read_domains;
2865 /* Not valid to be called on unbound objects. */
2866 if (obj_priv->gtt_space == NULL)
2869 ret = i915_gem_object_flush_gpu_write_domain(obj);
2873 /* Wait on any GPU rendering and flushing to occur. */
2874 ret = i915_gem_object_wait_rendering(obj);
2878 old_write_domain = obj->write_domain;
2879 old_read_domains = obj->read_domains;
2881 /* If we're writing through the GTT domain, then CPU and GPU caches
2882 * will need to be invalidated at next use.
2885 obj->read_domains &= I915_GEM_DOMAIN_GTT;
2887 i915_gem_object_flush_cpu_write_domain(obj);
2889 /* It should now be out of any other write domains, and we can update
2890 * the domain values for our changes.
2892 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2893 obj->read_domains |= I915_GEM_DOMAIN_GTT;
2895 obj->write_domain = I915_GEM_DOMAIN_GTT;
2896 obj_priv->dirty = 1;
2899 trace_i915_gem_object_change_domain(obj,
2907 * Prepare buffer for display plane. Use uninterruptible for possible flush
2908 * wait, since during modesetting we're not supposed to be interrupted.
2911 i915_gem_object_set_to_display_plane(struct drm_gem_object *obj)
2913 struct drm_device *dev = obj->dev;
2914 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
2915 uint32_t old_write_domain, old_read_domains;
2918 /* Not valid to be called on unbound objects. */
2919 if (obj_priv->gtt_space == NULL)
2922 ret = i915_gem_object_flush_gpu_write_domain(obj);
2926 /* Wait on any GPU rendering and flushing to occur. */
2927 if (obj_priv->active) {
2929 DRM_INFO("%s: object %p wait for seqno %08x\n",
2930 __func__, obj, obj_priv->last_rendering_seqno);
2932 ret = i915_do_wait_request(dev,
2933 obj_priv->last_rendering_seqno,
2940 i915_gem_object_flush_cpu_write_domain(obj);
2942 old_write_domain = obj->write_domain;
2943 old_read_domains = obj->read_domains;
2945 /* It should now be out of any other write domains, and we can update
2946 * the domain values for our changes.
2948 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2949 obj->read_domains = I915_GEM_DOMAIN_GTT;
2950 obj->write_domain = I915_GEM_DOMAIN_GTT;
2951 obj_priv->dirty = 1;
2953 trace_i915_gem_object_change_domain(obj,
2961 * Moves a single object to the CPU read, and possibly write domain.
2963 * This function returns when the move is complete, including waiting on
2967 i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
2969 uint32_t old_write_domain, old_read_domains;
2972 ret = i915_gem_object_flush_gpu_write_domain(obj);
2976 /* Wait on any GPU rendering and flushing to occur. */
2977 ret = i915_gem_object_wait_rendering(obj);
2981 i915_gem_object_flush_gtt_write_domain(obj);
2983 /* If we have a partially-valid cache of the object in the CPU,
2984 * finish invalidating it and free the per-page flags.
2986 i915_gem_object_set_to_full_cpu_read_domain(obj);
2988 old_write_domain = obj->write_domain;
2989 old_read_domains = obj->read_domains;
2991 /* Flush the CPU cache if it's still invalid. */
2992 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2993 i915_gem_clflush_object(obj);
2995 obj->read_domains |= I915_GEM_DOMAIN_CPU;
2998 /* It should now be out of any other write domains, and we can update
2999 * the domain values for our changes.
3001 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3003 /* If we're writing through the CPU, then the GPU read domains will
3004 * need to be invalidated at next use.
3007 obj->read_domains &= I915_GEM_DOMAIN_CPU;
3008 obj->write_domain = I915_GEM_DOMAIN_CPU;
3011 trace_i915_gem_object_change_domain(obj,
3019 * Set the next domain for the specified object. This
3020 * may not actually perform the necessary flushing/invalidating though,
3021 * as that may want to be batched with other set_domain operations
3023 * This is (we hope) the only really tricky part of gem. The goal
3024 * is fairly simple -- track which caches hold bits of the object
3025 * and make sure they remain coherent. A few concrete examples may
3026 * help to explain how it works. For shorthand, we use the notation
3027 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
3028 * a pair of read and write domain masks (here both set to CPU).
3030 * Case 1: the batch buffer
3036 * 5. Unmapped from GTT
3039 * Let's take these a step at a time
3042 * Pages allocated from the kernel may still have
3043 * cache contents, so we set them to (CPU, CPU) always.
3044 * 2. Written by CPU (using pwrite)
3045 * The pwrite function calls set_domain (CPU, CPU) and
3046 * this function does nothing (as nothing changes)
3048 * This function asserts that the object is not
3049 * currently in any GPU-based read or write domains
3051 * i915_gem_execbuffer calls set_domain (COMMAND, 0).
3052 * As write_domain is zero, this function adds in the
3053 * current read domains (CPU+COMMAND, 0).
3054 * flush_domains is set to CPU.
3055 * invalidate_domains is set to COMMAND
3056 * clflush is run to get data out of the CPU caches
3057 * then i915_dev_set_domain calls i915_gem_flush to
3058 * emit an MI_FLUSH and drm_agp_chipset_flush
3059 * 5. Unmapped from GTT
3060 * i915_gem_object_unbind calls set_domain (CPU, CPU)
3061 * flush_domains and invalidate_domains end up both zero
3062 * so no flushing/invalidating happens
3066 * Case 2: The shared render buffer
3070 * 3. Read/written by GPU
3071 * 4. set_domain to (CPU,CPU)
3072 * 5. Read/written by CPU
3073 * 6. Read/written by GPU
3076 * Same as last example, (CPU, CPU)
3078 * Nothing changes (assertions find that it is not in the GPU)
3079 * 3. Read/written by GPU
3080 * execbuffer calls set_domain (RENDER, RENDER)
3081 * flush_domains gets CPU
3082 * invalidate_domains gets GPU
3084 * MI_FLUSH and drm_agp_chipset_flush
3085 * 4. set_domain (CPU, CPU)
3086 * flush_domains gets GPU
3087 * invalidate_domains gets CPU
3088 * wait_rendering (obj) to make sure all drawing is complete.
3089 * This will include an MI_FLUSH to get the data from GPU
3091 * clflush (obj) to invalidate the CPU cache
3092 * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
3093 * 5. Read/written by CPU
3094 * cache lines are loaded and dirtied
3095 * 6. Read/written by GPU
3096 * Same as last GPU access
3098 * Case 3: The constant buffer
3103 * 4. Updated (written) by CPU again
3112 * flush_domains = CPU
3113 * invalidate_domains = RENDER
3116 * drm_agp_chipset_flush
3117 * 4. Updated (written) by CPU again
3119 * flush_domains = 0 (no previous write domain)
3120 * invalidate_domains = 0 (no new read domains)
3123 * flush_domains = CPU
3124 * invalidate_domains = RENDER
3127 * drm_agp_chipset_flush
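*
* Worked example of the mask arithmetic below (illustrative): take
* Case 2, step 3 -- the buffer was last written by the CPU
* (write_domain == CPU) and execbuffer asks for (RENDER, RENDER):
*
*	flush_domains      |= I915_GEM_DOMAIN_CPU;
*	invalidate_domains |= I915_GEM_DOMAIN_RENDER & ~I915_GEM_DOMAIN_CPU;
*
* so the CPU cache is clflushed and the render cache invalidated,
* matching the "flush_domains gets CPU / invalidate_domains gets GPU"
* outcome described above.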
3130 i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
3132 struct drm_device *dev = obj->dev;
3133 drm_i915_private_t *dev_priv = dev->dev_private;
3134 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3135 uint32_t invalidate_domains = 0;
3136 uint32_t flush_domains = 0;
3137 uint32_t old_read_domains;
3139 BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU);
3140 BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU);
3142 intel_mark_busy(dev, obj);
3145 DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n",
3147 obj->read_domains, obj->pending_read_domains,
3148 obj->write_domain, obj->pending_write_domain);
3151 * If the object isn't moving to a new write domain,
3152 * let the object stay in multiple read domains
3154 if (obj->pending_write_domain == 0)
3155 obj->pending_read_domains |= obj->read_domains;
3157 obj_priv->dirty = 1;
3160 * Flush the current write domain if
3161 * the new read domains don't match. Invalidate
3162 * any read domains which differ from the old
3165 if (obj->write_domain &&
3166 obj->write_domain != obj->pending_read_domains) {
3167 flush_domains |= obj->write_domain;
3168 invalidate_domains |=
3169 obj->pending_read_domains & ~obj->write_domain;
3172 * Invalidate any read caches which may have
3173 * stale data. That is, any new read domains.
3175 invalidate_domains |= obj->pending_read_domains & ~obj->read_domains;
3176 if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) {
3178 DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n",
3179 __func__, flush_domains, invalidate_domains);
3181 i915_gem_clflush_object(obj);
3184 old_read_domains = obj->read_domains;
3186 /* The actual obj->write_domain will be updated with
3187 * pending_write_domain after we emit the accumulated flush for all
3188 * of our domain changes in execbuffers (which clears objects'
3189 * write_domains). So if we have a current write domain that we
3190 * aren't changing, set pending_write_domain to that.
3192 if (flush_domains == 0 && obj->pending_write_domain == 0)
3193 obj->pending_write_domain = obj->write_domain;
3194 obj->read_domains = obj->pending_read_domains;
3196 if (flush_domains & I915_GEM_GPU_DOMAINS) {
3197 if (obj_priv->ring == &dev_priv->render_ring)
3198 dev_priv->flush_rings |= FLUSH_RENDER_RING;
3199 else if (obj_priv->ring == &dev_priv->bsd_ring)
3200 dev_priv->flush_rings |= FLUSH_BSD_RING;
3203 dev->invalidate_domains |= invalidate_domains;
3204 dev->flush_domains |= flush_domains;
3206 DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n",
3208 obj->read_domains, obj->write_domain,
3209 dev->invalidate_domains, dev->flush_domains);
3212 trace_i915_gem_object_change_domain(obj,
3218 * Moves the object from a partial CPU read domain to a full one.
3220 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
3221 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
3224 i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj)
3226 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3228 if (!obj_priv->page_cpu_valid)
3231 /* If we're partially in the CPU read domain, finish moving it in.
3233 if (obj->read_domains & I915_GEM_DOMAIN_CPU) {
3236 for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) {
3237 if (obj_priv->page_cpu_valid[i])
3239 drm_clflush_pages(obj_priv->pages + i, 1);
3243 /* Free the page_cpu_valid mappings which are now stale, whether
3244 * or not we've got I915_GEM_DOMAIN_CPU.
3246 kfree(obj_priv->page_cpu_valid);
3247 obj_priv->page_cpu_valid = NULL;
3251 * Set the CPU read domain on a range of the object.
3253 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
3254 * not entirely valid. The page_cpu_valid member of the object flags which
3255 * pages have been flushed, and will be respected by
3256 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping
3257 * of the whole object.
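*
* Illustrative arithmetic (assuming 4KiB pages): a call with
* offset == 4096 and size == 8192 walks pages 4096/PAGE_SIZE == 1
* through (4096 + 8192 - 1)/PAGE_SIZE == 2, clflushing each page not
* yet marked in page_cpu_valid and then marking it valid.
*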
3259 * This function returns when the move is complete, including waiting on
3263 i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
3264 uint64_t offset, uint64_t size)
3266 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3267 uint32_t old_read_domains;
3270 if (offset == 0 && size == obj->size)
3271 return i915_gem_object_set_to_cpu_domain(obj, 0);
3273 ret = i915_gem_object_flush_gpu_write_domain(obj);
3277 /* Wait on any GPU rendering and flushing to occur. */
3278 ret = i915_gem_object_wait_rendering(obj);
3281 i915_gem_object_flush_gtt_write_domain(obj);
3283 /* If we're already fully in the CPU read domain, we're done. */
3284 if (obj_priv->page_cpu_valid == NULL &&
3285 (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0)
3288 /* Otherwise, create/clear the per-page CPU read domain flag if we're
3289 * newly adding I915_GEM_DOMAIN_CPU
3291 if (obj_priv->page_cpu_valid == NULL) {
3292 obj_priv->page_cpu_valid = kzalloc(obj->size / PAGE_SIZE,
3294 if (obj_priv->page_cpu_valid == NULL)
3296 } else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0)
3297 memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE);
3299 /* Flush the cache on any pages that are still invalid from the CPU's
3302 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
3304 if (obj_priv->page_cpu_valid[i])
3307 drm_clflush_pages(obj_priv->pages + i, 1);
3309 obj_priv->page_cpu_valid[i] = 1;
3312 /* It should now be out of any other write domains, and we can update
3313 * the domain values for our changes.
3315 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3317 old_read_domains = obj->read_domains;
3318 obj->read_domains |= I915_GEM_DOMAIN_CPU;
3320 trace_i915_gem_object_change_domain(obj,
3328 * Pin an object to the GTT and evaluate the relocations landing in it.
3331 i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
3332 struct drm_file *file_priv,
3333 struct drm_i915_gem_exec_object2 *entry,
3334 struct drm_i915_gem_relocation_entry *relocs)
3336 struct drm_device *dev = obj->dev;
3337 drm_i915_private_t *dev_priv = dev->dev_private;
3338 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3340 void __iomem *reloc_page;
3343 need_fence = entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
3344 obj_priv->tiling_mode != I915_TILING_NONE;
3346 /* Check fence reg constraints and rebind if necessary */
3348 !i915_gem_object_fence_offset_ok(obj,
3349 obj_priv->tiling_mode)) {
3350 ret = i915_gem_object_unbind(obj);
3355 /* Choose the GTT offset for our buffer and put it there. */
3356 ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment);
3361 * Pre-965 chips need a fence register set up in order to
3362 * properly handle blits to/from tiled surfaces.
3365 ret = i915_gem_object_get_fence_reg(obj);
3367 i915_gem_object_unpin(obj);
3372 entry->offset = obj_priv->gtt_offset;
3374 /* Apply the relocations, using the GTT aperture to avoid cache
3375 * flushing requirements.
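*
* A single relocation amounts to (illustrative numbers only): if the
* target object ends up at GTT offset 0x00200000 and reloc->delta is
* 0x40, the dword at reloc->offset inside this object is rewritten to
* 0x00200040, and presumed_offset is updated so userspace can skip
* the patch next time around.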
3377 for (i = 0; i < entry->relocation_count; i++) {
3378 struct drm_i915_gem_relocation_entry *reloc = &relocs[i];
3379 struct drm_gem_object *target_obj;
3380 struct drm_i915_gem_object *target_obj_priv;
3381 uint32_t reloc_val, reloc_offset;
3382 uint32_t __iomem *reloc_entry;
3384 target_obj = drm_gem_object_lookup(obj->dev, file_priv,
3385 reloc->target_handle);
3386 if (target_obj == NULL) {
3387 i915_gem_object_unpin(obj);
3390 target_obj_priv = to_intel_bo(target_obj);
3393 DRM_INFO("%s: obj %p offset %08x target %d "
3394 "read %08x write %08x gtt %08x "
3395 "presumed %08x delta %08x\n",
3398 (int) reloc->offset,
3399 (int) reloc->target_handle,
3400 (int) reloc->read_domains,
3401 (int) reloc->write_domain,
3402 (int) target_obj_priv->gtt_offset,
3403 (int) reloc->presumed_offset,
3407 /* The target buffer should have appeared before us in the
3408 * exec_object list, so it should have a GTT space bound by now.
3410 if (target_obj_priv->gtt_space == NULL) {
3411 DRM_ERROR("No GTT space found for object %d\n",
3412 reloc->target_handle);
3413 drm_gem_object_unreference(target_obj);
3414 i915_gem_object_unpin(obj);
3418 /* Validate that the target is in a valid r/w GPU domain */
3419 if (reloc->write_domain & (reloc->write_domain - 1)) {
3420 DRM_ERROR("reloc with multiple write domains: "
3421 "obj %p target %d offset %d "
3422 "read %08x write %08x",
3423 obj, reloc->target_handle,
3424 (int) reloc->offset,
3425 reloc->read_domains,
3426 reloc->write_domain);
3429 if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
3430 reloc->read_domains & I915_GEM_DOMAIN_CPU) {
3431 DRM_ERROR("reloc with read/write CPU domains: "
3432 "obj %p target %d offset %d "
3433 "read %08x write %08x",
3434 obj, reloc->target_handle,
3435 (int) reloc->offset,
3436 reloc->read_domains,
3437 reloc->write_domain);
3438 drm_gem_object_unreference(target_obj);
3439 i915_gem_object_unpin(obj);
3442 if (reloc->write_domain && target_obj->pending_write_domain &&
3443 reloc->write_domain != target_obj->pending_write_domain) {
3444 DRM_ERROR("Write domain conflict: "
3445 "obj %p target %d offset %d "
3446 "new %08x old %08x\n",
3447 obj, reloc->target_handle,
3448 (int) reloc->offset,
3449 reloc->write_domain,
3450 target_obj->pending_write_domain);
3451 drm_gem_object_unreference(target_obj);
3452 i915_gem_object_unpin(obj);
3456 target_obj->pending_read_domains |= reloc->read_domains;
3457 target_obj->pending_write_domain |= reloc->write_domain;
3459 /* If the relocation already has the right value in it, no
3460 * more work needs to be done.
3462 if (target_obj_priv->gtt_offset == reloc->presumed_offset) {
3463 drm_gem_object_unreference(target_obj);
3467 /* Check that the relocation address is valid... */
3468 if (reloc->offset > obj->size - 4) {
3469 DRM_ERROR("Relocation beyond object bounds: "
3470 "obj %p target %d offset %d size %d.\n",
3471 obj, reloc->target_handle,
3472 (int) reloc->offset, (int) obj->size);
3473 drm_gem_object_unreference(target_obj);
3474 i915_gem_object_unpin(obj);
3477 if (reloc->offset & 3) {
3478 DRM_ERROR("Relocation not 4-byte aligned: "
3479 "obj %p target %d offset %d.\n",
3480 obj, reloc->target_handle,
3481 (int) reloc->offset);
3482 drm_gem_object_unreference(target_obj);
3483 i915_gem_object_unpin(obj);
3487 /* and points to somewhere within the target object. */
3488 if (reloc->delta >= target_obj->size) {
3489 DRM_ERROR("Relocation beyond target object bounds: "
3490 "obj %p target %d delta %d size %d.\n",
3491 obj, reloc->target_handle,
3492 (int) reloc->delta, (int) target_obj->size);
3493 drm_gem_object_unreference(target_obj);
3494 i915_gem_object_unpin(obj);
3498 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
3500 drm_gem_object_unreference(target_obj);
3501 i915_gem_object_unpin(obj);
3505 /* Map the page containing the relocation we're going to
3508 reloc_offset = obj_priv->gtt_offset + reloc->offset;
3509 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
3513 reloc_entry = (uint32_t __iomem *)(reloc_page +
3514 (reloc_offset & (PAGE_SIZE - 1)));
3515 reloc_val = target_obj_priv->gtt_offset + reloc->delta;
3518 DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n",
3519 obj, (unsigned int) reloc->offset,
3520 readl(reloc_entry), reloc_val);
3522 writel(reloc_val, reloc_entry);
3523 io_mapping_unmap_atomic(reloc_page, KM_USER0);
3525 /* The updated presumed offset for this entry will be
3526 * copied back out to the user.
3528 reloc->presumed_offset = target_obj_priv->gtt_offset;
3530 drm_gem_object_unreference(target_obj);
3535 i915_gem_dump_object(obj, 128, __func__, ~0);
3540 /* Throttle our rendering by waiting until the ring has completed our requests
3541 * emitted over 20 msec ago.
3543 * Note that if we were to use the current jiffies each time around the loop,
3544 * we wouldn't escape the function with any frames outstanding if the time to
3545 * render a frame was over 20ms.
3547 * This should get us reasonable parallelism between CPU and GPU but also
3548 * relatively low latency when blocking on a particular request to finish.
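*
* Concrete illustration (HZ-dependent, numbers are an example only):
* with HZ == 1000, msecs_to_jiffies(20) is 20 ticks, so a request
* emitted 25 jiffies ago is waited on, while one emitted 10 jiffies
* ago satisfies time_after_eq() and ends the walk.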
3551 i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
3553 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
3555 unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
3557 mutex_lock(&dev->struct_mutex);
3558 while (!list_empty(&i915_file_priv->mm.request_list)) {
3559 struct drm_i915_gem_request *request;
3561 request = list_first_entry(&i915_file_priv->mm.request_list,
3562 struct drm_i915_gem_request,
3565 if (time_after_eq(request->emitted_jiffies, recent_enough))
3568 ret = i915_wait_request(dev, request->seqno, request->ring);
3572 mutex_unlock(&dev->struct_mutex);
3578 i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object2 *exec_list,
3579 uint32_t buffer_count,
3580 struct drm_i915_gem_relocation_entry **relocs)
3582 uint32_t reloc_count = 0, reloc_index = 0, i;
3586 for (i = 0; i < buffer_count; i++) {
3587 if (reloc_count + exec_list[i].relocation_count < reloc_count)
3589 reloc_count += exec_list[i].relocation_count;
3592 *relocs = drm_calloc_large(reloc_count, sizeof(**relocs));
3593 if (*relocs == NULL) {
3594 DRM_ERROR("failed to alloc relocs, count %d\n", reloc_count);
3598 for (i = 0; i < buffer_count; i++) {
3599 struct drm_i915_gem_relocation_entry __user *user_relocs;
3601 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3603 ret = copy_from_user(&(*relocs)[reloc_index],
3605 exec_list[i].relocation_count *
3608 drm_free_large(*relocs);
3613 reloc_index += exec_list[i].relocation_count;
3620 i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object2 *exec_list,
3621 uint32_t buffer_count,
3622 struct drm_i915_gem_relocation_entry *relocs)
3624 uint32_t reloc_count = 0, i;
3630 for (i = 0; i < buffer_count; i++) {
3631 struct drm_i915_gem_relocation_entry __user *user_relocs;
3634 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3636 unwritten = copy_to_user(user_relocs,
3637 &relocs[reloc_count],
3638 exec_list[i].relocation_count *
3646 reloc_count += exec_list[i].relocation_count;
3650 drm_free_large(relocs);
3656 i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec,
3657 uint64_t exec_offset)
3659 uint32_t exec_start, exec_len;
3661 exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
3662 exec_len = (uint32_t) exec->batch_len;
3664 if ((exec_start | exec_len) & 0x7)
3674 i915_gem_wait_for_pending_flip(struct drm_device *dev,
3675 struct drm_gem_object **object_list,
3678 drm_i915_private_t *dev_priv = dev->dev_private;
3679 struct drm_i915_gem_object *obj_priv;
3684 prepare_to_wait(&dev_priv->pending_flip_queue,
3685 &wait, TASK_INTERRUPTIBLE);
3686 for (i = 0; i < count; i++) {
3687 obj_priv = to_intel_bo(object_list[i]);
3688 if (atomic_read(&obj_priv->pending_flip) > 0)
3694 if (!signal_pending(current)) {
3695 mutex_unlock(&dev->struct_mutex);
3697 mutex_lock(&dev->struct_mutex);
3703 finish_wait(&dev_priv->pending_flip_queue, &wait);
3710 i915_gem_do_execbuffer(struct drm_device *dev, void *data,
3711 struct drm_file *file_priv,
3712 struct drm_i915_gem_execbuffer2 *args,
3713 struct drm_i915_gem_exec_object2 *exec_list)
3715 drm_i915_private_t *dev_priv = dev->dev_private;
3716 struct drm_gem_object **object_list = NULL;
3717 struct drm_gem_object *batch_obj;
3718 struct drm_i915_gem_object *obj_priv;
3719 struct drm_clip_rect *cliprects = NULL;
3720 struct drm_i915_gem_relocation_entry *relocs = NULL;
3721 int ret = 0, ret2, i, pinned = 0;
3722 uint64_t exec_offset;
3723 uint32_t seqno, flush_domains, reloc_index;
3724 int pin_tries, flips;
3726 struct intel_ring_buffer *ring = NULL;
3729 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3730 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3732 if (args->flags & I915_EXEC_BSD) {
3733 if (!HAS_BSD(dev)) {
3734 DRM_ERROR("execbuf with wrong flag\n");
3737 ring = &dev_priv->bsd_ring;
3739 ring = &dev_priv->render_ring;
3742 if (args->buffer_count < 1) {
3743 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
3746 object_list = drm_malloc_ab(sizeof(*object_list), args->buffer_count);
3747 if (object_list == NULL) {
3748 DRM_ERROR("Failed to allocate object list for %d buffers\n",
3749 args->buffer_count);
3754 if (args->num_cliprects != 0) {
3755 cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects),
3757 if (cliprects == NULL) {
3762 ret = copy_from_user(cliprects,
3763 (struct drm_clip_rect __user *)
3764 (uintptr_t) args->cliprects_ptr,
3765 sizeof(*cliprects) * args->num_cliprects);
3767 DRM_ERROR("copy %d cliprects failed: %d\n",
3768 args->num_cliprects, ret);
3773 ret = i915_gem_get_relocs_from_user(exec_list, args->buffer_count,
3778 mutex_lock(&dev->struct_mutex);
3780 i915_verify_inactive(dev, __FILE__, __LINE__);
3782 if (atomic_read(&dev_priv->mm.wedged)) {
3783 mutex_unlock(&dev->struct_mutex);
3788 if (dev_priv->mm.suspended) {
3789 mutex_unlock(&dev->struct_mutex);
3794 /* Look up object handles */
3796 for (i = 0; i < args->buffer_count; i++) {
3797 object_list[i] = drm_gem_object_lookup(dev, file_priv,
3798 exec_list[i].handle);
3799 if (object_list[i] == NULL) {
3800 DRM_ERROR("Invalid object handle %d at index %d\n",
3801 exec_list[i].handle, i);
3802 /* prevent error path from reading uninitialized data */
3803 args->buffer_count = i + 1;
3808 obj_priv = to_intel_bo(object_list[i]);
3809 if (obj_priv->in_execbuffer) {
3810 DRM_ERROR("Object %p appears more than once in object list\n",
3812 /* prevent error path from reading uninitialized data */
3813 args->buffer_count = i + 1;
3817 obj_priv->in_execbuffer = true;
3818 flips += atomic_read(&obj_priv->pending_flip);
3822 ret = i915_gem_wait_for_pending_flip(dev, object_list,
3823 args->buffer_count);
3828 /* Pin and relocate */
3829 for (pin_tries = 0; ; pin_tries++) {
3833 for (i = 0; i < args->buffer_count; i++) {
3834 object_list[i]->pending_read_domains = 0;
3835 object_list[i]->pending_write_domain = 0;
3836 ret = i915_gem_object_pin_and_relocate(object_list[i],
3839 &relocs[reloc_index]);
3843 reloc_index += exec_list[i].relocation_count;
3849 /* error other than GTT full, or we've already tried again */
3850 if (ret != -ENOSPC || pin_tries >= 1) {
3851 if (ret != -ERESTARTSYS) {
3852 unsigned long long total_size = 0;
3854 for (i = 0; i < args->buffer_count; i++) {
3855 obj_priv = to_intel_bo(object_list[i]);
3857 total_size += object_list[i]->size;
3859 exec_list[i].flags & EXEC_OBJECT_NEEDS_FENCE &&
3860 obj_priv->tiling_mode != I915_TILING_NONE;
3862 DRM_ERROR("Failed to pin buffer %d of %d, total %llu bytes, %d fences: %d\n",
3863 pinned+1, args->buffer_count,
3864 total_size, num_fences,
3866 DRM_ERROR("%d objects [%d pinned], "
3867 "%d object bytes [%d pinned], "
3868 "%d/%d gtt bytes\n",
3869 atomic_read(&dev->object_count),
3870 atomic_read(&dev->pin_count),
3871 atomic_read(&dev->object_memory),
3872 atomic_read(&dev->pin_memory),
3873 atomic_read(&dev->gtt_memory),
3879 /* unpin all of our buffers */
3880 for (i = 0; i < pinned; i++)
3881 i915_gem_object_unpin(object_list[i]);
3884 /* evict everyone we can from the aperture */
3885 ret = i915_gem_evict_everything(dev);
3886 if (ret && ret != -ENOSPC)
3890 /* Set the pending read domains for the batch buffer to COMMAND */
3891 batch_obj = object_list[args->buffer_count-1];
3892 if (batch_obj->pending_write_domain) {
3893 DRM_ERROR("Attempting to use self-modifying batch buffer\n");
3897 batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
3899 /* Sanity check the batch buffer, prior to moving objects */
3900 exec_offset = exec_list[args->buffer_count - 1].offset;
3901 ret = i915_gem_check_execbuffer(args, exec_offset);
3903 DRM_ERROR("execbuf with invalid offset/length\n");
3907 i915_verify_inactive(dev, __FILE__, __LINE__);
3909 /* Zero the global flush/invalidate flags. These
3910 * will be modified as new domains are computed
3913 dev->invalidate_domains = 0;
3914 dev->flush_domains = 0;
3915 dev_priv->flush_rings = 0;
3917 for (i = 0; i < args->buffer_count; i++) {
3918 struct drm_gem_object *obj = object_list[i];
3920 /* Compute new gpu domains and update invalidate/flush */
3921 i915_gem_object_set_to_gpu_domain(obj);
3924 i915_verify_inactive(dev, __FILE__, __LINE__);
3926 if (dev->invalidate_domains | dev->flush_domains) {
3928 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
3930 dev->invalidate_domains,
3931 dev->flush_domains);
3934 dev->invalidate_domains,
3935 dev->flush_domains);
3936 if (dev_priv->flush_rings & FLUSH_RENDER_RING)
3937 (void)i915_add_request(dev, file_priv,
3939 &dev_priv->render_ring);
3940 if (dev_priv->flush_rings & FLUSH_BSD_RING)
3941 (void)i915_add_request(dev, file_priv,
3943 &dev_priv->bsd_ring);
3946 for (i = 0; i < args->buffer_count; i++) {
3947 struct drm_gem_object *obj = object_list[i];
3948 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
3949 uint32_t old_write_domain = obj->write_domain;
3951 obj->write_domain = obj->pending_write_domain;
3952 if (obj->write_domain)
3953 list_move_tail(&obj_priv->gpu_write_list,
3954 &dev_priv->mm.gpu_write_list);
3956 list_del_init(&obj_priv->gpu_write_list);
3958 trace_i915_gem_object_change_domain(obj,
3963 i915_verify_inactive(dev, __FILE__, __LINE__);
3966 for (i = 0; i < args->buffer_count; i++) {
3967 i915_gem_object_check_coherency(object_list[i],
3968 exec_list[i].handle);
3973 i915_gem_dump_object(batch_obj,
3979 /* Exec the batchbuffer */
3980 ret = ring->dispatch_gem_execbuffer(dev, ring, args,
3981 cliprects, exec_offset);
3983 DRM_ERROR("dispatch failed %d\n", ret);
3988 * Ensure that the commands in the batch buffer are
3989 * finished before the interrupt fires
3991 flush_domains = i915_retire_commands(dev, ring);
3993 i915_verify_inactive(dev, __FILE__, __LINE__);
3996 * Get a seqno representing the execution of the current buffer,
3997 * which we can wait on. We would like to mitigate these interrupts,
3998 * likely by only creating seqnos occasionally (so that we have
3999 * *some* interrupts representing completion of buffers that we can
4000 * wait on when trying to clear up gtt space).
4002 seqno = i915_add_request(dev, file_priv, flush_domains, ring);
4004 for (i = 0; i < args->buffer_count; i++) {
4005 struct drm_gem_object *obj = object_list[i];
4006 obj_priv = to_intel_bo(obj);
4008 i915_gem_object_move_to_active(obj, seqno, ring);
4010 DRM_INFO("%s: move to exec list %p\n", __func__, obj);
4014 i915_dump_lru(dev, __func__);
4017 i915_verify_inactive(dev, __FILE__, __LINE__);
4020 for (i = 0; i < pinned; i++)
4021 i915_gem_object_unpin(object_list[i]);
4023 for (i = 0; i < args->buffer_count; i++) {
4024 if (object_list[i]) {
4025 obj_priv = to_intel_bo(object_list[i]);
4026 obj_priv->in_execbuffer = false;
4028 drm_gem_object_unreference(object_list[i]);
4031 mutex_unlock(&dev->struct_mutex);
4034 /* Copy the updated relocations out regardless of current error
4035 * state. Failure to update the relocs would mean that the next
4036 * time userland calls execbuf, it would do so with presumed offset
4037 * state that didn't match the actual object state.
4039 ret2 = i915_gem_put_relocs_to_user(exec_list, args->buffer_count,
4042 DRM_ERROR("Failed to copy relocations back out: %d\n", ret2);
4048 drm_free_large(object_list);
4055 * Legacy execbuffer just creates an exec2 list from the original exec object
4056 * list array and passes it to the real function.
4059 i915_gem_execbuffer(struct drm_device *dev, void *data,
4060 struct drm_file *file_priv)
4062 struct drm_i915_gem_execbuffer *args = data;
4063 struct drm_i915_gem_execbuffer2 exec2;
4064 struct drm_i915_gem_exec_object *exec_list = NULL;
4065 struct drm_i915_gem_exec_object2 *exec2_list = NULL;
4069 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
4070 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
4073 if (args->buffer_count < 1) {
4074 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
4078 /* Copy in the exec list from userland */
4079 exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
4080 exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
4081 if (exec_list == NULL || exec2_list == NULL) {
4082 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
4083 args->buffer_count);
4084 drm_free_large(exec_list);
4085 drm_free_large(exec2_list);
4088 ret = copy_from_user(exec_list,
4089 (struct drm_i915_relocation_entry __user *)
4090 (uintptr_t) args->buffers_ptr,
4091 sizeof(*exec_list) * args->buffer_count);
4093 DRM_ERROR("copy %d exec entries failed %d\n",
4094 args->buffer_count, ret);
4095 drm_free_large(exec_list);
4096 drm_free_large(exec2_list);
4100 for (i = 0; i < args->buffer_count; i++) {
4101 exec2_list[i].handle = exec_list[i].handle;
4102 exec2_list[i].relocation_count = exec_list[i].relocation_count;
4103 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
4104 exec2_list[i].alignment = exec_list[i].alignment;
4105 exec2_list[i].offset = exec_list[i].offset;
4107 exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
4109 exec2_list[i].flags = 0;
4112 exec2.buffers_ptr = args->buffers_ptr;
4113 exec2.buffer_count = args->buffer_count;
4114 exec2.batch_start_offset = args->batch_start_offset;
4115 exec2.batch_len = args->batch_len;
4116 exec2.DR1 = args->DR1;
4117 exec2.DR4 = args->DR4;
4118 exec2.num_cliprects = args->num_cliprects;
4119 exec2.cliprects_ptr = args->cliprects_ptr;
4120 exec2.flags = I915_EXEC_RENDER;
4122 ret = i915_gem_do_execbuffer(dev, data, file_priv, &exec2, exec2_list);
4124 /* Copy the new buffer offsets back to the user's exec list. */
4125 for (i = 0; i < args->buffer_count; i++)
4126 exec_list[i].offset = exec2_list[i].offset;
4127 /* ... and back out to userspace */
4128 ret = copy_to_user((struct drm_i915_relocation_entry __user *)
4129 (uintptr_t) args->buffers_ptr,
4131 sizeof(*exec_list) * args->buffer_count);
4134 DRM_ERROR("failed to copy %d exec entries "
4135 "back to user (%d)\n",
4136 args->buffer_count, ret);
4140 drm_free_large(exec_list);
4141 drm_free_large(exec2_list);
4146 i915_gem_execbuffer2(struct drm_device *dev, void *data,
4147 struct drm_file *file_priv)
4149 struct drm_i915_gem_execbuffer2 *args = data;
4150 struct drm_i915_gem_exec_object2 *exec2_list = NULL;
4154 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
4155 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
4158 if (args->buffer_count < 1) {
4159 DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count);
4163 exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
4164 if (exec2_list == NULL) {
4165 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
4166 args->buffer_count);
4169 ret = copy_from_user(exec2_list,
4170 (struct drm_i915_relocation_entry __user *)
4171 (uintptr_t) args->buffers_ptr,
4172 sizeof(*exec2_list) * args->buffer_count);
4174 DRM_ERROR("copy %d exec entries failed %d\n",
4175 args->buffer_count, ret);
4176 drm_free_large(exec2_list);
4180 ret = i915_gem_do_execbuffer(dev, data, file_priv, args, exec2_list);
4182 /* Copy the new buffer offsets back to the user's exec list. */
4183 ret = copy_to_user((struct drm_i915_relocation_entry __user *)
4184 (uintptr_t) args->buffers_ptr,
4186 sizeof(*exec2_list) * args->buffer_count);
4189 DRM_ERROR("failed to copy %d exec entries "
4190 "back to user (%d)\n",
4191 args->buffer_count, ret);
4195 drm_free_large(exec2_list);
4200 i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
4202 struct drm_device *dev = obj->dev;
4203 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4206 BUG_ON(obj_priv->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
4208 i915_verify_inactive(dev, __FILE__, __LINE__);
4210 if (obj_priv->gtt_space != NULL) {
4212 alignment = i915_gem_get_gtt_alignment(obj);
4213 if (obj_priv->gtt_offset & (alignment - 1)) {
4214 WARN(obj_priv->pin_count,
4215 "bo is already pinned with incorrect alignment:"
4216 " offset=%x, req.alignment=%x\n",
4217 obj_priv->gtt_offset, alignment);
4218 ret = i915_gem_object_unbind(obj);
4224 if (obj_priv->gtt_space == NULL) {
4225 ret = i915_gem_object_bind_to_gtt(obj, alignment);
4230 obj_priv->pin_count++;
4232 /* If the object is not active and not pending a flush,
4233 * remove it from the inactive list
4235 if (obj_priv->pin_count == 1) {
4236 atomic_inc(&dev->pin_count);
4237 atomic_add(obj->size, &dev->pin_memory);
4238 if (!obj_priv->active &&
4239 (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
4240 list_del_init(&obj_priv->list);
4242 i915_verify_inactive(dev, __FILE__, __LINE__);
4248 i915_gem_object_unpin(struct drm_gem_object *obj)
4250 struct drm_device *dev = obj->dev;
4251 drm_i915_private_t *dev_priv = dev->dev_private;
4252 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4254 i915_verify_inactive(dev, __FILE__, __LINE__);
4255 obj_priv->pin_count--;
4256 BUG_ON(obj_priv->pin_count < 0);
4257 BUG_ON(obj_priv->gtt_space == NULL);
4259 /* If the object is no longer pinned, and is
4260 * neither active nor being flushed, then stick it on
4263 if (obj_priv->pin_count == 0) {
4264 if (!obj_priv->active &&
4265 (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
4266 list_move_tail(&obj_priv->list,
4267 &dev_priv->mm.inactive_list);
4268 atomic_dec(&dev->pin_count);
4269 atomic_sub(obj->size, &dev->pin_memory);
4271 i915_verify_inactive(dev, __FILE__, __LINE__);
4275 i915_gem_pin_ioctl(struct drm_device *dev, void *data,
4276 struct drm_file *file_priv)
4278 struct drm_i915_gem_pin *args = data;
4279 struct drm_gem_object *obj;
4280 struct drm_i915_gem_object *obj_priv;
4283 mutex_lock(&dev->struct_mutex);
4285 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4287 DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n",
4289 mutex_unlock(&dev->struct_mutex);
4292 obj_priv = to_intel_bo(obj);
4294 if (obj_priv->madv != I915_MADV_WILLNEED) {
4295 DRM_ERROR("Attempting to pin a purgeable buffer\n");
4296 drm_gem_object_unreference(obj);
4297 mutex_unlock(&dev->struct_mutex);
4301 if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) {
4302 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
4304 drm_gem_object_unreference(obj);
4305 mutex_unlock(&dev->struct_mutex);
4309 obj_priv->user_pin_count++;
4310 obj_priv->pin_filp = file_priv;
4311 if (obj_priv->user_pin_count == 1) {
4312 ret = i915_gem_object_pin(obj, args->alignment);
4314 drm_gem_object_unreference(obj);
4315 mutex_unlock(&dev->struct_mutex);
4320 /* XXX - flush the CPU caches for pinned objects
4321 * as the X server doesn't manage domains yet
4323 i915_gem_object_flush_cpu_write_domain(obj);
4324 args->offset = obj_priv->gtt_offset;
4325 drm_gem_object_unreference(obj);
4326 mutex_unlock(&dev->struct_mutex);
4332 i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
4333 struct drm_file *file_priv)
4335 struct drm_i915_gem_pin *args = data;
4336 struct drm_gem_object *obj;
4337 struct drm_i915_gem_object *obj_priv;
4339 mutex_lock(&dev->struct_mutex);
4341 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4343 DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n",
4345 mutex_unlock(&dev->struct_mutex);
4349 obj_priv = to_intel_bo(obj);
4350 if (obj_priv->pin_filp != file_priv) {
4351 DRM_ERROR("Not pinned by caller in i915_gem_unpin_ioctl(): %d\n",
4353 drm_gem_object_unreference(obj);
4354 mutex_unlock(&dev->struct_mutex);
4357 obj_priv->user_pin_count--;
4358 if (obj_priv->user_pin_count == 0) {
4359 obj_priv->pin_filp = NULL;
4360 i915_gem_object_unpin(obj);
4363 drm_gem_object_unreference(obj);
4364 mutex_unlock(&dev->struct_mutex);
4369 i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4370 struct drm_file *file_priv)
4372 struct drm_i915_gem_busy *args = data;
4373 struct drm_gem_object *obj;
4374 struct drm_i915_gem_object *obj_priv;
4376 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4378 DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n",
4383 mutex_lock(&dev->struct_mutex);
4385 /* Count all active objects as busy, even if they are currently not used
4386 * by the gpu. Users of this interface expect objects to eventually
4387 * become non-busy without any further actions, therefore emit any
4388 * necessary flushes here.
4390 obj_priv = to_intel_bo(obj);
4391 args->busy = obj_priv->active;
4393 /* Unconditionally flush objects, even when the gpu still uses this
4394 * object. Userspace calling this function indicates that it wants to
4395 * use this buffer rather sooner than later, so issuing the required
4396 * flush earlier is beneficial.
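*
* Userspace-side sketch (illustrative; reuse_buffer() is a
* placeholder for whatever the caller does with an idle buffer):
*
*	struct drm_i915_gem_busy busy = { .handle = handle };
*	drmIoctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
*	if (!busy.busy)
*		reuse_buffer(handle);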
4398 if (obj->write_domain) {
4399 i915_gem_flush(dev, 0, obj->write_domain);
4400 (void)i915_add_request(dev, file_priv, obj->write_domain, obj_priv->ring);
4403 /* Update the active list for the hardware's current position.
4404 * Otherwise this only updates on a delayed timer or when irqs
4405 * are actually unmasked, and our working set ends up being
4406 * larger than required.
4408 i915_gem_retire_requests_ring(dev, obj_priv->ring);
4410 args->busy = obj_priv->active;
4413 drm_gem_object_unreference(obj);
4414 mutex_unlock(&dev->struct_mutex);
4419 i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4420 struct drm_file *file_priv)
4422 return i915_gem_ring_throttle(dev, file_priv);
4426 i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4427 struct drm_file *file_priv)
4429 struct drm_i915_gem_madvise *args = data;
4430 struct drm_gem_object *obj;
4431 struct drm_i915_gem_object *obj_priv;
4433 switch (args->madv) {
4434 case I915_MADV_DONTNEED:
4435 case I915_MADV_WILLNEED:
4441 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4443 DRM_ERROR("Bad handle in i915_gem_madvise_ioctl(): %d\n",
4448 mutex_lock(&dev->struct_mutex);
4449 obj_priv = to_intel_bo(obj);
4451 if (obj_priv->pin_count) {
4452 drm_gem_object_unreference(obj);
4453 mutex_unlock(&dev->struct_mutex);
4455 DRM_ERROR("Attempted i915_gem_madvise_ioctl() on a pinned object\n");
4459 if (obj_priv->madv != __I915_MADV_PURGED)
4460 obj_priv->madv = args->madv;
4462 /* if the object is no longer bound, discard its backing storage */
4463 if (i915_gem_object_is_purgeable(obj_priv) &&
4464 obj_priv->gtt_space == NULL)
4465 i915_gem_object_truncate(obj);
4467 args->retained = obj_priv->madv != __I915_MADV_PURGED;
4469 drm_gem_object_unreference(obj);
4470 mutex_unlock(&dev->struct_mutex);
4475 struct drm_gem_object * i915_gem_alloc_object(struct drm_device *dev,
4478 struct drm_i915_gem_object *obj;
4480 obj = kzalloc(sizeof(*obj), GFP_KERNEL);
4484 if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4489 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4490 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
4492 obj->agp_type = AGP_USER_MEMORY;
4493 obj->base.driver_private = NULL;
4494 obj->fence_reg = I915_FENCE_REG_NONE;
4495 INIT_LIST_HEAD(&obj->list);
4496 INIT_LIST_HEAD(&obj->gpu_write_list);
4497 obj->madv = I915_MADV_WILLNEED;
4499 trace_i915_gem_object_create(&obj->base);
4504 int i915_gem_init_object(struct drm_gem_object *obj)
4511 static void i915_gem_free_object_tail(struct drm_gem_object *obj)
4513 struct drm_device *dev = obj->dev;
4514 drm_i915_private_t *dev_priv = dev->dev_private;
4515 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4518 ret = i915_gem_object_unbind(obj);
4519 if (ret == -ERESTARTSYS) {
4520 list_move(&obj_priv->list,
4521 &dev_priv->mm.deferred_free_list);
4525 if (obj_priv->mmap_offset)
4526 i915_gem_free_mmap_offset(obj);
4528 drm_gem_object_release(obj);
4530 kfree(obj_priv->page_cpu_valid);
4531 kfree(obj_priv->bit_17);
4535 void i915_gem_free_object(struct drm_gem_object *obj)
4537 struct drm_device *dev = obj->dev;
4538 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4540 trace_i915_gem_object_destroy(obj);
4542 while (obj_priv->pin_count > 0)
4543 i915_gem_object_unpin(obj);
4545 if (obj_priv->phys_obj)
4546 i915_gem_detach_phys_object(dev, obj);
4548 i915_gem_free_object_tail(obj);
4551 /** Unbinds all inactive objects. */
4553 i915_gem_evict_from_inactive_list(struct drm_device *dev)
4555 drm_i915_private_t *dev_priv = dev->dev_private;
4557 while (!list_empty(&dev_priv->mm.inactive_list)) {
4558 struct drm_gem_object *obj;
4561 obj = &list_first_entry(&dev_priv->mm.inactive_list,
4562 struct drm_i915_gem_object,
4565 ret = i915_gem_object_unbind(obj);
4567 DRM_ERROR("Error unbinding object: %d\n", ret);
4576 i915_gem_idle(struct drm_device *dev)
4578 drm_i915_private_t *dev_priv = dev->dev_private;
4581 mutex_lock(&dev->struct_mutex);
4583 if (dev_priv->mm.suspended ||
4584 (dev_priv->render_ring.gem_object == NULL) ||
4586 dev_priv->bsd_ring.gem_object == NULL)) {
4587 mutex_unlock(&dev->struct_mutex);
4591 ret = i915_gpu_idle(dev);
4593 mutex_unlock(&dev->struct_mutex);
4597 /* Under UMS, be paranoid and evict. */
4598 if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
4599 ret = i915_gem_evict_from_inactive_list(dev);
4601 mutex_unlock(&dev->struct_mutex);
4606 /* Hack! Don't let anybody do execbuf while we don't control the chip.
4607 * We need to replace this with a semaphore, or something.
4608 * And not confound mm.suspended!
4610 dev_priv->mm.suspended = 1;
4611 del_timer(&dev_priv->hangcheck_timer);
4613 i915_kernel_lost_context(dev);
4614 i915_gem_cleanup_ringbuffer(dev);
4616 mutex_unlock(&dev->struct_mutex);
4618 /* Cancel the retire work handler, which should be idle now. */
4619 cancel_delayed_work_sync(&dev_priv->mm.retire_work);
4625 * 965+ support PIPE_CONTROL commands, which provide finer grained control
4626 * over cache flushing.
4629 i915_gem_init_pipe_control(struct drm_device *dev)
4631 drm_i915_private_t *dev_priv = dev->dev_private;
4632 struct drm_gem_object *obj;
4633 struct drm_i915_gem_object *obj_priv;
4636 obj = i915_gem_alloc_object(dev, 4096);
4638 DRM_ERROR("Failed to allocate seqno page\n");
4642 obj_priv = to_intel_bo(obj);
4643 obj_priv->agp_type = AGP_USER_CACHED_MEMORY;
4645 ret = i915_gem_object_pin(obj, 4096);
4649 dev_priv->seqno_gfx_addr = obj_priv->gtt_offset;
4650 dev_priv->seqno_page = kmap(obj_priv->pages[0]);
4651 if (dev_priv->seqno_page == NULL)
4654 dev_priv->seqno_obj = obj;
4655 memset(dev_priv->seqno_page, 0, PAGE_SIZE);
4660 i915_gem_object_unpin(obj);
4662 drm_gem_object_unreference(obj);
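/*
 * Hedged sketch (the actual consumer lives in the ring code, not in this
 * function; it assumes PIPE_CONTROL is programmed to write the seqno to the
 * start of this page): once the page is pinned and kmap'ed as above, the
 * most recently completed seqno is just a CPU read of the mapping:
 *
 *	static u32 read_pipe_control_seqno(drm_i915_private_t *dev_priv)
 *	{
 *		return ((volatile u32 *)dev_priv->seqno_page)[0];
 *	}
 */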
4669 i915_gem_cleanup_pipe_control(struct drm_device *dev)
4671 drm_i915_private_t *dev_priv = dev->dev_private;
4672 struct drm_gem_object *obj;
4673 struct drm_i915_gem_object *obj_priv;
4675 obj = dev_priv->seqno_obj;
4676 obj_priv = to_intel_bo(obj);
4677 kunmap(obj_priv->pages[0]);
4678 i915_gem_object_unpin(obj);
4679 drm_gem_object_unreference(obj);
4680 dev_priv->seqno_obj = NULL;
4682 dev_priv->seqno_page = NULL;
4686 i915_gem_init_ringbuffer(struct drm_device *dev)
4688 drm_i915_private_t *dev_priv = dev->dev_private;
4691 dev_priv->render_ring = render_ring;
4693 if (!I915_NEED_GFX_HWS(dev)) {
4694 dev_priv->render_ring.status_page.page_addr
4695 = dev_priv->status_page_dmah->vaddr;
4696 		memset(dev_priv->render_ring.status_page.page_addr, 0, PAGE_SIZE);
4700 if (HAS_PIPE_CONTROL(dev)) {
4701 ret = i915_gem_init_pipe_control(dev);
4706 ret = intel_init_ring_buffer(dev, &dev_priv->render_ring);
4708 goto cleanup_pipe_control;
4711 dev_priv->bsd_ring = bsd_ring;
4712 ret = intel_init_ring_buffer(dev, &dev_priv->bsd_ring);
4714 goto cleanup_render_ring;
4717 dev_priv->next_seqno = 1;
4721 cleanup_render_ring:
4722 intel_cleanup_ring_buffer(dev, &dev_priv->render_ring);
4723 cleanup_pipe_control:
4724 if (HAS_PIPE_CONTROL(dev))
4725 i915_gem_cleanup_pipe_control(dev);
4730 i915_gem_cleanup_ringbuffer(struct drm_device *dev)
4732 drm_i915_private_t *dev_priv = dev->dev_private;
4734 intel_cleanup_ring_buffer(dev, &dev_priv->render_ring);
4736 intel_cleanup_ring_buffer(dev, &dev_priv->bsd_ring);
4737 if (HAS_PIPE_CONTROL(dev))
4738 i915_gem_cleanup_pipe_control(dev);
4742 i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
4743 struct drm_file *file_priv)
4745 drm_i915_private_t *dev_priv = dev->dev_private;
4748 if (drm_core_check_feature(dev, DRIVER_MODESET))
4751 if (atomic_read(&dev_priv->mm.wedged)) {
4752 DRM_ERROR("Reenabling wedged hardware, good luck\n");
4753 atomic_set(&dev_priv->mm.wedged, 0);
4756 mutex_lock(&dev->struct_mutex);
4757 dev_priv->mm.suspended = 0;
4759 ret = i915_gem_init_ringbuffer(dev);
4761 mutex_unlock(&dev->struct_mutex);
4765 spin_lock(&dev_priv->mm.active_list_lock);
4766 BUG_ON(!list_empty(&dev_priv->render_ring.active_list));
4767 BUG_ON(HAS_BSD(dev) && !list_empty(&dev_priv->bsd_ring.active_list));
4768 spin_unlock(&dev_priv->mm.active_list_lock);
4770 BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
4771 BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
4772 BUG_ON(!list_empty(&dev_priv->render_ring.request_list));
4773 BUG_ON(HAS_BSD(dev) && !list_empty(&dev_priv->bsd_ring.request_list));
4774 mutex_unlock(&dev->struct_mutex);
4776 ret = drm_irq_install(dev);
4778 goto cleanup_ringbuffer;
4783 mutex_lock(&dev->struct_mutex);
4784 i915_gem_cleanup_ringbuffer(dev);
4785 dev_priv->mm.suspended = 1;
4786 mutex_unlock(&dev->struct_mutex);
4792 i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
4793 struct drm_file *file_priv)
4795 if (drm_core_check_feature(dev, DRIVER_MODESET))
4798 drm_irq_uninstall(dev);
4799 return i915_gem_idle(dev);
4803 i915_gem_lastclose(struct drm_device *dev)
4807 if (drm_core_check_feature(dev, DRIVER_MODESET))
4810 ret = i915_gem_idle(dev);
4812 DRM_ERROR("failed to idle hardware: %d\n", ret);
4816 i915_gem_load(struct drm_device *dev)
4819 drm_i915_private_t *dev_priv = dev->dev_private;
4821 spin_lock_init(&dev_priv->mm.active_list_lock);
4822 INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
4823 INIT_LIST_HEAD(&dev_priv->mm.gpu_write_list);
4824 INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
4825 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
4826 INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
4827 INIT_LIST_HEAD(&dev_priv->render_ring.active_list);
4828 INIT_LIST_HEAD(&dev_priv->render_ring.request_list);
4830 INIT_LIST_HEAD(&dev_priv->bsd_ring.active_list);
4831 INIT_LIST_HEAD(&dev_priv->bsd_ring.request_list);
4833 for (i = 0; i < 16; i++)
4834 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
4835 INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4836 i915_gem_retire_work_handler);
4837 spin_lock(&shrink_list_lock);
4838 list_add(&dev_priv->mm.shrink_list, &shrink_list);
4839 spin_unlock(&shrink_list_lock);
4841 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
4843 u32 tmp = I915_READ(MI_ARB_STATE);
4844 if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
4845 /* arb state is a masked write, so set bit + bit in mask */
4846 tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
4847 I915_WRITE(MI_ARB_STATE, tmp);
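/*
 * Aside on the masked-write pattern used above (the macro names below are
 * illustrative, not defined by this driver): such registers take the new
 * bit value in the low 16 bits and a write-enable mask in the high 16 bits,
 * so a single write can update just the bits of interest:
 *
 *	#define MASKED_BIT_ENABLE(bit)	((bit) | ((bit) << 16))
 *	#define MASKED_BIT_DISABLE(bit)	((bit) << 16)
 *
 *	I915_WRITE(reg, MASKED_BIT_ENABLE(SOME_BIT));	<-- set SOME_BIT only
 *	I915_WRITE(reg, MASKED_BIT_DISABLE(SOME_BIT));	<-- clear SOME_BIT only
 *
 * MI_ARB_MASK_SHIFT above plays the role of the "<< 16" in this sketch.
 */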
4851 /* Old X drivers will take 0-2 for front, back, depth buffers */
4852 if (!drm_core_check_feature(dev, DRIVER_MODESET))
4853 dev_priv->fence_reg_start = 3;
4855 if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4856 dev_priv->num_fence_regs = 16;
4858 dev_priv->num_fence_regs = 8;
4860 /* Initialize fence registers to zero */
4861 if (IS_I965G(dev)) {
4862 for (i = 0; i < 16; i++)
4863 I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0);
4865 for (i = 0; i < 8; i++)
4866 I915_WRITE(FENCE_REG_830_0 + (i * 4), 0);
4867 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4868 for (i = 0; i < 8; i++)
4869 I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
4871 i915_gem_detect_bit_6_swizzle(dev);
4872 init_waitqueue_head(&dev_priv->pending_flip_queue);
4876 * Create a physically contiguous memory object for this object
4877 * e.g. for cursor + overlay regs
4879 int i915_gem_init_phys_object(struct drm_device *dev, int id, int size)
4882 drm_i915_private_t *dev_priv = dev->dev_private;
4883 struct drm_i915_gem_phys_object *phys_obj;
4886 if (dev_priv->mm.phys_objs[id - 1] || !size)
4889 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
4895 phys_obj->handle = drm_pci_alloc(dev, size, 0);
4896 if (!phys_obj->handle) {
4901 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4904 dev_priv->mm.phys_objs[id - 1] = phys_obj;
4912 void i915_gem_free_phys_object(struct drm_device *dev, int id)
4914 drm_i915_private_t *dev_priv = dev->dev_private;
4915 struct drm_i915_gem_phys_object *phys_obj;
4917 if (!dev_priv->mm.phys_objs[id - 1])
4920 phys_obj = dev_priv->mm.phys_objs[id - 1];
4921 if (phys_obj->cur_obj) {
4922 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
4926 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4928 drm_pci_free(dev, phys_obj->handle);
4930 dev_priv->mm.phys_objs[id - 1] = NULL;
4933 void i915_gem_free_all_phys_object(struct drm_device *dev)
4937 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
4938 i915_gem_free_phys_object(dev, i);
4941 void i915_gem_detach_phys_object(struct drm_device *dev,
4942 struct drm_gem_object *obj)
4944 struct drm_i915_gem_object *obj_priv;
4949 obj_priv = to_intel_bo(obj);
4950 if (!obj_priv->phys_obj)
4953 ret = i915_gem_object_get_pages(obj, 0);
4957 page_count = obj->size / PAGE_SIZE;
4959 for (i = 0; i < page_count; i++) {
4960 char *dst = kmap_atomic(obj_priv->pages[i], KM_USER0);
4961 char *src = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4963 memcpy(dst, src, PAGE_SIZE);
4964 kunmap_atomic(dst, KM_USER0);
4966 drm_clflush_pages(obj_priv->pages, page_count);
4967 drm_agp_chipset_flush(dev);
4969 i915_gem_object_put_pages(obj);
4971 obj_priv->phys_obj->cur_obj = NULL;
4972 obj_priv->phys_obj = NULL;
4976 i915_gem_attach_phys_object(struct drm_device *dev,
4977 struct drm_gem_object *obj, int id)
4979 drm_i915_private_t *dev_priv = dev->dev_private;
4980 struct drm_i915_gem_object *obj_priv;
4985 if (id > I915_MAX_PHYS_OBJECT)
4988 obj_priv = to_intel_bo(obj);
4990 if (obj_priv->phys_obj) {
4991 if (obj_priv->phys_obj->id == id)
4993 i915_gem_detach_phys_object(dev, obj);
4997 /* create a new object */
4998 if (!dev_priv->mm.phys_objs[id - 1]) {
4999 		ret = i915_gem_init_phys_object(dev, id, obj->size);
5002 DRM_ERROR("failed to init phys object %d size: %zu\n", id, obj->size);
5007 /* bind to the object */
5008 obj_priv->phys_obj = dev_priv->mm.phys_objs[id - 1];
5009 obj_priv->phys_obj->cur_obj = obj;
5011 ret = i915_gem_object_get_pages(obj, 0);
5013 DRM_ERROR("failed to get page list\n");
5017 page_count = obj->size / PAGE_SIZE;
5019 for (i = 0; i < page_count; i++) {
5020 char *src = kmap_atomic(obj_priv->pages[i], KM_USER0);
5021 char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
5023 memcpy(dst, src, PAGE_SIZE);
5024 kunmap_atomic(src, KM_USER0);
5027 i915_gem_object_put_pages(obj);
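/*
 * Hedged usage sketch (the real call sites live in the display code; the
 * variable names and the exact register programmed are illustrative): on
 * hardware whose cursor registers want a physical address, the caller
 * attaches the object to a phys slot and then uses the DMA handle's bus
 * address when programming the cursor base:
 *
 *	ret = i915_gem_attach_phys_object(dev, bo, I915_GEM_PHYS_CURSOR_0);
 *	if (ret == 0)
 *		cursor_base = to_intel_bo(bo)->phys_obj->handle->busaddr;
 */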
5035 i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
5036 struct drm_i915_gem_pwrite *args,
5037 struct drm_file *file_priv)
5039 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
5042 char __user *user_data;
5044 user_data = (char __user *) (uintptr_t) args->data_ptr;
5045 obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset;
5047 DRM_DEBUG_DRIVER("obj_addr %p, %lld\n", obj_addr, args->size);
5048 ret = copy_from_user(obj_addr, user_data, args->size);
5052 drm_agp_chipset_flush(dev);
5056 void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv)
5058 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
5060 /* Clean up our request list when the client is going away, so that
5061 	 * later retire_requests won't dereference our soon-to-be-gone file_priv.
5064 mutex_lock(&dev->struct_mutex);
5065 while (!list_empty(&i915_file_priv->mm.request_list))
5066 list_del_init(i915_file_priv->mm.request_list.next);
5067 mutex_unlock(&dev->struct_mutex);
5071 i915_gpu_is_active(struct drm_device *dev)
5073 drm_i915_private_t *dev_priv = dev->dev_private;
5076 spin_lock(&dev_priv->mm.active_list_lock);
5077 lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
5078 list_empty(&dev_priv->render_ring.active_list);
5080 lists_empty &= list_empty(&dev_priv->bsd_ring.active_list);
5081 spin_unlock(&dev_priv->mm.active_list_lock);
5083 return !lists_empty;
5087 i915_gem_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
5089 drm_i915_private_t *dev_priv, *next_dev;
5090 struct drm_i915_gem_object *obj_priv, *next_obj;
5092 int would_deadlock = 1;
5094 /* "fast-path" to count number of available objects */
5095 if (nr_to_scan == 0) {
5096 spin_lock(&shrink_list_lock);
5097 list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
5098 struct drm_device *dev = dev_priv->dev;
5100 if (mutex_trylock(&dev->struct_mutex)) {
5101 list_for_each_entry(obj_priv,
5102 &dev_priv->mm.inactive_list,
5105 mutex_unlock(&dev->struct_mutex);
5108 spin_unlock(&shrink_list_lock);
5110 return (cnt / 100) * sysctl_vfs_cache_pressure;
5113 spin_lock(&shrink_list_lock);
5116 /* first scan for clean buffers */
5117 list_for_each_entry_safe(dev_priv, next_dev,
5118 &shrink_list, mm.shrink_list) {
5119 struct drm_device *dev = dev_priv->dev;
5121 		if (!mutex_trylock(&dev->struct_mutex))
5124 spin_unlock(&shrink_list_lock);
5125 i915_gem_retire_requests(dev);
5127 list_for_each_entry_safe(obj_priv, next_obj,
5128 &dev_priv->mm.inactive_list,
5130 if (i915_gem_object_is_purgeable(obj_priv)) {
5131 i915_gem_object_unbind(&obj_priv->base);
5132 if (--nr_to_scan <= 0)
5137 spin_lock(&shrink_list_lock);
5138 mutex_unlock(&dev->struct_mutex);
5142 if (nr_to_scan <= 0)
5146 /* second pass, evict/count anything still on the inactive list */
5147 list_for_each_entry_safe(dev_priv, next_dev,
5148 &shrink_list, mm.shrink_list) {
5149 struct drm_device *dev = dev_priv->dev;
5151 		if (!mutex_trylock(&dev->struct_mutex))
5154 spin_unlock(&shrink_list_lock);
5156 list_for_each_entry_safe(obj_priv, next_obj,
5157 &dev_priv->mm.inactive_list,
5159 if (nr_to_scan > 0) {
5160 i915_gem_object_unbind(&obj_priv->base);
5166 spin_lock(&shrink_list_lock);
5167 mutex_unlock(&dev->struct_mutex);
5176 * We are desperate for pages, so as a last resort, wait
5177 * for the GPU to finish and discard whatever we can.
5178 	 * This dramatically reduces the number of OOM-killer events
5179 	 * whilst running the GPU aggressively.
5181 list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
5182 struct drm_device *dev = dev_priv->dev;
5184 if (!mutex_trylock(&dev->struct_mutex))
5187 spin_unlock(&shrink_list_lock);
5189 if (i915_gpu_is_active(dev)) {
5194 spin_lock(&shrink_list_lock);
5195 mutex_unlock(&dev->struct_mutex);
5202 spin_unlock(&shrink_list_lock);
5207 return (cnt / 100) * sysctl_vfs_cache_pressure;
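/*
 * Worked example of the scaling used in both return statements above
 * (the numbers are illustrative): if the inactive lists hold cnt = 350
 * objects and sysctl_vfs_cache_pressure has its default value of 100,
 * the shrinker reports (350 / 100) * 100 = 300 freeable objects; raising
 * the pressure to 200 doubles that to 600, making the core VM reclaim
 * GEM objects more aggressively relative to other caches.
 */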
5212 static struct shrinker shrinker = {
5213 .shrink = i915_gem_shrink,
5214 .seeks = DEFAULT_SEEKS,
5218 i915_gem_shrinker_init(void)
5220 register_shrinker(&shrinker);
5224 i915_gem_shrinker_exit(void)
5226 unregister_shrinker(&shrinker);