/*
 * Copyright © 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *
 */

#include "drmP.h"
#include "drm.h"
#include "i915_drm.h"
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include <linux/slab.h>
#include <linux/swap.h>
#include <linux/pci.h>

static uint32_t i915_gem_get_gtt_alignment(struct drm_gem_object *obj);
static int i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj);
static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
					     int write);
static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
						      uint64_t offset,
						      uint64_t size);
static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
				       unsigned alignment);
static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
static int i915_gem_evict_something(struct drm_device *dev, int min_size,
				    unsigned alignment);
static int i915_gem_evict_from_inactive_list(struct drm_device *dev);
static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
				struct drm_i915_gem_pwrite *args,
				struct drm_file *file_priv);
static void i915_gem_free_object_tail(struct drm_gem_object *obj);

static LIST_HEAD(shrink_list);
static DEFINE_SPINLOCK(shrink_list_lock);

int i915_gem_do_init(struct drm_device *dev, unsigned long start,
		     unsigned long end)
{
	drm_i915_private_t *dev_priv = dev->dev_private;

	if (start >= end ||
	    (start & (PAGE_SIZE - 1)) != 0 ||
	    (end & (PAGE_SIZE - 1)) != 0) {
		return -EINVAL;
	}

	drm_mm_init(&dev_priv->mm.gtt_space, start,
		    end - start);

	dev->gtt_total = (uint32_t) (end - start);

	return 0;
}
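
/*
 * Note (added commentary, not from the original source): i915_gem_do_init()
 * simply hands the range [start, end) of the aperture to the drm_mm range
 * allocator, so both bounds must be page-aligned and the range non-empty.
 * For example start = 0, end = 256 << 20 is accepted, while an end of
 * (256 << 20) + 1 fails the (end & (PAGE_SIZE - 1)) check and the caller
 * gets -EINVAL.
 */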

int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct drm_i915_gem_init *args = data;
	int ret;

	mutex_lock(&dev->struct_mutex);
	ret = i915_gem_do_init(dev, args->gtt_start, args->gtt_end);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

int
i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
			    struct drm_file *file_priv)
{
	struct drm_i915_gem_get_aperture *args = data;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	args->aper_size = dev->gtt_total;
	args->aper_available_size = (args->aper_size -
				     atomic_read(&dev->pin_memory));

	return 0;
}


/**
 * Creates a new mm object and returns a handle to it.
 */
int
i915_gem_create_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv)
{
	struct drm_i915_gem_create *args = data;
	struct drm_gem_object *obj;
	int ret;
	u32 handle;

	args->size = roundup(args->size, PAGE_SIZE);

	/* Allocate the new object */
	obj = i915_gem_alloc_object(dev, args->size);
	if (obj == NULL)
		return -ENOMEM;

	ret = drm_gem_handle_create(file_priv, obj, &handle);
	drm_gem_object_unreference_unlocked(obj);
	if (ret)
		return ret;

	args->handle = handle;

	return 0;
}
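
/*
 * Illustrative userspace sketch (added commentary, not part of this file):
 * the create ioctl above is normally reached through libdrm. Assuming the
 * usual DRM_IOCTL_I915_GEM_CREATE wrapper from i915_drm.h, a caller would do
 * roughly:
 *
 *	struct drm_i915_gem_create create = { .size = 8192 };
 *	if (drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create) == 0)
 *		// create.handle now names the object for pread/pwrite/mmap
 *
 * The size is rounded up to a page multiple here in the kernel, and the
 * returned handle is what the pread/pwrite/mmap ioctls below operate on.
 */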

static inline int
fast_shmem_read(struct page **pages,
		loff_t page_base, int page_offset,
		char __user *data,
		int length)
{
	char __iomem *vaddr;
	int unwritten;

	vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
	if (vaddr == NULL)
		return -ENOMEM;
	unwritten = __copy_to_user_inatomic(data, vaddr + page_offset, length);
	kunmap_atomic(vaddr, KM_USER0);

	if (unwritten)
		return -EFAULT;

	return 0;
}

static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj)
{
	drm_i915_private_t *dev_priv = obj->dev->dev_private;
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);

	return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
		obj_priv->tiling_mode != I915_TILING_NONE;
}

static inline void
slow_shmem_copy(struct page *dst_page,
		int dst_offset,
		struct page *src_page,
		int src_offset,
		int length)
{
	char *dst_vaddr, *src_vaddr;

	dst_vaddr = kmap(dst_page);
	src_vaddr = kmap(src_page);

	memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);

	kunmap(src_page);
	kunmap(dst_page);
}

static inline void
slow_shmem_bit17_copy(struct page *gpu_page,
		      int gpu_offset,
		      struct page *cpu_page,
		      int cpu_offset,
		      int length,
		      int is_read)
{
	char *gpu_vaddr, *cpu_vaddr;

	/* Use the unswizzled path if this page isn't affected. */
	if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
		if (is_read)
			return slow_shmem_copy(cpu_page, cpu_offset,
					       gpu_page, gpu_offset, length);
		else
			return slow_shmem_copy(gpu_page, gpu_offset,
					       cpu_page, cpu_offset, length);
	}

	gpu_vaddr = kmap(gpu_page);
	cpu_vaddr = kmap(cpu_page);

	/* Copy the data, XORing A6 with A17 (1). The user already knows he's
	 * XORing with the other bits (A9 for Y, A9 and A10 for X)
	 */
	while (length > 0) {
		int cacheline_end = ALIGN(gpu_offset + 1, 64);
		int this_length = min(cacheline_end - gpu_offset, length);
		int swizzled_gpu_offset = gpu_offset ^ 64;

		if (is_read) {
			memcpy(cpu_vaddr + cpu_offset,
			       gpu_vaddr + swizzled_gpu_offset,
			       this_length);
		} else {
			memcpy(gpu_vaddr + swizzled_gpu_offset,
			       cpu_vaddr + cpu_offset,
			       this_length);
		}
		cpu_offset += this_length;
		gpu_offset += this_length;
		length -= this_length;
	}

	kunmap(cpu_page);
	kunmap(gpu_page);
}
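
/*
 * Worked example (added commentary) for the swizzle above: on a page whose
 * physical address has bit 17 set, gpu_offset ^ 64 swaps each pair of
 * adjacent 64-byte cachelines, so bytes 0x00-0x3f are exchanged with
 * 0x40-0x7f and vice versa. The cacheline_end/this_length logic just keeps
 * each memcpy from straddling one of those 64-byte boundaries.
 */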

/**
 * This is the fast shmem pread path, which attempts to copy_to_user directly
 * from the backing pages of the object to the user's address space. On a
 * fault, it fails so we can fall back to i915_gem_shmem_pread_slow().
 */
static int
i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;
	int ret;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages(obj, 0);
	if (ret != 0)
		goto fail_unlock;

	ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
							args->size);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = to_intel_bo(obj);
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		ret = fast_shmem_read(obj_priv->pages,
				      page_base, page_offset,
				      user_data, page_length);
		if (ret)
			goto fail_put_pages;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

static int
i915_gem_object_get_pages_or_evict(struct drm_gem_object *obj)
{
	int ret;

	ret = i915_gem_object_get_pages(obj, __GFP_NORETRY | __GFP_NOWARN);

	/* If we've insufficient memory to map in the pages, attempt
	 * to make some space by throwing out some old buffers.
	 */
	if (ret == -ENOMEM) {
		struct drm_device *dev = obj->dev;

		ret = i915_gem_evict_something(dev, obj->size,
					       i915_gem_get_gtt_alignment(obj));
		if (ret)
			return ret;

		ret = i915_gem_object_get_pages(obj, 0);
	}

	return ret;
}
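
/*
 * Note (added commentary): the first i915_gem_object_get_pages() call above
 * passes __GFP_NORETRY | __GFP_NOWARN so the shmem allocation fails quickly
 * and quietly under memory pressure; only then does the helper evict
 * something from the GTT and retry with the default flags.
 */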

/**
 * This is the fallback shmem pread path, which allocates temporary storage
 * in kernel space to copy_to_user into outside of the struct_mutex, so we
 * can copy out of the object's backing pages while holding the struct mutex
 * and not take page faults.
 */
static int
i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
			  struct drm_i915_gem_pread *args,
			  struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	ssize_t remain;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_index, shmem_page_offset;
	int data_page_index, data_page_offset;
	int page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;
	int do_bit17_swizzling;

	remain = args->size;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, yet we want to hold it while
	 * dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 1, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto fail_put_user_pages;
	}

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages_or_evict(obj);
	if (ret)
		goto fail_unlock;

	ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
							args->size);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = to_intel_bo(obj);
	offset = args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * shmem_page_index = page number within shmem file
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset with data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		shmem_page_index = offset / PAGE_SIZE;
		shmem_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		if (do_bit17_swizzling) {
			slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
					      shmem_page_offset,
					      user_pages[data_page_index],
					      data_page_offset,
					      page_length,
					      1);
		} else {
			slow_shmem_copy(user_pages[data_page_index],
					data_page_offset,
					obj_priv->pages[shmem_page_index],
					shmem_page_offset,
					page_length);
		}

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);
fail_put_user_pages:
	for (i = 0; i < pinned_pages; i++) {
		SetPageDirty(user_pages[i]);
		page_cache_release(user_pages[i]);
	}
	drm_free_large(user_pages);

	return ret;
}

/**
 * Reads data from the object referenced by handle.
 *
 * On error, the contents of *data are undefined.
 */
int
i915_gem_pread_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *file_priv)
{
	struct drm_i915_gem_pread *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;
	obj_priv = to_intel_bo(obj);

	/* Bounds check source.
	 *
	 * XXX: This could use review for overflow issues...
	 */
	if (args->offset > obj->size || args->size > obj->size ||
	    args->offset + args->size > obj->size) {
		drm_gem_object_unreference_unlocked(obj);
		return -EINVAL;
	}

	if (i915_gem_object_needs_bit17_swizzle(obj)) {
		ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv);
	} else {
		ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
		if (ret != 0)
			ret = i915_gem_shmem_pread_slow(dev, obj, args,
							file_priv);
	}

	drm_gem_object_unreference_unlocked(obj);

	return ret;
}

/* This is the fast write path which cannot handle
 * page faults in the source data
 */

static inline int
fast_user_write(struct io_mapping *mapping,
		loff_t page_base, int page_offset,
		char __user *user_data,
		int length)
{
	char *vaddr_atomic;
	unsigned long unwritten;

	vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base, KM_USER0);
	unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
						      user_data, length);
	io_mapping_unmap_atomic(vaddr_atomic, KM_USER0);
	if (unwritten)
		return -EFAULT;
	return 0;
}
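
/*
 * Note (added commentary): fast_user_write() runs inside an atomic
 * write-combining mapping of the aperture, so the
 * __copy_from_user_inatomic_nocache() above must not fault; any short copy
 * is reported as -EFAULT, and the pwrite ioctl then falls back to the
 * sleeping i915_gem_gtt_pwrite_slow() path.
 */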

/* Here's the write path which can sleep for
 * page faults
 */

static inline void
slow_kernel_write(struct io_mapping *mapping,
		  loff_t gtt_base, int gtt_offset,
		  struct page *user_page, int user_offset,
		  int length)
{
	char __iomem *dst_vaddr;
	char *src_vaddr;

	dst_vaddr = io_mapping_map_wc(mapping, gtt_base);
	src_vaddr = kmap(user_page);

	memcpy_toio(dst_vaddr + gtt_offset,
		    src_vaddr + user_offset,
		    length);

	kunmap(user_page);
	io_mapping_unmap(dst_vaddr);
}

static inline int
fast_shmem_write(struct page **pages,
		 loff_t page_base, int page_offset,
		 char __user *data,
		 int length)
{
	char __iomem *vaddr;
	unsigned long unwritten;

	vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
	if (vaddr == NULL)
		return -ENOMEM;
	unwritten = __copy_from_user_inatomic(vaddr + page_offset, data, length);
	kunmap_atomic(vaddr, KM_USER0);

	if (unwritten)
		return -EFAULT;
	return 0;
}

/**
 * This is the fast pwrite path, where we copy the data directly from the
 * user into the GTT, uncached.
 */
static int
i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;
	int ret;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;
	if (!access_ok(VERIFY_READ, user_data, remain))
		return -EFAULT;


	mutex_lock(&dev->struct_mutex);
	ret = i915_gem_object_pin(obj, 0);
	if (ret) {
		mutex_unlock(&dev->struct_mutex);
		return ret;
	}
	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
	if (ret)
		goto fail;

	obj_priv = to_intel_bo(obj);
	offset = obj_priv->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		ret = fast_user_write (dev_priv->mm.gtt_mapping, page_base,
				       page_offset, user_data, page_length);

		/* If we get a fault while copying data, then (presumably) our
		 * source page isn't available. Return the error and we'll
		 * retry in the slow path.
		 */
		if (ret)
			goto fail;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

fail:
	i915_gem_object_unpin(obj);
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

/**
 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
 */
static int
i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
			 struct drm_i915_gem_pwrite *args,
			 struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	drm_i915_private_t *dev_priv = dev->dev_private;
	ssize_t remain;
	loff_t gtt_page_base, offset;
	loff_t first_data_page, last_data_page, num_pages;
	loff_t pinned_pages, i;
	struct page **user_pages;
	struct mm_struct *mm = current->mm;
	int gtt_page_offset, data_page_offset, data_page_index, page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;

	remain = args->size;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto out_unpin_pages;
	}

	mutex_lock(&dev->struct_mutex);
	ret = i915_gem_object_pin(obj, 0);
	if (ret)
		goto out_unlock;

	ret = i915_gem_object_set_to_gtt_domain(obj, 1);
	if (ret)
		goto out_unpin_object;

	obj_priv = to_intel_bo(obj);
	offset = obj_priv->gtt_offset + args->offset;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * gtt_page_base = page offset within aperture
		 * gtt_page_offset = offset within page in aperture
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset with data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		gtt_page_base = offset & PAGE_MASK;
		gtt_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((gtt_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - gtt_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		slow_kernel_write(dev_priv->mm.gtt_mapping,
				  gtt_page_base, gtt_page_offset,
				  user_pages[data_page_index],
				  data_page_offset,
				  page_length);

		remain -= page_length;
		offset += page_length;
		data_ptr += page_length;
	}

out_unpin_object:
	i915_gem_object_unpin(obj);
out_unlock:
	mutex_unlock(&dev->struct_mutex);
out_unpin_pages:
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);

	return ret;
}

/**
 * This is the fast shmem pwrite path, which attempts to directly
 * copy_from_user into the kmapped pages backing the object.
 */
static int
i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	ssize_t remain;
	loff_t offset, page_base;
	char __user *user_data;
	int page_offset, page_length;
	int ret;

	user_data = (char __user *) (uintptr_t) args->data_ptr;
	remain = args->size;

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages(obj, 0);
	if (ret != 0)
		goto fail_unlock;

	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = to_intel_bo(obj);
	offset = args->offset;
	obj_priv->dirty = 1;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * page_base = page offset within aperture
		 * page_offset = offset within page
		 * page_length = bytes to copy for this page
		 */
		page_base = (offset & ~(PAGE_SIZE-1));
		page_offset = offset & (PAGE_SIZE-1);
		page_length = remain;
		if ((page_offset + remain) > PAGE_SIZE)
			page_length = PAGE_SIZE - page_offset;

		ret = fast_shmem_write(obj_priv->pages,
				       page_base, page_offset,
				       user_data, page_length);
		if (ret)
			goto fail_put_pages;

		remain -= page_length;
		user_data += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);

	return ret;
}

/**
 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
 * the memory and maps it using kmap_atomic for copying.
 *
 * This avoids taking mmap_sem for faulting on the user's address while the
 * struct_mutex is held.
 */
static int
i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
			   struct drm_i915_gem_pwrite *args,
			   struct drm_file *file_priv)
{
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	struct mm_struct *mm = current->mm;
	struct page **user_pages;
	ssize_t remain;
	loff_t offset, pinned_pages, i;
	loff_t first_data_page, last_data_page, num_pages;
	int shmem_page_index, shmem_page_offset;
	int data_page_index, data_page_offset;
	int page_length;
	int ret;
	uint64_t data_ptr = args->data_ptr;
	int do_bit17_swizzling;

	remain = args->size;

	/* Pin the user pages containing the data. We can't fault while
	 * holding the struct mutex, and all of the pwrite implementations
	 * want to hold it while dereferencing the user data.
	 */
	first_data_page = data_ptr / PAGE_SIZE;
	last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
	num_pages = last_data_page - first_data_page + 1;

	user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
	if (user_pages == NULL)
		return -ENOMEM;

	down_read(&mm->mmap_sem);
	pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
				      num_pages, 0, 0, user_pages, NULL);
	up_read(&mm->mmap_sem);
	if (pinned_pages < num_pages) {
		ret = -EFAULT;
		goto fail_put_user_pages;
	}

	do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);

	mutex_lock(&dev->struct_mutex);

	ret = i915_gem_object_get_pages_or_evict(obj);
	if (ret)
		goto fail_unlock;

	ret = i915_gem_object_set_to_cpu_domain(obj, 1);
	if (ret != 0)
		goto fail_put_pages;

	obj_priv = to_intel_bo(obj);
	offset = args->offset;
	obj_priv->dirty = 1;

	while (remain > 0) {
		/* Operation in this page
		 *
		 * shmem_page_index = page number within shmem file
		 * shmem_page_offset = offset within page in shmem file
		 * data_page_index = page number in get_user_pages return
		 * data_page_offset = offset with data_page_index page.
		 * page_length = bytes to copy for this page
		 */
		shmem_page_index = offset / PAGE_SIZE;
		shmem_page_offset = offset & ~PAGE_MASK;
		data_page_index = data_ptr / PAGE_SIZE - first_data_page;
		data_page_offset = data_ptr & ~PAGE_MASK;

		page_length = remain;
		if ((shmem_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - shmem_page_offset;
		if ((data_page_offset + page_length) > PAGE_SIZE)
			page_length = PAGE_SIZE - data_page_offset;

		if (do_bit17_swizzling) {
			slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
					      shmem_page_offset,
					      user_pages[data_page_index],
					      data_page_offset,
					      page_length,
					      0);
		} else {
			slow_shmem_copy(obj_priv->pages[shmem_page_index],
					shmem_page_offset,
					user_pages[data_page_index],
					data_page_offset,
					page_length);
		}

		remain -= page_length;
		data_ptr += page_length;
		offset += page_length;
	}

fail_put_pages:
	i915_gem_object_put_pages(obj);
fail_unlock:
	mutex_unlock(&dev->struct_mutex);
fail_put_user_pages:
	for (i = 0; i < pinned_pages; i++)
		page_cache_release(user_pages[i]);
	drm_free_large(user_pages);

	return ret;
}

/**
 * Writes data to the object referenced by handle.
 *
 * On error, the contents of the buffer that were to be modified are undefined.
 */
int
i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
		      struct drm_file *file_priv)
{
	struct drm_i915_gem_pwrite *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;
	obj_priv = to_intel_bo(obj);

	/* Bounds check destination.
	 *
	 * XXX: This could use review for overflow issues...
	 */
	if (args->offset > obj->size || args->size > obj->size ||
	    args->offset + args->size > obj->size) {
		drm_gem_object_unreference_unlocked(obj);
		return -EINVAL;
	}

	/* We can only do the GTT pwrite on untiled buffers, as otherwise
	 * it would end up going through the fenced access, and we'll get
	 * different detiling behavior between reading and writing.
	 * pread/pwrite currently are reading and writing from the CPU
	 * perspective, requiring manual detiling by the client.
	 */
	if (obj_priv->phys_obj)
		ret = i915_gem_phys_pwrite(dev, obj, args, file_priv);
	else if (obj_priv->tiling_mode == I915_TILING_NONE &&
		 dev->gtt_total != 0 &&
		 obj->write_domain != I915_GEM_DOMAIN_CPU) {
		ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file_priv);
		if (ret == -EFAULT) {
			ret = i915_gem_gtt_pwrite_slow(dev, obj, args,
						       file_priv);
		}
	} else if (i915_gem_object_needs_bit17_swizzle(obj)) {
		ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv);
	} else {
		ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv);
		if (ret == -EFAULT) {
			ret = i915_gem_shmem_pwrite_slow(dev, obj, args,
							 file_priv);
		}
	}

#if WATCH_PWRITE
	if (ret)
		DRM_INFO("pwrite failed %d\n", ret);
#endif

	drm_gem_object_unreference_unlocked(obj);

	return ret;
}
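
/*
 * Path selection summary (added commentary) for the ioctl above:
 *  - objects with a phys backing object	-> i915_gem_phys_pwrite()
 *  - untiled, GTT initialised, not in the CPU write domain
 *						-> GTT fast path, GTT slow path on -EFAULT
 *  - tiled objects needing bit-17 swizzling	-> shmem slow path only
 *  - everything else				-> shmem fast path, shmem slow path on -EFAULT
 */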

/**
 * Called when user space prepares to use an object with the CPU, either
 * through the mmap ioctl's mapping or a GTT mapping.
 */
int
i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
			  struct drm_file *file_priv)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_set_domain *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	uint32_t read_domains = args->read_domains;
	uint32_t write_domain = args->write_domain;
	int ret;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	/* Only handle setting domains to types used by the CPU. */
	if (write_domain & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	if (read_domains & I915_GEM_GPU_DOMAINS)
		return -EINVAL;

	/* Having something in the write domain implies it's in the read
	 * domain, and only that read domain. Enforce that in the request.
	 */
	if (write_domain != 0 && read_domains != write_domain)
		return -EINVAL;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;
	obj_priv = to_intel_bo(obj);

	mutex_lock(&dev->struct_mutex);

	intel_mark_busy(dev, obj);

#if WATCH_BUF
	DRM_INFO("set_domain_ioctl %p(%zd), %08x %08x\n",
		 obj, obj->size, read_domains, write_domain);
#endif
	if (read_domains & I915_GEM_DOMAIN_GTT) {
		ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);

		/* Update the LRU on the fence for the CPU access that's
		 * about to occur.
		 */
		if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
			struct drm_i915_fence_reg *reg =
				&dev_priv->fence_regs[obj_priv->fence_reg];
			list_move_tail(&reg->lru_list,
				       &dev_priv->mm.fence_list);
		}

		/* Silently promote "you're not bound, there was nothing to do"
		 * to success, since the client was just asking us to
		 * make sure everything was done.
		 */
		if (ret == -EINVAL)
			ret = 0;
	} else {
		ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
	}

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Called when user space has done writes to this buffer
 */
int
i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
			 struct drm_file *file_priv)
{
	struct drm_i915_gem_sw_finish *args = data;
	struct drm_gem_object *obj;
	struct drm_i915_gem_object *obj_priv;
	int ret = 0;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	mutex_lock(&dev->struct_mutex);
	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL) {
		mutex_unlock(&dev->struct_mutex);
		return -EBADF;
	}

#if WATCH_BUF
	DRM_INFO("%s: sw_finish %d (%p %zd)\n",
		 __func__, args->handle, obj, obj->size);
#endif
	obj_priv = to_intel_bo(obj);

	/* Pinned buffers may be scanout, so flush the cache */
	if (obj_priv->pin_count)
		i915_gem_object_flush_cpu_write_domain(obj);

	drm_gem_object_unreference(obj);
	mutex_unlock(&dev->struct_mutex);
	return ret;
}

/**
 * Maps the contents of an object, returning the address it is mapped
 * into.
 *
 * While the mapping holds a reference on the contents of the object, it doesn't
 * imply a ref on the object itself.
 */
int
i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
		    struct drm_file *file_priv)
{
	struct drm_i915_gem_mmap *args = data;
	struct drm_gem_object *obj;
	loff_t offset;
	unsigned long addr;

	if (!(dev->driver->driver_features & DRIVER_GEM))
		return -ENODEV;

	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
	if (obj == NULL)
		return -EBADF;

	offset = args->offset;

	down_write(&current->mm->mmap_sem);
	addr = do_mmap(obj->filp, 0, args->size,
		       PROT_READ | PROT_WRITE, MAP_SHARED,
		       args->offset);
	up_write(&current->mm->mmap_sem);
	drm_gem_object_unreference_unlocked(obj);
	if (IS_ERR((void *)addr))
		return addr;

	args->addr_ptr = (uint64_t) addr;

	return 0;
}
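
/*
 * Note (added commentary): the mmap ioctl above maps the object's shmem
 * backing store directly into the caller's address space with do_mmap(),
 * i.e. a CPU view of the pages. The GTT view is set up separately through
 * the fake-offset machinery and the fault handler that follow
 * (i915_gem_mmap_gtt_ioctl / i915_gem_fault).
 */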

/**
 * i915_gem_fault - fault a page into the GTT
 * vma: VMA in question
 * vmf: fault info
 *
 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
 * from userspace. The fault handler takes care of binding the object to
 * the GTT (if needed), allocating and programming a fence register (again,
 * only if needed based on whether the old reg is still valid or the object
 * is tiled) and inserting a new PTE into the faulting process.
 *
 * Note that the faulting process may involve evicting existing objects
 * from the GTT and/or fence registers to make room. So performance may
 * suffer if the GTT working set is large or there are few fence registers
 * left.
 */
int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct drm_gem_object *obj = vma->vm_private_data;
	struct drm_device *dev = obj->dev;
	struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
	pgoff_t page_offset;
	unsigned long pfn;
	int ret = 0;
	bool write = !!(vmf->flags & FAULT_FLAG_WRITE);

	/* We don't use vmf->pgoff since that has the fake offset */
	page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
		PAGE_SHIFT;

	/* Now bind it into the GTT if needed */
	mutex_lock(&dev->struct_mutex);
	if (!obj_priv->gtt_space) {
		ret = i915_gem_object_bind_to_gtt(obj, 0);
		if (ret)
			goto unlock;

		ret = i915_gem_object_set_to_gtt_domain(obj, write);
		if (ret)
			goto unlock;
	}

	/* Need a new fence register? */
	if (obj_priv->tiling_mode != I915_TILING_NONE) {
		ret = i915_gem_object_get_fence_reg(obj);
		if (ret)
			goto unlock;
	}

	pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
		page_offset;

	/* Finally, remap it using the new GTT offset */
	ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
unlock:
	mutex_unlock(&dev->struct_mutex);

	switch (ret) {
	case 0:
	case -ERESTARTSYS:
		return VM_FAULT_NOPAGE;
	case -ENOMEM:
	case -EAGAIN:
		return VM_FAULT_OOM;
	default:
		return VM_FAULT_SIGBUS;
	}
}
1191
1192/**
1193 * i915_gem_create_mmap_offset - create a fake mmap offset for an object
1194 * @obj: obj in question
1195 *
1196 * GEM memory mapping works by handing back to userspace a fake mmap offset
1197 * it can use in a subsequent mmap(2) call. The DRM core code then looks
1198 * up the object based on the offset and sets up the various memory mapping
1199 * structures.
1200 *
1201 * This routine allocates and attaches a fake offset for @obj.
1202 */
1203static int
1204i915_gem_create_mmap_offset(struct drm_gem_object *obj)
1205{
1206 struct drm_device *dev = obj->dev;
1207 struct drm_gem_mm *mm = dev->mm_private;
23010e43 1208 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
de151cf6 1209 struct drm_map_list *list;
f77d390c 1210 struct drm_local_map *map;
de151cf6
JB
1211 int ret = 0;
1212
1213 /* Set the object up for mmap'ing */
1214 list = &obj->map_list;
9a298b2a 1215 list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
de151cf6
JB
1216 if (!list->map)
1217 return -ENOMEM;
1218
1219 map = list->map;
1220 map->type = _DRM_GEM;
1221 map->size = obj->size;
1222 map->handle = obj;
1223
1224 /* Get a DRM GEM mmap offset allocated... */
1225 list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
1226 obj->size / PAGE_SIZE, 0, 0);
1227 if (!list->file_offset_node) {
1228 DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
1229 ret = -ENOMEM;
1230 goto out_free_list;
1231 }
1232
1233 list->file_offset_node = drm_mm_get_block(list->file_offset_node,
1234 obj->size / PAGE_SIZE, 0);
1235 if (!list->file_offset_node) {
1236 ret = -ENOMEM;
1237 goto out_free_list;
1238 }
1239
1240 list->hash.key = list->file_offset_node->start;
1241 if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) {
1242 DRM_ERROR("failed to add to map hash\n");
5618ca6a 1243 ret = -ENOMEM;
de151cf6
JB
1244 goto out_free_mm;
1245 }
1246
1247 /* By now we should be all set, any drm_mmap request on the offset
1248 * below will get to our mmap & fault handler */
1249 obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;
1250
1251 return 0;
1252
1253out_free_mm:
1254 drm_mm_put_block(list->file_offset_node);
1255out_free_list:
9a298b2a 1256 kfree(list->map);
de151cf6
JB
1257
1258 return ret;
1259}
1260
901782b2
CW
1261/**
1262 * i915_gem_release_mmap - remove physical page mappings
1263 * @obj: obj in question
1264 *
af901ca1 1265 * Preserve the reservation of the mmapping with the DRM core code, but
901782b2
CW
1266 * relinquish ownership of the pages back to the system.
1267 *
1268 * It is vital that we remove the page mapping if we have mapped a tiled
1269 * object through the GTT and then lose the fence register due to
1270 * resource pressure. Similarly if the object has been moved out of the
1271 * aperture, than pages mapped into userspace must be revoked. Removing the
1272 * mapping will then trigger a page fault on the next user access, allowing
1273 * fixup by i915_gem_fault().
1274 */
d05ca301 1275void
901782b2
CW
1276i915_gem_release_mmap(struct drm_gem_object *obj)
1277{
1278 struct drm_device *dev = obj->dev;
23010e43 1279 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
901782b2
CW
1280
1281 if (dev->dev_mapping)
1282 unmap_mapping_range(dev->dev_mapping,
1283 obj_priv->mmap_offset, obj->size, 1);
1284}
1285
ab00b3e5
JB
1286static void
1287i915_gem_free_mmap_offset(struct drm_gem_object *obj)
1288{
1289 struct drm_device *dev = obj->dev;
23010e43 1290 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
ab00b3e5
JB
1291 struct drm_gem_mm *mm = dev->mm_private;
1292 struct drm_map_list *list;
1293
1294 list = &obj->map_list;
1295 drm_ht_remove_item(&mm->offset_hash, &list->hash);
1296
1297 if (list->file_offset_node) {
1298 drm_mm_put_block(list->file_offset_node);
1299 list->file_offset_node = NULL;
1300 }
1301
1302 if (list->map) {
9a298b2a 1303 kfree(list->map);
ab00b3e5
JB
1304 list->map = NULL;
1305 }
1306
1307 obj_priv->mmap_offset = 0;
1308}
1309
de151cf6
JB
1310/**
1311 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1312 * @obj: object to check
1313 *
1314 * Return the required GTT alignment for an object, taking into account
1315 * potential fence register mapping if needed.
1316 */
1317static uint32_t
1318i915_gem_get_gtt_alignment(struct drm_gem_object *obj)
1319{
1320 struct drm_device *dev = obj->dev;
23010e43 1321 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
de151cf6
JB
1322 int start, i;
1323
1324 /*
1325 * Minimum alignment is 4k (GTT page size), but might be greater
1326 * if a fence register is needed for the object.
1327 */
1328 if (IS_I965G(dev) || obj_priv->tiling_mode == I915_TILING_NONE)
1329 return 4096;
1330
1331 /*
1332 * Previous chips need to be aligned to the size of the smallest
1333 * fence register that can contain the object.
1334 */
1335 if (IS_I9XX(dev))
1336 start = 1024*1024;
1337 else
1338 start = 512*1024;
1339
1340 for (i = start; i < obj->size; i <<= 1)
1341 ;
1342
1343 return i;
1344}
1345
1346/**
1347 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1348 * @dev: DRM device
1349 * @data: GTT mapping ioctl data
1350 * @file_priv: GEM object info
1351 *
1352 * Simply returns the fake offset to userspace so it can mmap it.
1353 * The mmap call will end up in drm_gem_mmap(), which will set things
1354 * up so we can get faults in the handler above.
1355 *
1356 * The fault handler will take care of binding the object into the GTT
1357 * (since it may have been evicted to make room for something), allocating
1358 * a fence register, and mapping the appropriate aperture address into
1359 * userspace.
1360 */
1361int
1362i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1363 struct drm_file *file_priv)
1364{
1365 struct drm_i915_gem_mmap_gtt *args = data;
de151cf6
JB
1366 struct drm_gem_object *obj;
1367 struct drm_i915_gem_object *obj_priv;
1368 int ret;
1369
1370 if (!(dev->driver->driver_features & DRIVER_GEM))
1371 return -ENODEV;
1372
1373 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1374 if (obj == NULL)
1375 return -EBADF;
1376
1377 mutex_lock(&dev->struct_mutex);
1378
23010e43 1379 obj_priv = to_intel_bo(obj);
de151cf6 1380
ab18282d
CW
1381 if (obj_priv->madv != I915_MADV_WILLNEED) {
1382 DRM_ERROR("Attempting to mmap a purgeable buffer\n");
1383 drm_gem_object_unreference(obj);
1384 mutex_unlock(&dev->struct_mutex);
1385 return -EINVAL;
1386 }
1387
1388
de151cf6
JB
1389 if (!obj_priv->mmap_offset) {
1390 ret = i915_gem_create_mmap_offset(obj);
13af1062
CW
1391 if (ret) {
1392 drm_gem_object_unreference(obj);
1393 mutex_unlock(&dev->struct_mutex);
de151cf6 1394 return ret;
13af1062 1395 }
de151cf6
JB
1396 }
1397
1398 args->offset = obj_priv->mmap_offset;
1399
de151cf6
JB
1400 /*
1401 * Pull it into the GTT so that we have a page list (makes the
1402 * initial fault faster and any subsequent flushing possible).
1403 */
1404 if (!obj_priv->agp_mem) {
e67b8ce1 1405 ret = i915_gem_object_bind_to_gtt(obj, 0);
de151cf6
JB
1406 if (ret) {
1407 drm_gem_object_unreference(obj);
1408 mutex_unlock(&dev->struct_mutex);
1409 return ret;
1410 }
de151cf6
JB
1411 }
1412
1413 drm_gem_object_unreference(obj);
1414 mutex_unlock(&dev->struct_mutex);
1415
1416 return 0;
1417}
1418
6911a9b8 1419void
856fa198 1420i915_gem_object_put_pages(struct drm_gem_object *obj)
673a394b 1421{
23010e43 1422 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b
EA
1423 int page_count = obj->size / PAGE_SIZE;
1424 int i;
1425
856fa198 1426 BUG_ON(obj_priv->pages_refcount == 0);
bb6baf76 1427 BUG_ON(obj_priv->madv == __I915_MADV_PURGED);
673a394b 1428
856fa198
EA
1429 if (--obj_priv->pages_refcount != 0)
1430 return;
673a394b 1431
280b713b
EA
1432 if (obj_priv->tiling_mode != I915_TILING_NONE)
1433 i915_gem_object_save_bit_17_swizzle(obj);
1434
3ef94daa 1435 if (obj_priv->madv == I915_MADV_DONTNEED)
13a05fd9 1436 obj_priv->dirty = 0;
3ef94daa
CW
1437
1438 for (i = 0; i < page_count; i++) {
3ef94daa
CW
1439 if (obj_priv->dirty)
1440 set_page_dirty(obj_priv->pages[i]);
1441
1442 if (obj_priv->madv == I915_MADV_WILLNEED)
856fa198 1443 mark_page_accessed(obj_priv->pages[i]);
3ef94daa
CW
1444
1445 page_cache_release(obj_priv->pages[i]);
1446 }
673a394b
EA
1447 obj_priv->dirty = 0;
1448
8e7d2b2c 1449 drm_free_large(obj_priv->pages);
856fa198 1450 obj_priv->pages = NULL;
673a394b
EA
1451}
1452
1453static void
852835f3
ZN
1454i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno,
1455 struct intel_ring_buffer *ring)
673a394b
EA
1456{
1457 struct drm_device *dev = obj->dev;
1458 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 1459 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
852835f3
ZN
1460 BUG_ON(ring == NULL);
1461 obj_priv->ring = ring;
673a394b
EA
1462
1463 /* Add a reference if we're newly entering the active list. */
1464 if (!obj_priv->active) {
1465 drm_gem_object_reference(obj);
1466 obj_priv->active = 1;
1467 }
1468 /* Move from whatever list we were on to the tail of execution. */
5e118f41 1469 spin_lock(&dev_priv->mm.active_list_lock);
852835f3 1470 list_move_tail(&obj_priv->list, &ring->active_list);
5e118f41 1471 spin_unlock(&dev_priv->mm.active_list_lock);
ce44b0ea 1472 obj_priv->last_rendering_seqno = seqno;
673a394b
EA
1473}
1474
ce44b0ea
EA
1475static void
1476i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
1477{
1478 struct drm_device *dev = obj->dev;
1479 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 1480 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
ce44b0ea
EA
1481
1482 BUG_ON(!obj_priv->active);
1483 list_move_tail(&obj_priv->list, &dev_priv->mm.flushing_list);
1484 obj_priv->last_rendering_seqno = 0;
1485}
673a394b 1486
963b4836
CW
1487/* Immediately discard the backing storage */
1488static void
1489i915_gem_object_truncate(struct drm_gem_object *obj)
1490{
23010e43 1491 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
bb6baf76 1492 struct inode *inode;
963b4836 1493
bb6baf76
CW
1494 inode = obj->filp->f_path.dentry->d_inode;
1495 if (inode->i_op->truncate)
1496 inode->i_op->truncate (inode);
1497
1498 obj_priv->madv = __I915_MADV_PURGED;
963b4836
CW
1499}
1500
1501static inline int
1502i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv)
1503{
1504 return obj_priv->madv == I915_MADV_DONTNEED;
1505}
1506
673a394b
EA
1507static void
1508i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
1509{
1510 struct drm_device *dev = obj->dev;
1511 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 1512 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b
EA
1513
1514 i915_verify_inactive(dev, __FILE__, __LINE__);
1515 if (obj_priv->pin_count != 0)
1516 list_del_init(&obj_priv->list);
1517 else
1518 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1519
99fcb766
DV
1520 BUG_ON(!list_empty(&obj_priv->gpu_write_list));
1521
ce44b0ea 1522 obj_priv->last_rendering_seqno = 0;
852835f3 1523 obj_priv->ring = NULL;
673a394b
EA
1524 if (obj_priv->active) {
1525 obj_priv->active = 0;
1526 drm_gem_object_unreference(obj);
1527 }
1528 i915_verify_inactive(dev, __FILE__, __LINE__);
1529}
1530
63560396
DV
1531static void
1532i915_gem_process_flushing_list(struct drm_device *dev,
852835f3
ZN
1533 uint32_t flush_domains, uint32_t seqno,
1534 struct intel_ring_buffer *ring)
63560396
DV
1535{
1536 drm_i915_private_t *dev_priv = dev->dev_private;
1537 struct drm_i915_gem_object *obj_priv, *next;
1538
1539 list_for_each_entry_safe(obj_priv, next,
1540 &dev_priv->mm.gpu_write_list,
1541 gpu_write_list) {
a8089e84 1542 struct drm_gem_object *obj = &obj_priv->base;
63560396
DV
1543
1544 if ((obj->write_domain & flush_domains) ==
852835f3
ZN
1545 obj->write_domain &&
1546 obj_priv->ring->ring_flag == ring->ring_flag) {
63560396
DV
1547 uint32_t old_write_domain = obj->write_domain;
1548
1549 obj->write_domain = 0;
1550 list_del_init(&obj_priv->gpu_write_list);
852835f3 1551 i915_gem_object_move_to_active(obj, seqno, ring);
63560396
DV
1552
1553 /* update the fence lru list */
007cc8ac
DV
1554 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
1555 struct drm_i915_fence_reg *reg =
1556 &dev_priv->fence_regs[obj_priv->fence_reg];
1557 list_move_tail(&reg->lru_list,
63560396 1558 &dev_priv->mm.fence_list);
007cc8ac 1559 }
63560396
DV
1560
1561 trace_i915_gem_object_change_domain(obj,
1562 obj->read_domains,
1563 old_write_domain);
1564 }
1565 }
1566}
8187a2b7 1567
5a5a0c64 1568uint32_t
b962442e 1569i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
852835f3 1570 uint32_t flush_domains, struct intel_ring_buffer *ring)
673a394b
EA
1571{
1572 drm_i915_private_t *dev_priv = dev->dev_private;
b962442e 1573 struct drm_i915_file_private *i915_file_priv = NULL;
673a394b
EA
1574 struct drm_i915_gem_request *request;
1575 uint32_t seqno;
1576 int was_empty;
673a394b 1577
b962442e
EA
1578 if (file_priv != NULL)
1579 i915_file_priv = file_priv->driver_priv;
1580
9a298b2a 1581 request = kzalloc(sizeof(*request), GFP_KERNEL);
673a394b
EA
1582 if (request == NULL)
1583 return 0;
1584
852835f3 1585 seqno = ring->add_request(dev, ring, file_priv, flush_domains);
673a394b
EA
1586
1587 request->seqno = seqno;
852835f3 1588 request->ring = ring;
673a394b 1589 request->emitted_jiffies = jiffies;
852835f3
ZN
1590 was_empty = list_empty(&ring->request_list);
1591 list_add_tail(&request->list, &ring->request_list);
1592
b962442e
EA
1593 if (i915_file_priv) {
1594 list_add_tail(&request->client_list,
1595 &i915_file_priv->mm.request_list);
1596 } else {
1597 INIT_LIST_HEAD(&request->client_list);
1598 }
673a394b 1599
ce44b0ea
EA
1600 /* Associate any objects on the flushing list matching the write
1601 * domain we're flushing with our flush.
1602 */
63560396 1603 if (flush_domains != 0)
852835f3 1604 i915_gem_process_flushing_list(dev, flush_domains, seqno, ring);
ce44b0ea 1605
f65d9421
BG
1606 if (!dev_priv->mm.suspended) {
1607 mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
1608 if (was_empty)
1609 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1610 }
673a394b
EA
1611 return seqno;
1612}
1613
1614/**
1615 * Command execution barrier
1616 *
1617 * Ensures that all commands in the ring are finished
1618 * before signalling the CPU
1619 */
3043c60c 1620static uint32_t
852835f3 1621i915_retire_commands(struct drm_device *dev, struct intel_ring_buffer *ring)
673a394b 1622{
673a394b 1623 uint32_t flush_domains = 0;
673a394b
EA
1624
1625 /* The sampler always gets flushed on i965 (sigh) */
1626 if (IS_I965G(dev))
1627 flush_domains |= I915_GEM_DOMAIN_SAMPLER;
852835f3
ZN
1628
1629 ring->flush(dev, ring,
1630 I915_GEM_DOMAIN_COMMAND, flush_domains);
673a394b
EA
1631 return flush_domains;
1632}
1633
1634/**
1635 * Moves buffers associated only with the given active seqno from the active
1636 * to inactive list, potentially freeing them.
1637 */
1638static void
1639i915_gem_retire_request(struct drm_device *dev,
1640 struct drm_i915_gem_request *request)
1641{
1642 drm_i915_private_t *dev_priv = dev->dev_private;
1643
1c5d22f7
CW
1644 trace_i915_gem_request_retire(dev, request->seqno);
1645
673a394b
EA
1646 /* Move any buffers on the active list that are no longer referenced
1647 * by the ringbuffer to the flushing/inactive lists as appropriate.
1648 */
5e118f41 1649 spin_lock(&dev_priv->mm.active_list_lock);
852835f3 1650 while (!list_empty(&request->ring->active_list)) {
673a394b
EA
1651 struct drm_gem_object *obj;
1652 struct drm_i915_gem_object *obj_priv;
1653
852835f3 1654 obj_priv = list_first_entry(&request->ring->active_list,
673a394b
EA
1655 struct drm_i915_gem_object,
1656 list);
a8089e84 1657 obj = &obj_priv->base;
673a394b
EA
1658
1659 /* If the seqno being retired doesn't match the oldest in the
1660 * list, then the oldest in the list must still be newer than
1661 * this seqno.
1662 */
1663 if (obj_priv->last_rendering_seqno != request->seqno)
5e118f41 1664 goto out;
de151cf6 1665
673a394b
EA
1666#if WATCH_LRU
1667 DRM_INFO("%s: retire %d moves to inactive list %p\n",
1668 __func__, request->seqno, obj);
1669#endif
1670
ce44b0ea
EA
1671 if (obj->write_domain != 0)
1672 i915_gem_object_move_to_flushing(obj);
68c84342
SL
1673 else {
1674 /* Take a reference on the object so it won't be
1675 * freed while the spinlock is held. The list
1676 * protection for this spinlock is safe when breaking
1677 * the lock like this since the next thing we do
1678 * is just get the head of the list again.
1679 */
1680 drm_gem_object_reference(obj);
673a394b 1681 i915_gem_object_move_to_inactive(obj);
68c84342
SL
1682 spin_unlock(&dev_priv->mm.active_list_lock);
1683 drm_gem_object_unreference(obj);
1684 spin_lock(&dev_priv->mm.active_list_lock);
1685 }
673a394b 1686 }
5e118f41
CW
1687out:
1688 spin_unlock(&dev_priv->mm.active_list_lock);
673a394b
EA
1689}
1690
1691/**
1692 * Returns true if seq1 is later than seq2.
1693 */
22be1724 1694bool
673a394b
EA
1695i915_seqno_passed(uint32_t seq1, uint32_t seq2)
1696{
1697 return (int32_t)(seq1 - seq2) >= 0;
1698}
1699
1700uint32_t
852835f3 1701i915_get_gem_seqno(struct drm_device *dev,
d1b851fc 1702 struct intel_ring_buffer *ring)
673a394b 1703{
852835f3 1704 return ring->get_gem_seqno(dev, ring);
673a394b
EA
1705}
1706
1707/**
1708 * This function clears the request list as sequence numbers are passed.
1709 */
b09a1fec
CW
1710static void
1711i915_gem_retire_requests_ring(struct drm_device *dev,
1712 struct intel_ring_buffer *ring)
673a394b
EA
1713{
1714 drm_i915_private_t *dev_priv = dev->dev_private;
1715 uint32_t seqno;
1716
8187a2b7 1717 if (!ring->status_page.page_addr
852835f3 1718 || list_empty(&ring->request_list))
6c0594a3
KW
1719 return;
1720
852835f3 1721 seqno = i915_get_gem_seqno(dev, ring);
673a394b 1722
852835f3 1723 while (!list_empty(&ring->request_list)) {
673a394b
EA
1724 struct drm_i915_gem_request *request;
1725 uint32_t retiring_seqno;
1726
852835f3 1727 request = list_first_entry(&ring->request_list,
673a394b
EA
1728 struct drm_i915_gem_request,
1729 list);
1730 retiring_seqno = request->seqno;
1731
1732 if (i915_seqno_passed(seqno, retiring_seqno) ||
ba1234d1 1733 atomic_read(&dev_priv->mm.wedged)) {
673a394b
EA
1734 i915_gem_retire_request(dev, request);
1735
1736 list_del(&request->list);
b962442e 1737 list_del(&request->client_list);
9a298b2a 1738 kfree(request);
673a394b
EA
1739 } else
1740 break;
1741 }
9d34e5db
CW
1742
1743 if (unlikely (dev_priv->trace_irq_seqno &&
1744 i915_seqno_passed(dev_priv->trace_irq_seqno, seqno))) {
8187a2b7
ZN
1745
1746 ring->user_irq_put(dev, ring);
9d34e5db
CW
1747 dev_priv->trace_irq_seqno = 0;
1748 }
673a394b
EA
1749}
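/*
 * [Editorial example, not part of i915_gem.c] The loop above retires the
 * per-ring FIFO strictly from the oldest request forward and stops at the
 * first request the hardware has not reached yet; everything behind that
 * point is known complete because requests are emitted in order. A minimal
 * userspace model of that bound (an array in place of the kernel list,
 * hypothetical demo_* names):
 */
#include <stddef.h>
#include <stdint.h>

struct demo_request {
        uint32_t seqno;         /* seqno the ring writes when this request completes */
};

/* How many requests, counted from the head, has hw_seqno already covered? */
static size_t demo_retire_count(const struct demo_request *fifo, size_t n,
                                uint32_t hw_seqno)
{
        size_t i;

        for (i = 0; i < n; i++)
                if ((int32_t)(hw_seqno - fifo[i].seqno) < 0)
                        break;  /* this and every later request is still pending */
        return i;
}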
1750
b09a1fec
CW
1751void
1752i915_gem_retire_requests(struct drm_device *dev)
1753{
1754 drm_i915_private_t *dev_priv = dev->dev_private;
1755
be72615b
CW
1756 if (!list_empty(&dev_priv->mm.deferred_free_list)) {
1757 struct drm_i915_gem_object *obj_priv, *tmp;
1758
1759 /* We must be careful that during unbind() we do not
1760 * accidentally infinitely recurse into retire requests.
1761 * Currently:
1762 * retire -> free -> unbind -> wait -> retire_ring
1763 */
1764 list_for_each_entry_safe(obj_priv, tmp,
1765 &dev_priv->mm.deferred_free_list,
1766 list)
1767 i915_gem_free_object_tail(&obj_priv->base);
1768 }
1769
b09a1fec
CW
1770 i915_gem_retire_requests_ring(dev, &dev_priv->render_ring);
1771 if (HAS_BSD(dev))
1772 i915_gem_retire_requests_ring(dev, &dev_priv->bsd_ring);
1773}
1774
673a394b
EA
1775void
1776i915_gem_retire_work_handler(struct work_struct *work)
1777{
1778 drm_i915_private_t *dev_priv;
1779 struct drm_device *dev;
1780
1781 dev_priv = container_of(work, drm_i915_private_t,
1782 mm.retire_work.work);
1783 dev = dev_priv->dev;
1784
1785 mutex_lock(&dev->struct_mutex);
b09a1fec 1786 i915_gem_retire_requests(dev);
d1b851fc 1787
6dbe2772 1788 if (!dev_priv->mm.suspended &&
d1b851fc
ZN
1789 (!list_empty(&dev_priv->render_ring.request_list) ||
1790 (HAS_BSD(dev) &&
1791 !list_empty(&dev_priv->bsd_ring.request_list))))
9c9fe1f8 1792 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
673a394b
EA
1793 mutex_unlock(&dev->struct_mutex);
1794}
1795
5a5a0c64 1796int
852835f3
ZN
1797i915_do_wait_request(struct drm_device *dev, uint32_t seqno,
1798 int interruptible, struct intel_ring_buffer *ring)
673a394b
EA
1799{
1800 drm_i915_private_t *dev_priv = dev->dev_private;
802c7eb6 1801 u32 ier;
673a394b
EA
1802 int ret = 0;
1803
1804 BUG_ON(seqno == 0);
1805
ba1234d1 1806 if (atomic_read(&dev_priv->mm.wedged))
ffed1d09
BG
1807 return -EIO;
1808
852835f3 1809 if (!i915_seqno_passed(ring->get_gem_seqno(dev, ring), seqno)) {
bad720ff 1810 if (HAS_PCH_SPLIT(dev))
036a4a7d
ZW
1811 ier = I915_READ(DEIER) | I915_READ(GTIER);
1812 else
1813 ier = I915_READ(IER);
802c7eb6
JB
1814 if (!ier) {
1815 DRM_ERROR("something (likely vbetool) disabled "
1816 "interrupts, re-enabling\n");
1817 i915_driver_irq_preinstall(dev);
1818 i915_driver_irq_postinstall(dev);
1819 }
1820
1c5d22f7
CW
1821 trace_i915_gem_request_wait_begin(dev, seqno);
1822
852835f3 1823 ring->waiting_gem_seqno = seqno;
8187a2b7 1824 ring->user_irq_get(dev, ring);
48764bf4 1825 if (interruptible)
852835f3
ZN
1826 ret = wait_event_interruptible(ring->irq_queue,
1827 i915_seqno_passed(
1828 ring->get_gem_seqno(dev, ring), seqno)
1829 || atomic_read(&dev_priv->mm.wedged));
48764bf4 1830 else
852835f3
ZN
1831 wait_event(ring->irq_queue,
1832 i915_seqno_passed(
1833 ring->get_gem_seqno(dev, ring), seqno)
1834 || atomic_read(&dev_priv->mm.wedged));
48764bf4 1835
8187a2b7 1836 ring->user_irq_put(dev, ring);
852835f3 1837 ring->waiting_gem_seqno = 0;
1c5d22f7
CW
1838
1839 trace_i915_gem_request_wait_end(dev, seqno);
673a394b 1840 }
ba1234d1 1841 if (atomic_read(&dev_priv->mm.wedged))
673a394b
EA
1842 ret = -EIO;
1843
1844 if (ret && ret != -ERESTARTSYS)
1845 DRM_ERROR("%s returns %d (awaiting %d at %d)\n",
852835f3 1846 __func__, ret, seqno, ring->get_gem_seqno(dev, ring));
673a394b
EA
1847
1848 /* Directly dispatch request retiring. While we have the work queue
1849 * to handle this, the waiter on a request often wants an associated
1850 * buffer to have made it to the inactive list, and we would need
1851 * a separate wait queue to handle that.
1852 */
1853 if (ret == 0)
b09a1fec 1854 i915_gem_retire_requests_ring(dev, ring);
673a394b
EA
1855
1856 return ret;
1857}
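/*
 * [Editorial example, not part of i915_gem.c] Both the interruptible and
 * the uninterruptible wait above sleep on the same predicate: either the
 * ring's reported seqno has reached the target, or the GPU has been marked
 * wedged (a waiter woken by the wedged flag is then turned into -EIO). A
 * standalone model of that predicate, with hypothetical demo_* names and
 * no kernel wait queues involved:
 */
#include <stdbool.h>
#include <stdint.h>

struct demo_ring_state {
        uint32_t hw_seqno;      /* last seqno the hardware has reported */
        bool wedged;            /* set after an unrecoverable GPU hang */
};

/* True when a waiter for 'target' may stop sleeping. */
static bool demo_wait_done(const struct demo_ring_state *rs, uint32_t target)
{
        return (int32_t)(rs->hw_seqno - target) >= 0 || rs->wedged;
}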
1858
48764bf4
DV
1859/**
1860 * Waits for a sequence number to be signaled, and cleans up the
1861 * request and object lists appropriately for that event.
1862 */
1863static int
852835f3
ZN
1864i915_wait_request(struct drm_device *dev, uint32_t seqno,
1865 struct intel_ring_buffer *ring)
48764bf4 1866{
852835f3 1867 return i915_do_wait_request(dev, seqno, 1, ring);
48764bf4
DV
1868}
1869
8187a2b7
ZN
1870static void
1871i915_gem_flush(struct drm_device *dev,
1872 uint32_t invalidate_domains,
1873 uint32_t flush_domains)
1874{
1875 drm_i915_private_t *dev_priv = dev->dev_private;
1876 if (flush_domains & I915_GEM_DOMAIN_CPU)
1877 drm_agp_chipset_flush(dev);
1878 dev_priv->render_ring.flush(dev, &dev_priv->render_ring,
1879 invalidate_domains,
1880 flush_domains);
d1b851fc
ZN
1881
1882 if (HAS_BSD(dev))
1883 dev_priv->bsd_ring.flush(dev, &dev_priv->bsd_ring,
1884 invalidate_domains,
1885 flush_domains);
8187a2b7
ZN
1886}
1887
852835f3
ZN
1888static void
1889i915_gem_flush_ring(struct drm_device *dev,
1890 uint32_t invalidate_domains,
1891 uint32_t flush_domains,
1892 struct intel_ring_buffer *ring)
1893{
1894 if (flush_domains & I915_GEM_DOMAIN_CPU)
1895 drm_agp_chipset_flush(dev);
1896 ring->flush(dev, ring,
1897 invalidate_domains,
1898 flush_domains);
1899}
1900
673a394b
EA
1901/**
1902 * Ensures that all rendering to the object has completed and the object is
1903 * safe to unbind from the GTT or access from the CPU.
1904 */
1905static int
1906i915_gem_object_wait_rendering(struct drm_gem_object *obj)
1907{
1908 struct drm_device *dev = obj->dev;
23010e43 1909 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b
EA
1910 int ret;
1911
e47c68e9
EA
1912 /* This function only exists to support waiting for existing rendering,
1913 * not for emitting required flushes.
673a394b 1914 */
e47c68e9 1915 BUG_ON((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0);
673a394b
EA
1916
1917 /* If there is rendering queued on the buffer being evicted, wait for
1918 * it.
1919 */
1920 if (obj_priv->active) {
1921#if WATCH_BUF
1922 DRM_INFO("%s: object %p wait for seqno %08x\n",
1923 __func__, obj, obj_priv->last_rendering_seqno);
1924#endif
852835f3
ZN
1925 ret = i915_wait_request(dev,
1926 obj_priv->last_rendering_seqno, obj_priv->ring);
673a394b
EA
1927 if (ret != 0)
1928 return ret;
1929 }
1930
1931 return 0;
1932}
1933
1934/**
1935 * Unbinds an object from the GTT aperture.
1936 */
0f973f27 1937int
673a394b
EA
1938i915_gem_object_unbind(struct drm_gem_object *obj)
1939{
1940 struct drm_device *dev = obj->dev;
4a87b8ca 1941 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 1942 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b
EA
1943 int ret = 0;
1944
1945#if WATCH_BUF
1946 DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj);
1947 DRM_INFO("gtt_space %p\n", obj_priv->gtt_space);
1948#endif
1949 if (obj_priv->gtt_space == NULL)
1950 return 0;
1951
1952 if (obj_priv->pin_count != 0) {
1953 DRM_ERROR("Attempting to unbind pinned buffer\n");
1954 return -EINVAL;
1955 }
1956
5323fd04
EA
1957 /* blow away mappings if mapped through GTT */
1958 i915_gem_release_mmap(obj);
1959
673a394b
EA
1960 /* Move the object to the CPU domain to ensure that
1961 * any possible CPU writes while it's not in the GTT
1962 * are flushed when we go to remap it. This will
1963 * also ensure that all pending GPU writes are finished
1964 * before we unbind.
1965 */
e47c68e9 1966 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
8dc1775d 1967 if (ret == -ERESTARTSYS)
673a394b 1968 return ret;
8dc1775d
CW
1969 /* Continue on if we fail due to EIO, the GPU is hung so we
1970 * should be safe and we need to cleanup or else we might
1971 * cause memory corruption through use-after-free.
1972 */
673a394b 1973
5323fd04
EA
1974 BUG_ON(obj_priv->active);
1975
96b47b65
DV
1976 /* release the fence reg _after_ flushing */
1977 if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
1978 i915_gem_clear_fence_reg(obj);
1979
673a394b
EA
1980 if (obj_priv->agp_mem != NULL) {
1981 drm_unbind_agp(obj_priv->agp_mem);
1982 drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
1983 obj_priv->agp_mem = NULL;
1984 }
1985
856fa198 1986 i915_gem_object_put_pages(obj);
a32808c0 1987 BUG_ON(obj_priv->pages_refcount);
673a394b
EA
1988
1989 if (obj_priv->gtt_space) {
1990 atomic_dec(&dev->gtt_count);
1991 atomic_sub(obj->size, &dev->gtt_memory);
1992
1993 drm_mm_put_block(obj_priv->gtt_space);
1994 obj_priv->gtt_space = NULL;
1995 }
1996
1997 /* Remove ourselves from the LRU list if present. */
4a87b8ca 1998 spin_lock(&dev_priv->mm.active_list_lock);
673a394b
EA
1999 if (!list_empty(&obj_priv->list))
2000 list_del_init(&obj_priv->list);
4a87b8ca 2001 spin_unlock(&dev_priv->mm.active_list_lock);
673a394b 2002
963b4836
CW
2003 if (i915_gem_object_is_purgeable(obj_priv))
2004 i915_gem_object_truncate(obj);
2005
1c5d22f7
CW
2006 trace_i915_gem_object_unbind(obj);
2007
8dc1775d 2008 return ret;
673a394b
EA
2009}
2010
0108a3ed
DV
2011static int
2012i915_gem_scan_inactive_list_and_evict(struct drm_device *dev, int min_size,
2013 unsigned alignment, int *found)
07f73f69
CW
2014{
2015 drm_i915_private_t *dev_priv = dev->dev_private;
0108a3ed 2016 struct drm_gem_object *obj;
07f73f69
CW
2017 struct drm_i915_gem_object *obj_priv;
2018 struct drm_gem_object *best = NULL;
2019 struct drm_gem_object *first = NULL;
2020
2021 /* Try to find the smallest clean object */
2022 list_for_each_entry(obj_priv, &dev_priv->mm.inactive_list, list) {
a8089e84 2023 struct drm_gem_object *obj = &obj_priv->base;
07f73f69 2024 if (obj->size >= min_size) {
963b4836
CW
2025 if ((!obj_priv->dirty ||
2026 i915_gem_object_is_purgeable(obj_priv)) &&
07f73f69
CW
2027 (!best || obj->size < best->size)) {
2028 best = obj;
2029 if (best->size == min_size)
0108a3ed 2030 break;
07f73f69
CW
2031 }
2032 if (!first)
2033 first = obj;
2034 }
2035 }
2036
0108a3ed
DV
2037 obj = best ? best : first;
2038
2039 if (!obj) {
2040 *found = 0;
2041 return 0;
2042 }
2043
2044 *found = 1;
2045
2046#if WATCH_LRU
2047 DRM_INFO("%s: evicting %p\n", __func__, obj);
2048#endif
2049 obj_priv = to_intel_bo(obj);
2050 BUG_ON(obj_priv->pin_count != 0);
2051 BUG_ON(obj_priv->active);
2052
2053 /* Wait on the rendering and unbind the buffer. */
2054 return i915_gem_object_unbind(obj);
07f73f69
CW
2055}
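/*
 * [Editorial example, not part of i915_gem.c] The scan above is a simple
 * best-fit pass: among clean (or purgeable) inactive buffers of at least
 * min_size it prefers the smallest, stopping early on an exact fit, and it
 * falls back to the first large-enough buffer when no clean one exists.
 * A standalone sketch over an array (hypothetical demo_* names):
 */
#include <stddef.h>

struct demo_buffer {
        size_t size;
        int dirty;              /* dirty buffers are only taken as a fallback */
};

/* Returns the index of the buffer to evict, or -1 if nothing is large enough. */
static int demo_pick_victim(const struct demo_buffer *b, int n, size_t min_size)
{
        int best = -1, first = -1, i;

        for (i = 0; i < n; i++) {
                if (b[i].size < min_size)
                        continue;
                if (first < 0)
                        first = i;              /* fallback candidate */
                if (!b[i].dirty &&
                    (best < 0 || b[i].size < b[best].size)) {
                        best = i;
                        if (b[i].size == min_size)
                                break;          /* exact fit, stop scanning */
                }
        }
        return best >= 0 ? best : first;
}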
2056
4df2faf4
DV
2057static int
2058i915_gpu_idle(struct drm_device *dev)
2059{
2060 drm_i915_private_t *dev_priv = dev->dev_private;
2061 bool lists_empty;
d1b851fc 2062 uint32_t seqno1, seqno2;
852835f3 2063 int ret;
4df2faf4
DV
2064
2065 spin_lock(&dev_priv->mm.active_list_lock);
d1b851fc
ZN
2066 lists_empty = (list_empty(&dev_priv->mm.flushing_list) &&
2067 list_empty(&dev_priv->render_ring.active_list) &&
2068 (!HAS_BSD(dev) ||
2069 list_empty(&dev_priv->bsd_ring.active_list)));
4df2faf4
DV
2070 spin_unlock(&dev_priv->mm.active_list_lock);
2071
2072 if (lists_empty)
2073 return 0;
2074
2075 /* Flush everything onto the inactive list. */
2076 i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
d1b851fc 2077 seqno1 = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS,
852835f3 2078 &dev_priv->render_ring);
d1b851fc 2079 if (seqno1 == 0)
4df2faf4 2080 return -ENOMEM;
d1b851fc
ZN
2081 ret = i915_wait_request(dev, seqno1, &dev_priv->render_ring);
2082
2083 if (HAS_BSD(dev)) {
2084 seqno2 = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS,
2085 &dev_priv->bsd_ring);
2086 if (seqno2 == 0)
2087 return -ENOMEM;
2088
2089 ret = i915_wait_request(dev, seqno2, &dev_priv->bsd_ring);
2090 if (ret)
2091 return ret;
2092 }
2093
4df2faf4 2094
852835f3 2095 return ret;
4df2faf4
DV
2096}
2097
673a394b 2098static int
07f73f69
CW
2099i915_gem_evict_everything(struct drm_device *dev)
2100{
2101 drm_i915_private_t *dev_priv = dev->dev_private;
07f73f69
CW
2102 int ret;
2103 bool lists_empty;
2104
07f73f69
CW
2105 spin_lock(&dev_priv->mm.active_list_lock);
2106 lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
2107 list_empty(&dev_priv->mm.flushing_list) &&
d1b851fc
ZN
2108 list_empty(&dev_priv->render_ring.active_list) &&
2109 (!HAS_BSD(dev)
2110 || list_empty(&dev_priv->bsd_ring.active_list)));
07f73f69
CW
2111 spin_unlock(&dev_priv->mm.active_list_lock);
2112
9731129c 2113 if (lists_empty)
07f73f69 2114 return -ENOSPC;
07f73f69
CW
2115
2116 /* Flush everything (on to the inactive lists) and evict */
4df2faf4 2117 ret = i915_gpu_idle(dev);
07f73f69
CW
2118 if (ret)
2119 return ret;
2120
99fcb766
DV
2121 BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
2122
ab5ee576 2123 ret = i915_gem_evict_from_inactive_list(dev);
07f73f69
CW
2124 if (ret)
2125 return ret;
2126
2127 spin_lock(&dev_priv->mm.active_list_lock);
2128 lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
2129 list_empty(&dev_priv->mm.flushing_list) &&
d1b851fc
ZN
2130 list_empty(&dev_priv->render_ring.active_list) &&
2131 (!HAS_BSD(dev)
2132 || list_empty(&dev_priv->bsd_ring.active_list)));
07f73f69
CW
2133 spin_unlock(&dev_priv->mm.active_list_lock);
2134 BUG_ON(!lists_empty);
2135
2136 return 0;
2137}
2138
673a394b 2139static int
0108a3ed
DV
2140i915_gem_evict_something(struct drm_device *dev,
2141 int min_size, unsigned alignment)
673a394b
EA
2142{
2143 drm_i915_private_t *dev_priv = dev->dev_private;
0108a3ed 2144 int ret, found;
673a394b 2145
852835f3 2146 struct intel_ring_buffer *render_ring = &dev_priv->render_ring;
d1b851fc 2147 struct intel_ring_buffer *bsd_ring = &dev_priv->bsd_ring;
673a394b 2148 for (;;) {
b09a1fec 2149 i915_gem_retire_requests(dev);
d1b851fc 2150
673a394b
EA
2151 /* If there's an inactive buffer available now, grab it
2152 * and be done.
2153 */
0108a3ed
DV
2154 ret = i915_gem_scan_inactive_list_and_evict(dev, min_size,
2155 alignment,
2156 &found);
2157 if (found)
2158 return ret;
673a394b
EA
2159
2160 /* If we didn't get anything, but the ring is still processing
07f73f69
CW
 2161 * things, wait for the next request to finish and hopefully leave us
2162 * a buffer to evict.
673a394b 2163 */
852835f3 2164 if (!list_empty(&render_ring->request_list)) {
673a394b
EA
2165 struct drm_i915_gem_request *request;
2166
852835f3 2167 request = list_first_entry(&render_ring->request_list,
673a394b
EA
2168 struct drm_i915_gem_request,
2169 list);
2170
852835f3
ZN
2171 ret = i915_wait_request(dev,
2172 request->seqno, request->ring);
673a394b 2173 if (ret)
07f73f69 2174 return ret;
673a394b 2175
07f73f69 2176 continue;
673a394b
EA
2177 }
2178
d1b851fc
ZN
2179 if (HAS_BSD(dev) && !list_empty(&bsd_ring->request_list)) {
2180 struct drm_i915_gem_request *request;
2181
2182 request = list_first_entry(&bsd_ring->request_list,
2183 struct drm_i915_gem_request,
2184 list);
2185
2186 ret = i915_wait_request(dev,
2187 request->seqno, request->ring);
2188 if (ret)
2189 return ret;
2190
2191 continue;
2192 }
2193
673a394b
EA
2194 /* If we didn't have anything on the request list but there
2195 * are buffers awaiting a flush, emit one and try again.
2196 * When we wait on it, those buffers waiting for that flush
2197 * will get moved to inactive.
2198 */
2199 if (!list_empty(&dev_priv->mm.flushing_list)) {
0108a3ed 2200 struct drm_gem_object *obj = NULL;
07f73f69 2201 struct drm_i915_gem_object *obj_priv;
673a394b 2202
9a1e2582
CW
2203 /* Find an object that we can immediately reuse */
2204 list_for_each_entry(obj_priv, &dev_priv->mm.flushing_list, list) {
a8089e84 2205 obj = &obj_priv->base;
9a1e2582
CW
2206 if (obj->size >= min_size)
2207 break;
673a394b 2208
9a1e2582
CW
2209 obj = NULL;
2210 }
673a394b 2211
9a1e2582
CW
2212 if (obj != NULL) {
2213 uint32_t seqno;
673a394b 2214
852835f3
ZN
2215 i915_gem_flush_ring(dev,
2216 obj->write_domain,
9a1e2582 2217 obj->write_domain,
852835f3
ZN
2218 obj_priv->ring);
2219 seqno = i915_add_request(dev, NULL,
2220 obj->write_domain,
2221 obj_priv->ring);
9a1e2582
CW
2222 if (seqno == 0)
2223 return -ENOMEM;
9a1e2582
CW
2224 continue;
2225 }
673a394b
EA
2226 }
2227
07f73f69
CW
2228 /* If we didn't do any of the above, there's no single buffer
2229 * large enough to swap out for the new one, so just evict
2230 * everything and start again. (This should be rare.)
673a394b 2231 */
9731129c 2232 if (!list_empty(&dev_priv->mm.inactive_list))
ab5ee576 2233 return i915_gem_evict_from_inactive_list(dev);
9731129c 2234 else
07f73f69 2235 return i915_gem_evict_everything(dev);
ac94a962 2236 }
ac94a962
KP
2237}
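/*
 * [Editorial example, not part of i915_gem.c] The loop above escalates in
 * stages: (1) evict a suitable inactive buffer, (2) otherwise wait for the
 * oldest outstanding request on a ring, (3) otherwise flush pending GPU
 * writes so flushing-list buffers become evictable, and only then (4) evict
 * everything. A simplified decision table of that policy (hypothetical
 * demo_* names):
 */
enum demo_evict_step {
        DEMO_EVICT_INACTIVE,
        DEMO_WAIT_REQUEST,
        DEMO_FLUSH_WRITES,
        DEMO_EVICT_EVERYTHING,
};

static enum demo_evict_step
demo_next_evict_step(int have_inactive_candidate, int have_pending_request,
                     int have_flushing_buffer)
{
        if (have_inactive_candidate)
                return DEMO_EVICT_INACTIVE;
        if (have_pending_request)
                return DEMO_WAIT_REQUEST;
        if (have_flushing_buffer)
                return DEMO_FLUSH_WRITES;
        return DEMO_EVICT_EVERYTHING;
}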
2238
6911a9b8 2239int
4bdadb97
CW
2240i915_gem_object_get_pages(struct drm_gem_object *obj,
2241 gfp_t gfpmask)
673a394b 2242{
23010e43 2243 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b
EA
2244 int page_count, i;
2245 struct address_space *mapping;
2246 struct inode *inode;
2247 struct page *page;
673a394b 2248
778c3544
DV
2249 BUG_ON(obj_priv->pages_refcount
2250 == DRM_I915_GEM_OBJECT_MAX_PAGES_REFCOUNT);
2251
856fa198 2252 if (obj_priv->pages_refcount++ != 0)
673a394b
EA
2253 return 0;
2254
2255 /* Get the list of pages out of our struct file. They'll be pinned
2256 * at this point until we release them.
2257 */
2258 page_count = obj->size / PAGE_SIZE;
856fa198 2259 BUG_ON(obj_priv->pages != NULL);
8e7d2b2c 2260 obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *));
856fa198 2261 if (obj_priv->pages == NULL) {
856fa198 2262 obj_priv->pages_refcount--;
673a394b
EA
2263 return -ENOMEM;
2264 }
2265
2266 inode = obj->filp->f_path.dentry->d_inode;
2267 mapping = inode->i_mapping;
2268 for (i = 0; i < page_count; i++) {
4bdadb97 2269 page = read_cache_page_gfp(mapping, i,
985b823b 2270 GFP_HIGHUSER |
4bdadb97 2271 __GFP_COLD |
cd9f040d 2272 __GFP_RECLAIMABLE |
4bdadb97 2273 gfpmask);
1f2b1013
CW
2274 if (IS_ERR(page))
2275 goto err_pages;
2276
856fa198 2277 obj_priv->pages[i] = page;
673a394b 2278 }
280b713b
EA
2279
2280 if (obj_priv->tiling_mode != I915_TILING_NONE)
2281 i915_gem_object_do_bit_17_swizzle(obj);
2282
673a394b 2283 return 0;
1f2b1013
CW
2284
2285err_pages:
2286 while (i--)
2287 page_cache_release(obj_priv->pages[i]);
2288
2289 drm_free_large(obj_priv->pages);
2290 obj_priv->pages = NULL;
2291 obj_priv->pages_refcount--;
2292 return PTR_ERR(page);
673a394b
EA
2293}
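/*
 * [Editorial example, not part of i915_gem.c] The err_pages path above uses
 * the classic "unwind what succeeded so far" pattern: on the first failed
 * page lookup, the while (i--) loop releases exactly the i pages that were
 * already pinned, in reverse order, before the refcount is dropped again.
 * A standalone sketch of the same shape (hypothetical demo_* names):
 */
#include <stdlib.h>

/* Allocate n buffers of sz bytes each, or clean up and return NULL. */
static void **demo_alloc_all(size_t n, size_t sz)
{
        void **bufs = calloc(n, sizeof(*bufs));
        size_t i;

        if (!bufs)
                return NULL;

        for (i = 0; i < n; i++) {
                bufs[i] = malloc(sz);
                if (!bufs[i])
                        goto err;       /* partial success: unwind below */
        }
        return bufs;

err:
        while (i--)                     /* free only what was allocated */
                free(bufs[i]);
        free(bufs);
        return NULL;
}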
2294
4e901fdc
EA
2295static void sandybridge_write_fence_reg(struct drm_i915_fence_reg *reg)
2296{
2297 struct drm_gem_object *obj = reg->obj;
2298 struct drm_device *dev = obj->dev;
2299 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 2300 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4e901fdc
EA
2301 int regnum = obj_priv->fence_reg;
2302 uint64_t val;
2303
2304 val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
2305 0xfffff000) << 32;
2306 val |= obj_priv->gtt_offset & 0xfffff000;
2307 val |= (uint64_t)((obj_priv->stride / 128) - 1) <<
2308 SANDYBRIDGE_FENCE_PITCH_SHIFT;
2309
2310 if (obj_priv->tiling_mode == I915_TILING_Y)
2311 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2312 val |= I965_FENCE_REG_VALID;
2313
2314 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (regnum * 8), val);
2315}
2316
de151cf6
JB
2317static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
2318{
2319 struct drm_gem_object *obj = reg->obj;
2320 struct drm_device *dev = obj->dev;
2321 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 2322 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
de151cf6
JB
2323 int regnum = obj_priv->fence_reg;
2324 uint64_t val;
2325
2326 val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
2327 0xfffff000) << 32;
2328 val |= obj_priv->gtt_offset & 0xfffff000;
2329 val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2330 if (obj_priv->tiling_mode == I915_TILING_Y)
2331 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2332 val |= I965_FENCE_REG_VALID;
2333
2334 I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val);
2335}
2336
2337static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
2338{
2339 struct drm_gem_object *obj = reg->obj;
2340 struct drm_device *dev = obj->dev;
2341 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 2342 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
de151cf6 2343 int regnum = obj_priv->fence_reg;
0f973f27 2344 int tile_width;
dc529a4f 2345 uint32_t fence_reg, val;
de151cf6
JB
2346 uint32_t pitch_val;
2347
2348 if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
2349 (obj_priv->gtt_offset & (obj->size - 1))) {
f06da264 2350 WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n",
0f973f27 2351 __func__, obj_priv->gtt_offset, obj->size);
de151cf6
JB
2352 return;
2353 }
2354
0f973f27
JB
2355 if (obj_priv->tiling_mode == I915_TILING_Y &&
2356 HAS_128_BYTE_Y_TILING(dev))
2357 tile_width = 128;
de151cf6 2358 else
0f973f27
JB
2359 tile_width = 512;
2360
2361 /* Note: pitch better be a power of two tile widths */
2362 pitch_val = obj_priv->stride / tile_width;
2363 pitch_val = ffs(pitch_val) - 1;
de151cf6 2364
c36a2a6d
DV
2365 if (obj_priv->tiling_mode == I915_TILING_Y &&
2366 HAS_128_BYTE_Y_TILING(dev))
2367 WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2368 else
2369 WARN_ON(pitch_val > I915_FENCE_MAX_PITCH_VAL);
2370
de151cf6
JB
2371 val = obj_priv->gtt_offset;
2372 if (obj_priv->tiling_mode == I915_TILING_Y)
2373 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2374 val |= I915_FENCE_SIZE_BITS(obj->size);
2375 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2376 val |= I830_FENCE_REG_VALID;
2377
dc529a4f
EA
2378 if (regnum < 8)
2379 fence_reg = FENCE_REG_830_0 + (regnum * 4);
2380 else
2381 fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4);
2382 I915_WRITE(fence_reg, val);
de151cf6
JB
2383}
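/*
 * [Editorial example, not part of i915_gem.c] The pre-965 fence registers
 * above do not store the stride directly; they store log2 of the stride
 * expressed in tile widths, computed as ffs(stride / tile_width) - 1, which
 * is why the stride must be a power-of-two number of tiles. A standalone
 * sketch of that encoding (hypothetical demo_* names):
 */
#include <assert.h>
#include <stdint.h>

static uint32_t demo_fence_pitch_val(uint32_t stride, uint32_t tile_width)
{
        uint32_t tiles = stride / tile_width;
        uint32_t val = 0;

        /* Equivalent of ffs(tiles) - 1 for a power-of-two tile count. */
        while (tiles > 1) {
                tiles >>= 1;
                val++;
        }
        return val;
}

int main(void)
{
        /* X-tiled, 512-byte tiles, 2048-byte stride: 4 tiles -> log2 = 2 */
        assert(demo_fence_pitch_val(2048, 512) == 2);
        /* Y-tiled on 9xx, 128-byte tiles, 128-byte stride: 1 tile -> 0 */
        assert(demo_fence_pitch_val(128, 128) == 0);
        return 0;
}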
2384
2385static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
2386{
2387 struct drm_gem_object *obj = reg->obj;
2388 struct drm_device *dev = obj->dev;
2389 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 2390 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
de151cf6
JB
2391 int regnum = obj_priv->fence_reg;
2392 uint32_t val;
2393 uint32_t pitch_val;
8d7773a3 2394 uint32_t fence_size_bits;
de151cf6 2395
8d7773a3 2396 if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) ||
de151cf6 2397 (obj_priv->gtt_offset & (obj->size - 1))) {
8d7773a3 2398 WARN(1, "%s: object 0x%08x not 512K or size aligned\n",
0f973f27 2399 __func__, obj_priv->gtt_offset);
de151cf6
JB
2400 return;
2401 }
2402
e76a16de
EA
2403 pitch_val = obj_priv->stride / 128;
2404 pitch_val = ffs(pitch_val) - 1;
2405 WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2406
de151cf6
JB
2407 val = obj_priv->gtt_offset;
2408 if (obj_priv->tiling_mode == I915_TILING_Y)
2409 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
8d7773a3
DV
2410 fence_size_bits = I830_FENCE_SIZE_BITS(obj->size);
2411 WARN_ON(fence_size_bits & ~0x00000f00);
2412 val |= fence_size_bits;
de151cf6
JB
2413 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2414 val |= I830_FENCE_REG_VALID;
2415
2416 I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
de151cf6
JB
2417}
2418
ae3db24a
DV
2419static int i915_find_fence_reg(struct drm_device *dev)
2420{
2421 struct drm_i915_fence_reg *reg = NULL;
2422 struct drm_i915_gem_object *obj_priv = NULL;
2423 struct drm_i915_private *dev_priv = dev->dev_private;
2424 struct drm_gem_object *obj = NULL;
2425 int i, avail, ret;
2426
2427 /* First try to find a free reg */
2428 avail = 0;
2429 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2430 reg = &dev_priv->fence_regs[i];
2431 if (!reg->obj)
2432 return i;
2433
23010e43 2434 obj_priv = to_intel_bo(reg->obj);
ae3db24a
DV
2435 if (!obj_priv->pin_count)
2436 avail++;
2437 }
2438
2439 if (avail == 0)
2440 return -ENOSPC;
2441
2442 /* None available, try to steal one or wait for a user to finish */
2443 i = I915_FENCE_REG_NONE;
007cc8ac
DV
2444 list_for_each_entry(reg, &dev_priv->mm.fence_list,
2445 lru_list) {
2446 obj = reg->obj;
2447 obj_priv = to_intel_bo(obj);
ae3db24a
DV
2448
2449 if (obj_priv->pin_count)
2450 continue;
2451
2452 /* found one! */
2453 i = obj_priv->fence_reg;
2454 break;
2455 }
2456
2457 BUG_ON(i == I915_FENCE_REG_NONE);
2458
2459 /* We only have a reference on obj from the active list. put_fence_reg
2460 * might drop that one, causing a use-after-free in it. So hold a
2461 * private reference to obj like the other callers of put_fence_reg
2462 * (set_tiling ioctl) do. */
2463 drm_gem_object_reference(obj);
2464 ret = i915_gem_object_put_fence_reg(obj);
2465 drm_gem_object_unreference(obj);
2466 if (ret != 0)
2467 return ret;
2468
2469 return i;
2470}
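/*
 * [Editorial example, not part of i915_gem.c] Fence allocation above is a
 * two-stage search: take any register with no object attached, otherwise
 * steal the least-recently-used register whose object is not pinned (the
 * fence_list is kept in LRU order). A compact model of that policy over an
 * array (hypothetical demo_* names):
 */
struct demo_fence {
        int in_use;             /* has an object attached */
        int pinned;             /* attached object may not lose its fence */
        int lru_rank;           /* 0 = least recently used */
};

/* Returns a register index, or -1 when every in-use register is pinned. */
static int demo_find_fence(const struct demo_fence *regs, int n)
{
        int i, victim = -1;

        for (i = 0; i < n; i++)
                if (!regs[i].in_use)
                        return i;               /* free register, done */

        for (i = 0; i < n; i++) {
                if (regs[i].pinned)
                        continue;
                if (victim < 0 || regs[i].lru_rank < regs[victim].lru_rank)
                        victim = i;             /* oldest unpinned user */
        }
        return victim;                          /* -1 maps to -ENOSPC above */
}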
2471
de151cf6
JB
2472/**
2473 * i915_gem_object_get_fence_reg - set up a fence reg for an object
2474 * @obj: object to map through a fence reg
2475 *
2476 * When mapping objects through the GTT, userspace wants to be able to write
2477 * to them without having to worry about swizzling if the object is tiled.
2478 *
2479 * This function walks the fence regs looking for a free one for @obj,
2480 * stealing one if it can't find any.
2481 *
2482 * It then sets up the reg based on the object's properties: address, pitch
2483 * and tiling format.
2484 */
8c4b8c3f
CW
2485int
2486i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
de151cf6
JB
2487{
2488 struct drm_device *dev = obj->dev;
79e53945 2489 struct drm_i915_private *dev_priv = dev->dev_private;
23010e43 2490 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
de151cf6 2491 struct drm_i915_fence_reg *reg = NULL;
ae3db24a 2492 int ret;
de151cf6 2493
a09ba7fa
EA
2494 /* Just update our place in the LRU if our fence is getting used. */
2495 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
007cc8ac
DV
2496 reg = &dev_priv->fence_regs[obj_priv->fence_reg];
2497 list_move_tail(&reg->lru_list, &dev_priv->mm.fence_list);
a09ba7fa
EA
2498 return 0;
2499 }
2500
de151cf6
JB
2501 switch (obj_priv->tiling_mode) {
2502 case I915_TILING_NONE:
2503 WARN(1, "allocating a fence for non-tiled object?\n");
2504 break;
2505 case I915_TILING_X:
0f973f27
JB
2506 if (!obj_priv->stride)
2507 return -EINVAL;
2508 WARN((obj_priv->stride & (512 - 1)),
2509 "object 0x%08x is X tiled but has non-512B pitch\n",
2510 obj_priv->gtt_offset);
de151cf6
JB
2511 break;
2512 case I915_TILING_Y:
0f973f27
JB
2513 if (!obj_priv->stride)
2514 return -EINVAL;
2515 WARN((obj_priv->stride & (128 - 1)),
2516 "object 0x%08x is Y tiled but has non-128B pitch\n",
2517 obj_priv->gtt_offset);
de151cf6
JB
2518 break;
2519 }
2520
ae3db24a
DV
2521 ret = i915_find_fence_reg(dev);
2522 if (ret < 0)
2523 return ret;
de151cf6 2524
ae3db24a
DV
2525 obj_priv->fence_reg = ret;
2526 reg = &dev_priv->fence_regs[obj_priv->fence_reg];
007cc8ac 2527 list_add_tail(&reg->lru_list, &dev_priv->mm.fence_list);
a09ba7fa 2528
de151cf6
JB
2529 reg->obj = obj;
2530
4e901fdc
EA
2531 if (IS_GEN6(dev))
2532 sandybridge_write_fence_reg(reg);
2533 else if (IS_I965G(dev))
de151cf6
JB
2534 i965_write_fence_reg(reg);
2535 else if (IS_I9XX(dev))
2536 i915_write_fence_reg(reg);
2537 else
2538 i830_write_fence_reg(reg);
d9ddcb96 2539
ae3db24a
DV
2540 trace_i915_gem_object_get_fence(obj, obj_priv->fence_reg,
2541 obj_priv->tiling_mode);
1c5d22f7 2542
d9ddcb96 2543 return 0;
de151cf6
JB
2544}
2545
2546/**
2547 * i915_gem_clear_fence_reg - clear out fence register info
2548 * @obj: object to clear
2549 *
2550 * Zeroes out the fence register itself and clears out the associated
2551 * data structures in dev_priv and obj_priv.
2552 */
2553static void
2554i915_gem_clear_fence_reg(struct drm_gem_object *obj)
2555{
2556 struct drm_device *dev = obj->dev;
79e53945 2557 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 2558 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
007cc8ac
DV
2559 struct drm_i915_fence_reg *reg =
2560 &dev_priv->fence_regs[obj_priv->fence_reg];
de151cf6 2561
4e901fdc
EA
2562 if (IS_GEN6(dev)) {
2563 I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 +
2564 (obj_priv->fence_reg * 8), 0);
2565 } else if (IS_I965G(dev)) {
de151cf6 2566 I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0);
4e901fdc 2567 } else {
dc529a4f
EA
2568 uint32_t fence_reg;
2569
2570 if (obj_priv->fence_reg < 8)
2571 fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4;
2572 else
2573 fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg -
2574 8) * 4;
2575
2576 I915_WRITE(fence_reg, 0);
2577 }
de151cf6 2578
007cc8ac 2579 reg->obj = NULL;
de151cf6 2580 obj_priv->fence_reg = I915_FENCE_REG_NONE;
007cc8ac 2581 list_del_init(&reg->lru_list);
de151cf6
JB
2582}
2583
52dc7d32
CW
2584/**
2585 * i915_gem_object_put_fence_reg - waits on outstanding fenced access
2586 * to the buffer to finish, and then resets the fence register.
2587 * @obj: tiled object holding a fence register.
2588 *
2589 * Zeroes out the fence register itself and clears out the associated
2590 * data structures in dev_priv and obj_priv.
2591 */
2592int
2593i915_gem_object_put_fence_reg(struct drm_gem_object *obj)
2594{
2595 struct drm_device *dev = obj->dev;
23010e43 2596 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
52dc7d32
CW
2597
2598 if (obj_priv->fence_reg == I915_FENCE_REG_NONE)
2599 return 0;
2600
10ae9bd2
DV
2601 /* If we've changed tiling, GTT-mappings of the object
2602 * need to re-fault to ensure that the correct fence register
2603 * setup is in place.
2604 */
2605 i915_gem_release_mmap(obj);
2606
52dc7d32
CW
2607 /* On the i915, GPU access to tiled buffers is via a fence,
2608 * therefore we must wait for any outstanding access to complete
2609 * before clearing the fence.
2610 */
2611 if (!IS_I965G(dev)) {
2612 int ret;
2613
2dafb1e0
CW
2614 ret = i915_gem_object_flush_gpu_write_domain(obj);
2615 if (ret != 0)
2616 return ret;
2617
52dc7d32
CW
2618 ret = i915_gem_object_wait_rendering(obj);
2619 if (ret != 0)
2620 return ret;
2621 }
2622
4a726612 2623 i915_gem_object_flush_gtt_write_domain(obj);
52dc7d32
CW
 2624 i915_gem_clear_fence_reg(obj);
2625
2626 return 0;
2627}
2628
673a394b
EA
2629/**
2630 * Finds free space in the GTT aperture and binds the object there.
2631 */
2632static int
2633i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
2634{
2635 struct drm_device *dev = obj->dev;
2636 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 2637 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b 2638 struct drm_mm_node *free_space;
4bdadb97 2639 gfp_t gfpmask = __GFP_NORETRY | __GFP_NOWARN;
07f73f69 2640 int ret;
673a394b 2641
bb6baf76 2642 if (obj_priv->madv != I915_MADV_WILLNEED) {
3ef94daa
CW
2643 DRM_ERROR("Attempting to bind a purgeable object\n");
2644 return -EINVAL;
2645 }
2646
673a394b 2647 if (alignment == 0)
0f973f27 2648 alignment = i915_gem_get_gtt_alignment(obj);
8d7773a3 2649 if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) {
673a394b
EA
2650 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2651 return -EINVAL;
2652 }
2653
654fc607
CW
2654 /* If the object is bigger than the entire aperture, reject it early
2655 * before evicting everything in a vain attempt to find space.
2656 */
2657 if (obj->size > dev->gtt_total) {
2658 DRM_ERROR("Attempting to bind an object larger than the aperture\n");
2659 return -E2BIG;
2660 }
2661
673a394b
EA
2662 search_free:
2663 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
2664 obj->size, alignment, 0);
2665 if (free_space != NULL) {
2666 obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size,
2667 alignment);
db3307a9 2668 if (obj_priv->gtt_space != NULL)
673a394b 2669 obj_priv->gtt_offset = obj_priv->gtt_space->start;
673a394b
EA
2670 }
2671 if (obj_priv->gtt_space == NULL) {
2672 /* If the gtt is empty and we're still having trouble
2673 * fitting our object in, we're out of memory.
2674 */
2675#if WATCH_LRU
2676 DRM_INFO("%s: GTT full, evicting something\n", __func__);
2677#endif
0108a3ed 2678 ret = i915_gem_evict_something(dev, obj->size, alignment);
9731129c 2679 if (ret)
673a394b 2680 return ret;
9731129c 2681
673a394b
EA
2682 goto search_free;
2683 }
2684
2685#if WATCH_BUF
cfd43c02 2686 DRM_INFO("Binding object of size %zd at 0x%08x\n",
673a394b
EA
2687 obj->size, obj_priv->gtt_offset);
2688#endif
4bdadb97 2689 ret = i915_gem_object_get_pages(obj, gfpmask);
673a394b
EA
2690 if (ret) {
2691 drm_mm_put_block(obj_priv->gtt_space);
2692 obj_priv->gtt_space = NULL;
07f73f69
CW
2693
2694 if (ret == -ENOMEM) {
2695 /* first try to clear up some space from the GTT */
0108a3ed
DV
2696 ret = i915_gem_evict_something(dev, obj->size,
2697 alignment);
07f73f69 2698 if (ret) {
07f73f69 2699 /* now try to shrink everyone else */
4bdadb97
CW
2700 if (gfpmask) {
2701 gfpmask = 0;
2702 goto search_free;
07f73f69
CW
2703 }
2704
2705 return ret;
2706 }
2707
2708 goto search_free;
2709 }
2710
673a394b
EA
2711 return ret;
2712 }
2713
673a394b
EA
2714 /* Create an AGP memory structure pointing at our pages, and bind it
2715 * into the GTT.
2716 */
2717 obj_priv->agp_mem = drm_agp_bind_pages(dev,
856fa198 2718 obj_priv->pages,
07f73f69 2719 obj->size >> PAGE_SHIFT,
ba1eb1d8
KP
2720 obj_priv->gtt_offset,
2721 obj_priv->agp_type);
673a394b 2722 if (obj_priv->agp_mem == NULL) {
856fa198 2723 i915_gem_object_put_pages(obj);
673a394b
EA
2724 drm_mm_put_block(obj_priv->gtt_space);
2725 obj_priv->gtt_space = NULL;
07f73f69 2726
0108a3ed 2727 ret = i915_gem_evict_something(dev, obj->size, alignment);
9731129c 2728 if (ret)
07f73f69 2729 return ret;
07f73f69
CW
2730
2731 goto search_free;
673a394b
EA
2732 }
2733 atomic_inc(&dev->gtt_count);
2734 atomic_add(obj->size, &dev->gtt_memory);
2735
bf1a1092
CW
 2736 /* keep track of the bound object by adding it to the inactive list */
2737 list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
2738
673a394b
EA
2739 /* Assert that the object is not currently in any GPU domain. As it
2740 * wasn't in the GTT, there shouldn't be any way it could have been in
2741 * a GPU cache
2742 */
21d509e3
CW
2743 BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2744 BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
673a394b 2745
1c5d22f7
CW
2746 trace_i915_gem_object_bind(obj, obj_priv->gtt_offset);
2747
673a394b
EA
2748 return 0;
2749}
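/*
 * [Editorial example, not part of i915_gem.c] The alignment check above
 * relies on the minimum GTT alignment being a power of two: a requested
 * alignment is acceptable only if it is a multiple of that minimum, which
 * for powers of two reduces to (request & (minimum - 1)) == 0. A standalone
 * sketch (hypothetical demo_* names):
 */
#include <assert.h>
#include <stdint.h>

static int demo_alignment_ok(uint32_t requested, uint32_t minimum_pow2)
{
        if (requested == 0)
                return 1;       /* caller falls back to the minimum itself */
        return (requested & (minimum_pow2 - 1)) == 0;
}

int main(void)
{
        assert(demo_alignment_ok(8192, 4096));  /* multiple of the minimum */
        assert(!demo_alignment_ok(6144, 4096)); /* 1.5x the minimum is rejected */
        assert(demo_alignment_ok(0, 4096));     /* 0 means "use the minimum" */
        return 0;
}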
2750
2751void
2752i915_gem_clflush_object(struct drm_gem_object *obj)
2753{
23010e43 2754 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b
EA
2755
2756 /* If we don't have a page list set up, then we're not pinned
2757 * to GPU, and we can ignore the cache flush because it'll happen
2758 * again at bind time.
2759 */
856fa198 2760 if (obj_priv->pages == NULL)
673a394b
EA
2761 return;
2762
1c5d22f7 2763 trace_i915_gem_object_clflush(obj);
cfa16a0d 2764
856fa198 2765 drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
673a394b
EA
2766}
2767
e47c68e9 2768/** Flushes any GPU write domain for the object if it's dirty. */
2dafb1e0 2769static int
e47c68e9
EA
2770i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
2771{
2772 struct drm_device *dev = obj->dev;
1c5d22f7 2773 uint32_t old_write_domain;
852835f3 2774 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
e47c68e9
EA
2775
2776 if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
2dafb1e0 2777 return 0;
e47c68e9
EA
2778
2779 /* Queue the GPU write cache flushing we need. */
1c5d22f7 2780 old_write_domain = obj->write_domain;
e47c68e9 2781 i915_gem_flush(dev, 0, obj->write_domain);
2dafb1e0
CW
2782 if (i915_add_request(dev, NULL, obj->write_domain, obj_priv->ring) == 0)
2783 return -ENOMEM;
1c5d22f7
CW
2784
2785 trace_i915_gem_object_change_domain(obj,
2786 obj->read_domains,
2787 old_write_domain);
2dafb1e0 2788 return 0;
e47c68e9
EA
2789}
2790
2791/** Flushes the GTT write domain for the object if it's dirty. */
2792static void
2793i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
2794{
1c5d22f7
CW
2795 uint32_t old_write_domain;
2796
e47c68e9
EA
2797 if (obj->write_domain != I915_GEM_DOMAIN_GTT)
2798 return;
2799
2800 /* No actual flushing is required for the GTT write domain. Writes
2801 * to it immediately go to main memory as far as we know, so there's
2802 * no chipset flush. It also doesn't land in render cache.
2803 */
1c5d22f7 2804 old_write_domain = obj->write_domain;
e47c68e9 2805 obj->write_domain = 0;
1c5d22f7
CW
2806
2807 trace_i915_gem_object_change_domain(obj,
2808 obj->read_domains,
2809 old_write_domain);
e47c68e9
EA
2810}
2811
2812/** Flushes the CPU write domain for the object if it's dirty. */
2813static void
2814i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
2815{
2816 struct drm_device *dev = obj->dev;
1c5d22f7 2817 uint32_t old_write_domain;
e47c68e9
EA
2818
2819 if (obj->write_domain != I915_GEM_DOMAIN_CPU)
2820 return;
2821
2822 i915_gem_clflush_object(obj);
2823 drm_agp_chipset_flush(dev);
1c5d22f7 2824 old_write_domain = obj->write_domain;
e47c68e9 2825 obj->write_domain = 0;
1c5d22f7
CW
2826
2827 trace_i915_gem_object_change_domain(obj,
2828 obj->read_domains,
2829 old_write_domain);
e47c68e9
EA
2830}
2831
2dafb1e0 2832int
6b95a207
KH
2833i915_gem_object_flush_write_domain(struct drm_gem_object *obj)
2834{
2dafb1e0
CW
2835 int ret = 0;
2836
6b95a207
KH
2837 switch (obj->write_domain) {
2838 case I915_GEM_DOMAIN_GTT:
2839 i915_gem_object_flush_gtt_write_domain(obj);
2840 break;
2841 case I915_GEM_DOMAIN_CPU:
2842 i915_gem_object_flush_cpu_write_domain(obj);
2843 break;
2844 default:
2dafb1e0 2845 ret = i915_gem_object_flush_gpu_write_domain(obj);
6b95a207
KH
2846 break;
2847 }
2dafb1e0
CW
2848
2849 return ret;
6b95a207
KH
2850}
2851
2ef7eeaa
EA
2852/**
2853 * Moves a single object to the GTT read, and possibly write domain.
2854 *
2855 * This function returns when the move is complete, including waiting on
2856 * flushes to occur.
2857 */
79e53945 2858int
2ef7eeaa
EA
2859i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
2860{
23010e43 2861 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1c5d22f7 2862 uint32_t old_write_domain, old_read_domains;
e47c68e9 2863 int ret;
2ef7eeaa 2864
02354392
EA
2865 /* Not valid to be called on unbound objects. */
2866 if (obj_priv->gtt_space == NULL)
2867 return -EINVAL;
2868
2dafb1e0
CW
2869 ret = i915_gem_object_flush_gpu_write_domain(obj);
2870 if (ret != 0)
2871 return ret;
2872
e47c68e9
EA
2873 /* Wait on any GPU rendering and flushing to occur. */
2874 ret = i915_gem_object_wait_rendering(obj);
2875 if (ret != 0)
2876 return ret;
2877
1c5d22f7
CW
2878 old_write_domain = obj->write_domain;
2879 old_read_domains = obj->read_domains;
2880
e47c68e9
EA
2881 /* If we're writing through the GTT domain, then CPU and GPU caches
2882 * will need to be invalidated at next use.
2ef7eeaa 2883 */
e47c68e9
EA
2884 if (write)
2885 obj->read_domains &= I915_GEM_DOMAIN_GTT;
2ef7eeaa 2886
e47c68e9 2887 i915_gem_object_flush_cpu_write_domain(obj);
2ef7eeaa 2888
e47c68e9
EA
2889 /* It should now be out of any other write domains, and we can update
2890 * the domain values for our changes.
2891 */
2892 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2893 obj->read_domains |= I915_GEM_DOMAIN_GTT;
2894 if (write) {
2895 obj->write_domain = I915_GEM_DOMAIN_GTT;
2896 obj_priv->dirty = 1;
2ef7eeaa
EA
2897 }
2898
1c5d22f7
CW
2899 trace_i915_gem_object_change_domain(obj,
2900 old_read_domains,
2901 old_write_domain);
2902
e47c68e9
EA
2903 return 0;
2904}
2905
b9241ea3
ZW
2906/*
2907 * Prepare buffer for display plane. Use uninterruptible for possible flush
2908 * wait, as in modesetting process we're not supposed to be interrupted.
2909 */
2910int
2911i915_gem_object_set_to_display_plane(struct drm_gem_object *obj)
2912{
2913 struct drm_device *dev = obj->dev;
23010e43 2914 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
b9241ea3
ZW
2915 uint32_t old_write_domain, old_read_domains;
2916 int ret;
2917
2918 /* Not valid to be called on unbound objects. */
2919 if (obj_priv->gtt_space == NULL)
2920 return -EINVAL;
2921
2dafb1e0
CW
2922 ret = i915_gem_object_flush_gpu_write_domain(obj);
2923 if (ret)
2924 return ret;
b9241ea3
ZW
2925
2926 /* Wait on any GPU rendering and flushing to occur. */
2927 if (obj_priv->active) {
2928#if WATCH_BUF
2929 DRM_INFO("%s: object %p wait for seqno %08x\n",
2930 __func__, obj, obj_priv->last_rendering_seqno);
2931#endif
852835f3
ZN
2932 ret = i915_do_wait_request(dev,
2933 obj_priv->last_rendering_seqno,
2934 0,
2935 obj_priv->ring);
b9241ea3
ZW
2936 if (ret != 0)
2937 return ret;
2938 }
2939
b118c1e3
CW
2940 i915_gem_object_flush_cpu_write_domain(obj);
2941
b9241ea3
ZW
2942 old_write_domain = obj->write_domain;
2943 old_read_domains = obj->read_domains;
2944
b9241ea3
ZW
2945 /* It should now be out of any other write domains, and we can update
2946 * the domain values for our changes.
2947 */
2948 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
b118c1e3 2949 obj->read_domains = I915_GEM_DOMAIN_GTT;
b9241ea3
ZW
2950 obj->write_domain = I915_GEM_DOMAIN_GTT;
2951 obj_priv->dirty = 1;
2952
2953 trace_i915_gem_object_change_domain(obj,
2954 old_read_domains,
2955 old_write_domain);
2956
2957 return 0;
2958}
2959
e47c68e9
EA
2960/**
2961 * Moves a single object to the CPU read, and possibly write domain.
2962 *
2963 * This function returns when the move is complete, including waiting on
2964 * flushes to occur.
2965 */
2966static int
2967i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
2968{
1c5d22f7 2969 uint32_t old_write_domain, old_read_domains;
e47c68e9
EA
2970 int ret;
2971
2dafb1e0
CW
2972 ret = i915_gem_object_flush_gpu_write_domain(obj);
2973 if (ret)
2974 return ret;
2975
2ef7eeaa 2976 /* Wait on any GPU rendering and flushing to occur. */
e47c68e9
EA
2977 ret = i915_gem_object_wait_rendering(obj);
2978 if (ret != 0)
2979 return ret;
2ef7eeaa 2980
e47c68e9 2981 i915_gem_object_flush_gtt_write_domain(obj);
2ef7eeaa 2982
e47c68e9
EA
2983 /* If we have a partially-valid cache of the object in the CPU,
2984 * finish invalidating it and free the per-page flags.
2ef7eeaa 2985 */
e47c68e9 2986 i915_gem_object_set_to_full_cpu_read_domain(obj);
2ef7eeaa 2987
1c5d22f7
CW
2988 old_write_domain = obj->write_domain;
2989 old_read_domains = obj->read_domains;
2990
e47c68e9
EA
2991 /* Flush the CPU cache if it's still invalid. */
2992 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2ef7eeaa 2993 i915_gem_clflush_object(obj);
2ef7eeaa 2994
e47c68e9 2995 obj->read_domains |= I915_GEM_DOMAIN_CPU;
2ef7eeaa
EA
2996 }
2997
2998 /* It should now be out of any other write domains, and we can update
2999 * the domain values for our changes.
3000 */
e47c68e9
EA
3001 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3002
3003 /* If we're writing through the CPU, then the GPU read domains will
3004 * need to be invalidated at next use.
3005 */
3006 if (write) {
3007 obj->read_domains &= I915_GEM_DOMAIN_CPU;
3008 obj->write_domain = I915_GEM_DOMAIN_CPU;
3009 }
2ef7eeaa 3010
1c5d22f7
CW
3011 trace_i915_gem_object_change_domain(obj,
3012 old_read_domains,
3013 old_write_domain);
3014
2ef7eeaa
EA
3015 return 0;
3016}
3017
673a394b
EA
3018/*
3019 * Set the next domain for the specified object. This
3020 * may not actually perform the necessary flushing/invaliding though,
3021 * as that may want to be batched with other set_domain operations
3022 *
3023 * This is (we hope) the only really tricky part of gem. The goal
3024 * is fairly simple -- track which caches hold bits of the object
3025 * and make sure they remain coherent. A few concrete examples may
3026 * help to explain how it works. For shorthand, we use the notation
 3027 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
3028 * a pair of read and write domain masks.
3029 *
3030 * Case 1: the batch buffer
3031 *
3032 * 1. Allocated
3033 * 2. Written by CPU
3034 * 3. Mapped to GTT
3035 * 4. Read by GPU
3036 * 5. Unmapped from GTT
3037 * 6. Freed
3038 *
3039 * Let's take these a step at a time
3040 *
3041 * 1. Allocated
3042 * Pages allocated from the kernel may still have
3043 * cache contents, so we set them to (CPU, CPU) always.
3044 * 2. Written by CPU (using pwrite)
3045 * The pwrite function calls set_domain (CPU, CPU) and
3046 * this function does nothing (as nothing changes)
3047 * 3. Mapped by GTT
3048 * This function asserts that the object is not
3049 * currently in any GPU-based read or write domains
3050 * 4. Read by GPU
3051 * i915_gem_execbuffer calls set_domain (COMMAND, 0).
3052 * As write_domain is zero, this function adds in the
3053 * current read domains (CPU+COMMAND, 0).
3054 * flush_domains is set to CPU.
3055 * invalidate_domains is set to COMMAND
3056 * clflush is run to get data out of the CPU caches
3057 * then i915_dev_set_domain calls i915_gem_flush to
3058 * emit an MI_FLUSH and drm_agp_chipset_flush
3059 * 5. Unmapped from GTT
3060 * i915_gem_object_unbind calls set_domain (CPU, CPU)
3061 * flush_domains and invalidate_domains end up both zero
3062 * so no flushing/invalidating happens
3063 * 6. Freed
3064 * yay, done
3065 *
3066 * Case 2: The shared render buffer
3067 *
3068 * 1. Allocated
3069 * 2. Mapped to GTT
3070 * 3. Read/written by GPU
3071 * 4. set_domain to (CPU,CPU)
3072 * 5. Read/written by CPU
3073 * 6. Read/written by GPU
3074 *
3075 * 1. Allocated
3076 * Same as last example, (CPU, CPU)
3077 * 2. Mapped to GTT
3078 * Nothing changes (assertions find that it is not in the GPU)
3079 * 3. Read/written by GPU
3080 * execbuffer calls set_domain (RENDER, RENDER)
3081 * flush_domains gets CPU
3082 * invalidate_domains gets GPU
3083 * clflush (obj)
3084 * MI_FLUSH and drm_agp_chipset_flush
3085 * 4. set_domain (CPU, CPU)
3086 * flush_domains gets GPU
3087 * invalidate_domains gets CPU
3088 * wait_rendering (obj) to make sure all drawing is complete.
3089 * This will include an MI_FLUSH to get the data from GPU
3090 * to memory
3091 * clflush (obj) to invalidate the CPU cache
3092 * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
3093 * 5. Read/written by CPU
3094 * cache lines are loaded and dirtied
 3095 * 6. Read/written by GPU
3096 * Same as last GPU access
3097 *
3098 * Case 3: The constant buffer
3099 *
3100 * 1. Allocated
3101 * 2. Written by CPU
3102 * 3. Read by GPU
3103 * 4. Updated (written) by CPU again
3104 * 5. Read by GPU
3105 *
3106 * 1. Allocated
3107 * (CPU, CPU)
3108 * 2. Written by CPU
3109 * (CPU, CPU)
3110 * 3. Read by GPU
3111 * (CPU+RENDER, 0)
3112 * flush_domains = CPU
3113 * invalidate_domains = RENDER
3114 * clflush (obj)
3115 * MI_FLUSH
3116 * drm_agp_chipset_flush
3117 * 4. Updated (written) by CPU again
3118 * (CPU, CPU)
3119 * flush_domains = 0 (no previous write domain)
3120 * invalidate_domains = 0 (no new read domains)
3121 * 5. Read by GPU
3122 * (CPU+RENDER, 0)
3123 * flush_domains = CPU
3124 * invalidate_domains = RENDER
3125 * clflush (obj)
3126 * MI_FLUSH
3127 * drm_agp_chipset_flush
3128 */
c0d90829 3129static void
8b0e378a 3130i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
673a394b
EA
3131{
3132 struct drm_device *dev = obj->dev;
88f356b7 3133 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 3134 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b
EA
3135 uint32_t invalidate_domains = 0;
3136 uint32_t flush_domains = 0;
1c5d22f7 3137 uint32_t old_read_domains;
e47c68e9 3138
8b0e378a
EA
3139 BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU);
3140 BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU);
673a394b 3141
652c393a
JB
3142 intel_mark_busy(dev, obj);
3143
673a394b
EA
3144#if WATCH_BUF
3145 DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n",
3146 __func__, obj,
8b0e378a
EA
3147 obj->read_domains, obj->pending_read_domains,
3148 obj->write_domain, obj->pending_write_domain);
673a394b
EA
3149#endif
3150 /*
3151 * If the object isn't moving to a new write domain,
3152 * let the object stay in multiple read domains
3153 */
8b0e378a
EA
3154 if (obj->pending_write_domain == 0)
3155 obj->pending_read_domains |= obj->read_domains;
673a394b
EA
3156 else
3157 obj_priv->dirty = 1;
3158
3159 /*
3160 * Flush the current write domain if
3161 * the new read domains don't match. Invalidate
3162 * any read domains which differ from the old
3163 * write domain
3164 */
8b0e378a
EA
3165 if (obj->write_domain &&
3166 obj->write_domain != obj->pending_read_domains) {
673a394b 3167 flush_domains |= obj->write_domain;
8b0e378a
EA
3168 invalidate_domains |=
3169 obj->pending_read_domains & ~obj->write_domain;
673a394b
EA
3170 }
3171 /*
3172 * Invalidate any read caches which may have
3173 * stale data. That is, any new read domains.
3174 */
8b0e378a 3175 invalidate_domains |= obj->pending_read_domains & ~obj->read_domains;
673a394b
EA
3176 if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) {
3177#if WATCH_BUF
3178 DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n",
3179 __func__, flush_domains, invalidate_domains);
3180#endif
673a394b
EA
3181 i915_gem_clflush_object(obj);
3182 }
3183
1c5d22f7
CW
3184 old_read_domains = obj->read_domains;
3185
efbeed96
EA
3186 /* The actual obj->write_domain will be updated with
3187 * pending_write_domain after we emit the accumulated flush for all
3188 * of our domain changes in execbuffers (which clears objects'
3189 * write_domains). So if we have a current write domain that we
3190 * aren't changing, set pending_write_domain to that.
3191 */
3192 if (flush_domains == 0 && obj->pending_write_domain == 0)
3193 obj->pending_write_domain = obj->write_domain;
8b0e378a 3194 obj->read_domains = obj->pending_read_domains;
673a394b 3195
88f356b7
CW
3196 if (flush_domains & I915_GEM_GPU_DOMAINS) {
3197 if (obj_priv->ring == &dev_priv->render_ring)
3198 dev_priv->flush_rings |= FLUSH_RENDER_RING;
3199 else if (obj_priv->ring == &dev_priv->bsd_ring)
3200 dev_priv->flush_rings |= FLUSH_BSD_RING;
3201 }
3202
673a394b
EA
3203 dev->invalidate_domains |= invalidate_domains;
3204 dev->flush_domains |= flush_domains;
3205#if WATCH_BUF
3206 DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n",
3207 __func__,
3208 obj->read_domains, obj->write_domain,
3209 dev->invalidate_domains, dev->flush_domains);
3210#endif
1c5d22f7
CW
3211
3212 trace_i915_gem_object_change_domain(obj,
3213 old_read_domains,
3214 obj->write_domain);
673a394b
EA
3215}
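/*
 * [Editorial example, not part of i915_gem.c] A worked model of the flush
 * and invalidate bookkeeping above, matching "Case 1" from the long comment:
 * a buffer last written by the CPU (read = write = CPU) that is about to be
 * read as a command buffer must have the CPU cache flushed and the command
 * read domain invalidated. The domain bits and demo_* names below are
 * hypothetical stand-ins:
 */
#include <assert.h>
#include <stdint.h>

#define DEMO_DOMAIN_CPU     (1u << 0)
#define DEMO_DOMAIN_COMMAND (1u << 1)

struct demo_domains {
        uint32_t flush;
        uint32_t invalidate;
};

static struct demo_domains
demo_domain_transition(uint32_t read, uint32_t write,
                       uint32_t pending_read, uint32_t pending_write)
{
        struct demo_domains d = { 0, 0 };

        /* With no new writer, existing readers may keep their copies. */
        if (pending_write == 0)
                pending_read |= read;

        /* Flush a writer whose data the new readers have not yet seen. */
        if (write && write != pending_read) {
                d.flush |= write;
                d.invalidate |= pending_read & ~write;
        }

        /* Any brand-new read domain starts out stale. */
        d.invalidate |= pending_read & ~read;
        return d;
}

int main(void)
{
        struct demo_domains d =
                demo_domain_transition(DEMO_DOMAIN_CPU, DEMO_DOMAIN_CPU,
                                       DEMO_DOMAIN_COMMAND, 0);

        assert(d.flush == DEMO_DOMAIN_CPU);
        assert(d.invalidate == DEMO_DOMAIN_COMMAND);
        return 0;
}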
3216
3217/**
e47c68e9 3218 * Moves the object from a partially valid CPU read domain to a fully valid one.
673a394b 3219 *
e47c68e9
EA
3220 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
3221 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
673a394b 3222 */
e47c68e9
EA
3223static void
3224i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj)
673a394b 3225{
23010e43 3226 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b 3227
e47c68e9
EA
3228 if (!obj_priv->page_cpu_valid)
3229 return;
3230
3231 /* If we're partially in the CPU read domain, finish moving it in.
3232 */
3233 if (obj->read_domains & I915_GEM_DOMAIN_CPU) {
3234 int i;
3235
3236 for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) {
3237 if (obj_priv->page_cpu_valid[i])
3238 continue;
856fa198 3239 drm_clflush_pages(obj_priv->pages + i, 1);
e47c68e9 3240 }
e47c68e9
EA
3241 }
3242
3243 /* Free the page_cpu_valid mappings which are now stale, whether
3244 * or not we've got I915_GEM_DOMAIN_CPU.
3245 */
9a298b2a 3246 kfree(obj_priv->page_cpu_valid);
e47c68e9
EA
3247 obj_priv->page_cpu_valid = NULL;
3248}
3249
3250/**
3251 * Set the CPU read domain on a range of the object.
3252 *
3253 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
3254 * not entirely valid. The page_cpu_valid member of the object flags which
3255 * pages have been flushed, and will be respected by
3256 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping
3257 * of the whole object.
3258 *
3259 * This function returns when the move is complete, including waiting on
3260 * flushes to occur.
3261 */
3262static int
3263i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
3264 uint64_t offset, uint64_t size)
3265{
23010e43 3266 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1c5d22f7 3267 uint32_t old_read_domains;
e47c68e9 3268 int i, ret;
673a394b 3269
e47c68e9
EA
3270 if (offset == 0 && size == obj->size)
3271 return i915_gem_object_set_to_cpu_domain(obj, 0);
673a394b 3272
2dafb1e0
CW
3273 ret = i915_gem_object_flush_gpu_write_domain(obj);
3274 if (ret)
3275 return ret;
3276
e47c68e9 3277 /* Wait on any GPU rendering and flushing to occur. */
6a47baa6 3278 ret = i915_gem_object_wait_rendering(obj);
e47c68e9 3279 if (ret != 0)
6a47baa6 3280 return ret;
e47c68e9
EA
3281 i915_gem_object_flush_gtt_write_domain(obj);
3282
3283 /* If we're already fully in the CPU read domain, we're done. */
3284 if (obj_priv->page_cpu_valid == NULL &&
3285 (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0)
3286 return 0;
673a394b 3287
e47c68e9
EA
3288 /* Otherwise, create/clear the per-page CPU read domain flag if we're
3289 * newly adding I915_GEM_DOMAIN_CPU
3290 */
673a394b 3291 if (obj_priv->page_cpu_valid == NULL) {
9a298b2a
EA
3292 obj_priv->page_cpu_valid = kzalloc(obj->size / PAGE_SIZE,
3293 GFP_KERNEL);
e47c68e9
EA
3294 if (obj_priv->page_cpu_valid == NULL)
3295 return -ENOMEM;
3296 } else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0)
3297 memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE);
673a394b
EA
3298
3299 /* Flush the cache on any pages that are still invalid from the CPU's
3300 * perspective.
3301 */
e47c68e9
EA
3302 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
3303 i++) {
673a394b
EA
3304 if (obj_priv->page_cpu_valid[i])
3305 continue;
3306
856fa198 3307 drm_clflush_pages(obj_priv->pages + i, 1);
673a394b
EA
3308
3309 obj_priv->page_cpu_valid[i] = 1;
3310 }
3311
e47c68e9
EA
3312 /* It should now be out of any other write domains, and we can update
3313 * the domain values for our changes.
3314 */
3315 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
3316
1c5d22f7 3317 old_read_domains = obj->read_domains;
e47c68e9
EA
3318 obj->read_domains |= I915_GEM_DOMAIN_CPU;
3319
1c5d22f7
CW
3320 trace_i915_gem_object_change_domain(obj,
3321 old_read_domains,
3322 obj->write_domain);
3323
673a394b
EA
3324 return 0;
3325}
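/*
 * [Editorial example, not part of i915_gem.c] The byte range above is
 * converted to inclusive page indices, offset / PAGE_SIZE through
 * (offset + size - 1) / PAGE_SIZE, and only pages not yet marked in
 * page_cpu_valid are flushed. A standalone sketch of that bookkeeping
 * (hypothetical demo_* names, 4096-byte pages assumed):
 */
#include <assert.h>
#include <stdint.h>
#include <string.h>

#define DEMO_PAGE_SIZE 4096u

/* Mark the pages covering [offset, offset + size) valid; return how many
 * pages actually needed a cache flush (i.e. were not valid before). */
static unsigned demo_mark_range_valid(uint8_t *page_valid,
                                      uint64_t offset, uint64_t size)
{
        uint64_t first = offset / DEMO_PAGE_SIZE;
        uint64_t last = (offset + size - 1) / DEMO_PAGE_SIZE;
        unsigned flushed = 0;
        uint64_t i;

        for (i = first; i <= last; i++) {
                if (page_valid[i])
                        continue;
                /* a real implementation would clflush page i here */
                page_valid[i] = 1;
                flushed++;
        }
        return flushed;
}

int main(void)
{
        uint8_t valid[4];

        memset(valid, 0, sizeof(valid));
        /* 100 bytes straddling the page 0 / page 1 boundary: two flushes. */
        assert(demo_mark_range_valid(valid, 4050, 100) == 2);
        /* The same range again is already valid: no flush needed. */
        assert(demo_mark_range_valid(valid, 4050, 100) == 0);
        return 0;
}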
3326
673a394b
EA
3327/**
3328 * Pin an object to the GTT and evaluate the relocations landing in it.
3329 */
3330static int
3331i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
3332 struct drm_file *file_priv,
76446cac 3333 struct drm_i915_gem_exec_object2 *entry,
40a5f0de 3334 struct drm_i915_gem_relocation_entry *relocs)
673a394b
EA
3335{
3336 struct drm_device *dev = obj->dev;
0839ccb8 3337 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 3338 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b 3339 int i, ret;
0839ccb8 3340 void __iomem *reloc_page;
76446cac
JB
3341 bool need_fence;
3342
3343 need_fence = entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
3344 obj_priv->tiling_mode != I915_TILING_NONE;
3345
3346 /* Check fence reg constraints and rebind if necessary */
808b24d6
CW
3347 if (need_fence &&
3348 !i915_gem_object_fence_offset_ok(obj,
3349 obj_priv->tiling_mode)) {
3350 ret = i915_gem_object_unbind(obj);
3351 if (ret)
3352 return ret;
3353 }
673a394b
EA
3354
3355 /* Choose the GTT offset for our buffer and put it there. */
3356 ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment);
3357 if (ret)
3358 return ret;
3359
76446cac
JB
3360 /*
3361 * Pre-965 chips need a fence register set up in order to
3362 * properly handle blits to/from tiled surfaces.
3363 */
3364 if (need_fence) {
3365 ret = i915_gem_object_get_fence_reg(obj);
3366 if (ret != 0) {
76446cac
JB
3367 i915_gem_object_unpin(obj);
3368 return ret;
3369 }
3370 }
3371
673a394b
EA
3372 entry->offset = obj_priv->gtt_offset;
3373
673a394b
EA
3374 /* Apply the relocations, using the GTT aperture to avoid cache
3375 * flushing requirements.
3376 */
3377 for (i = 0; i < entry->relocation_count; i++) {
40a5f0de 3378 struct drm_i915_gem_relocation_entry *reloc = &relocs[i];
673a394b
EA
3379 struct drm_gem_object *target_obj;
3380 struct drm_i915_gem_object *target_obj_priv;
3043c60c
EA
3381 uint32_t reloc_val, reloc_offset;
3382 uint32_t __iomem *reloc_entry;
673a394b 3383
673a394b 3384 target_obj = drm_gem_object_lookup(obj->dev, file_priv,
40a5f0de 3385 reloc->target_handle);
673a394b
EA
3386 if (target_obj == NULL) {
3387 i915_gem_object_unpin(obj);
3388 return -EBADF;
3389 }
23010e43 3390 target_obj_priv = to_intel_bo(target_obj);
673a394b 3391
8542a0bb
CW
3392#if WATCH_RELOC
3393 DRM_INFO("%s: obj %p offset %08x target %d "
3394 "read %08x write %08x gtt %08x "
3395 "presumed %08x delta %08x\n",
3396 __func__,
3397 obj,
3398 (int) reloc->offset,
3399 (int) reloc->target_handle,
3400 (int) reloc->read_domains,
3401 (int) reloc->write_domain,
3402 (int) target_obj_priv->gtt_offset,
3403 (int) reloc->presumed_offset,
3404 reloc->delta);
3405#endif
3406
673a394b
EA
3407 /* The target buffer should have appeared before us in the
3408 * exec_object list, so it should have a GTT space bound by now.
3409 */
3410 if (target_obj_priv->gtt_space == NULL) {
3411 DRM_ERROR("No GTT space found for object %d\n",
40a5f0de 3412 reloc->target_handle);
673a394b
EA
3413 drm_gem_object_unreference(target_obj);
3414 i915_gem_object_unpin(obj);
3415 return -EINVAL;
3416 }
3417
8542a0bb 3418 /* Validate that the target is in a valid r/w GPU domain */
16edd550
DV
3419 if (reloc->write_domain & (reloc->write_domain - 1)) {
3420 DRM_ERROR("reloc with multiple write domains: "
3421 "obj %p target %d offset %d "
3422 "read %08x write %08x",
3423 obj, reloc->target_handle,
3424 (int) reloc->offset,
3425 reloc->read_domains,
3426 reloc->write_domain);
			drm_gem_object_unreference(target_obj);
			i915_gem_object_unpin(obj);
 3427 			return -EINVAL;
 3428 		}
40a5f0de
EA
3429 if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
3430 reloc->read_domains & I915_GEM_DOMAIN_CPU) {
e47c68e9
EA
3431 DRM_ERROR("reloc with read/write CPU domains: "
3432 "obj %p target %d offset %d "
3433 "read %08x write %08x",
40a5f0de
EA
3434 obj, reloc->target_handle,
3435 (int) reloc->offset,
3436 reloc->read_domains,
3437 reloc->write_domain);
491152b8
CW
3438 drm_gem_object_unreference(target_obj);
3439 i915_gem_object_unpin(obj);
e47c68e9
EA
3440 return -EINVAL;
3441 }
40a5f0de
EA
3442 if (reloc->write_domain && target_obj->pending_write_domain &&
3443 reloc->write_domain != target_obj->pending_write_domain) {
673a394b
EA
3444 DRM_ERROR("Write domain conflict: "
3445 "obj %p target %d offset %d "
3446 "new %08x old %08x\n",
40a5f0de
EA
3447 obj, reloc->target_handle,
3448 (int) reloc->offset,
3449 reloc->write_domain,
673a394b
EA
3450 target_obj->pending_write_domain);
3451 drm_gem_object_unreference(target_obj);
3452 i915_gem_object_unpin(obj);
3453 return -EINVAL;
3454 }
3455
40a5f0de
EA
3456 target_obj->pending_read_domains |= reloc->read_domains;
3457 target_obj->pending_write_domain |= reloc->write_domain;
673a394b
EA
3458
3459 /* If the relocation already has the right value in it, no
3460 * more work needs to be done.
3461 */
40a5f0de 3462 if (target_obj_priv->gtt_offset == reloc->presumed_offset) {
673a394b
EA
3463 drm_gem_object_unreference(target_obj);
3464 continue;
3465 }
3466
8542a0bb
CW
3467 /* Check that the relocation address is valid... */
3468 if (reloc->offset > obj->size - 4) {
3469 DRM_ERROR("Relocation beyond object bounds: "
3470 "obj %p target %d offset %d size %d.\n",
3471 obj, reloc->target_handle,
3472 (int) reloc->offset, (int) obj->size);
3473 drm_gem_object_unreference(target_obj);
3474 i915_gem_object_unpin(obj);
3475 return -EINVAL;
3476 }
3477 if (reloc->offset & 3) {
3478 DRM_ERROR("Relocation not 4-byte aligned: "
3479 "obj %p target %d offset %d.\n",
3480 obj, reloc->target_handle,
3481 (int) reloc->offset);
3482 drm_gem_object_unreference(target_obj);
3483 i915_gem_object_unpin(obj);
3484 return -EINVAL;
3485 }
3486
3487 /* and points to somewhere within the target object. */
3488 if (reloc->delta >= target_obj->size) {
3489 DRM_ERROR("Relocation beyond target object bounds: "
3490 "obj %p target %d delta %d size %d.\n",
3491 obj, reloc->target_handle,
3492 (int) reloc->delta, (int) target_obj->size);
3493 drm_gem_object_unreference(target_obj);
3494 i915_gem_object_unpin(obj);
3495 return -EINVAL;
3496 }
3497
2ef7eeaa
EA
3498 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
3499 if (ret != 0) {
3500 drm_gem_object_unreference(target_obj);
3501 i915_gem_object_unpin(obj);
 3502 			return ret;
673a394b
EA
3503 }
3504
3505 /* Map the page containing the relocation we're going to
3506 * perform.
3507 */
40a5f0de 3508 reloc_offset = obj_priv->gtt_offset + reloc->offset;
0839ccb8
KP
3509 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
3510 (reloc_offset &
fca3ec01
CW
3511 ~(PAGE_SIZE - 1)),
3512 KM_USER0);
3043c60c 3513 reloc_entry = (uint32_t __iomem *)(reloc_page +
0839ccb8 3514 (reloc_offset & (PAGE_SIZE - 1)));
40a5f0de 3515 reloc_val = target_obj_priv->gtt_offset + reloc->delta;
673a394b
EA
3516
3517#if WATCH_BUF
3518 DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n",
40a5f0de 3519 obj, (unsigned int) reloc->offset,
673a394b
EA
3520 readl(reloc_entry), reloc_val);
3521#endif
3522 writel(reloc_val, reloc_entry);
fca3ec01 3523 io_mapping_unmap_atomic(reloc_page, KM_USER0);
673a394b 3524
40a5f0de
EA
3525 /* The updated presumed offset for this entry will be
3526 * copied back out to the user.
673a394b 3527 */
40a5f0de 3528 reloc->presumed_offset = target_obj_priv->gtt_offset;
673a394b
EA
3529
3530 drm_gem_object_unreference(target_obj);
3531 }
3532
673a394b
EA
3533#if WATCH_BUF
3534 if (0)
3535 i915_gem_dump_object(obj, 128, __func__, ~0);
3536#endif
3537 return 0;
3538}
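
/* Illustrative sketch, not part of the driver: each pass of the relocation
 * loop above reduces to one 32-bit write -- the target object's real GTT
 * offset plus reloc->delta, stored at reloc->offset inside the object being
 * relocated. The helper name and mapping argument are assumptions local to
 * this illustration.
 */
static inline void example_apply_one_reloc(void __iomem *obj_mapping,
					   const struct drm_i915_gem_relocation_entry *reloc,
					   uint32_t target_gtt_offset)
{
	/* pointer value the GPU will dereference */
	uint32_t reloc_val = target_gtt_offset + reloc->delta;

	/* dword inside the mapped object that holds that pointer */
	writel(reloc_val, obj_mapping + reloc->offset);
}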
3539
673a394b
EA
3540/* Throttle our rendering by waiting until the ring has completed our requests
3541 * emitted over 20 msec ago.
3542 *
b962442e
EA
3543 * Note that if we were to use the current jiffies each time around the loop,
3544 * we wouldn't escape the function with any frames outstanding if the time to
3545 * render a frame was over 20ms.
3546 *
673a394b
EA
3547 * This should get us reasonable parallelism between CPU and GPU but also
3548 * relatively low latency when blocking on a particular request to finish.
3549 */
3550static int
3551i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
3552{
3553 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
3554 int ret = 0;
b962442e 3555 unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
673a394b
EA
3556
3557 mutex_lock(&dev->struct_mutex);
b962442e
EA
3558 while (!list_empty(&i915_file_priv->mm.request_list)) {
3559 struct drm_i915_gem_request *request;
3560
3561 request = list_first_entry(&i915_file_priv->mm.request_list,
3562 struct drm_i915_gem_request,
3563 client_list);
3564
3565 if (time_after_eq(request->emitted_jiffies, recent_enough))
3566 break;
3567
852835f3 3568 ret = i915_wait_request(dev, request->seqno, request->ring);
b962442e
EA
3569 if (ret != 0)
3570 break;
3571 }
673a394b 3572 mutex_unlock(&dev->struct_mutex);
b962442e 3573
673a394b
EA
3574 return ret;
3575}
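
/* Illustrative sketch, userspace side: the throttle above is reached through
 * DRM_IOCTL_I915_GEM_THROTTLE, which a client would typically issue once per
 * frame so it never queues work more than roughly 20ms ahead of the GPU.
 * The helper name and fd handling are assumptions for this sketch.
 */
#include <sys/ioctl.h>
#include <i915_drm.h>	/* from libdrm; include path may vary */

static int example_throttle_frame(int drm_fd)
{
	/* no argument structure; the ioctl simply blocks on old requests */
	return ioctl(drm_fd, DRM_IOCTL_I915_GEM_THROTTLE, NULL);
}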
3576
40a5f0de 3577static int
76446cac 3578i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object2 *exec_list,
40a5f0de
EA
3579 uint32_t buffer_count,
3580 struct drm_i915_gem_relocation_entry **relocs)
3581{
3582 uint32_t reloc_count = 0, reloc_index = 0, i;
3583 int ret;
3584
3585 *relocs = NULL;
3586 for (i = 0; i < buffer_count; i++) {
3587 if (reloc_count + exec_list[i].relocation_count < reloc_count)
3588 return -EINVAL;
3589 reloc_count += exec_list[i].relocation_count;
3590 }
3591
8e7d2b2c 3592 *relocs = drm_calloc_large(reloc_count, sizeof(**relocs));
76446cac
JB
3593 if (*relocs == NULL) {
3594 DRM_ERROR("failed to alloc relocs, count %d\n", reloc_count);
40a5f0de 3595 return -ENOMEM;
76446cac 3596 }
40a5f0de
EA
3597
3598 for (i = 0; i < buffer_count; i++) {
3599 struct drm_i915_gem_relocation_entry __user *user_relocs;
3600
3601 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3602
3603 ret = copy_from_user(&(*relocs)[reloc_index],
3604 user_relocs,
3605 exec_list[i].relocation_count *
3606 sizeof(**relocs));
3607 if (ret != 0) {
8e7d2b2c 3608 drm_free_large(*relocs);
40a5f0de 3609 *relocs = NULL;
2bc43b5c 3610 return -EFAULT;
40a5f0de
EA
3611 }
3612
3613 reloc_index += exec_list[i].relocation_count;
3614 }
3615
2bc43b5c 3616 return 0;
40a5f0de
EA
3617}
3618
3619static int
76446cac 3620i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object2 *exec_list,
40a5f0de
EA
3621 uint32_t buffer_count,
3622 struct drm_i915_gem_relocation_entry *relocs)
3623{
3624 uint32_t reloc_count = 0, i;
2bc43b5c 3625 int ret = 0;
40a5f0de 3626
93533c29
CW
3627 if (relocs == NULL)
3628 return 0;
3629
40a5f0de
EA
3630 for (i = 0; i < buffer_count; i++) {
3631 struct drm_i915_gem_relocation_entry __user *user_relocs;
2bc43b5c 3632 int unwritten;
40a5f0de
EA
3633
3634 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3635
2bc43b5c
FM
3636 unwritten = copy_to_user(user_relocs,
3637 &relocs[reloc_count],
3638 exec_list[i].relocation_count *
3639 sizeof(*relocs));
3640
3641 if (unwritten) {
3642 ret = -EFAULT;
3643 goto err;
40a5f0de
EA
3644 }
3645
3646 reloc_count += exec_list[i].relocation_count;
3647 }
3648
2bc43b5c 3649err:
8e7d2b2c 3650 drm_free_large(relocs);
40a5f0de
EA
3651
3652 return ret;
3653}
3654
83d60795 3655static int
76446cac 3656i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec,
83d60795
CW
3657 uint64_t exec_offset)
3658{
3659 uint32_t exec_start, exec_len;
3660
3661 exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
3662 exec_len = (uint32_t) exec->batch_len;
3663
3664 if ((exec_start | exec_len) & 0x7)
3665 return -EINVAL;
3666
3667 if (!exec_start)
3668 return -EINVAL;
3669
3670 return 0;
3671}
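
/* Illustrative note: the single test above catches misalignment in either
 * field. For example, exec_start = 0x10004 and exec_len = 0x40 give
 * (0x10004 | 0x40) & 0x7 == 0x4, so the execbuffer is rejected with -EINVAL,
 * while a batch starting at 0x10000 with the same length passes (the OR is
 * 0x10040 and masking with 0x7 yields 0).
 */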
3672
6b95a207
KH
3673static int
3674i915_gem_wait_for_pending_flip(struct drm_device *dev,
3675 struct drm_gem_object **object_list,
3676 int count)
3677{
3678 drm_i915_private_t *dev_priv = dev->dev_private;
3679 struct drm_i915_gem_object *obj_priv;
3680 DEFINE_WAIT(wait);
3681 int i, ret = 0;
3682
3683 for (;;) {
3684 prepare_to_wait(&dev_priv->pending_flip_queue,
3685 &wait, TASK_INTERRUPTIBLE);
3686 for (i = 0; i < count; i++) {
23010e43 3687 obj_priv = to_intel_bo(object_list[i]);
6b95a207
KH
3688 if (atomic_read(&obj_priv->pending_flip) > 0)
3689 break;
3690 }
3691 if (i == count)
3692 break;
3693
3694 if (!signal_pending(current)) {
3695 mutex_unlock(&dev->struct_mutex);
3696 schedule();
3697 mutex_lock(&dev->struct_mutex);
3698 continue;
3699 }
3700 ret = -ERESTARTSYS;
3701 break;
3702 }
3703 finish_wait(&dev_priv->pending_flip_queue, &wait);
3704
3705 return ret;
3706}
3707
43b27f40 3708
673a394b 3709int
76446cac
JB
3710i915_gem_do_execbuffer(struct drm_device *dev, void *data,
3711 struct drm_file *file_priv,
3712 struct drm_i915_gem_execbuffer2 *args,
3713 struct drm_i915_gem_exec_object2 *exec_list)
673a394b
EA
3714{
3715 drm_i915_private_t *dev_priv = dev->dev_private;
673a394b
EA
3716 struct drm_gem_object **object_list = NULL;
3717 struct drm_gem_object *batch_obj;
b70d11da 3718 struct drm_i915_gem_object *obj_priv;
201361a5 3719 struct drm_clip_rect *cliprects = NULL;
93533c29 3720 struct drm_i915_gem_relocation_entry *relocs = NULL;
76446cac 3721 int ret = 0, ret2, i, pinned = 0;
673a394b 3722 uint64_t exec_offset;
40a5f0de 3723 uint32_t seqno, flush_domains, reloc_index;
6b95a207 3724 int pin_tries, flips;
673a394b 3725
852835f3
ZN
3726 struct intel_ring_buffer *ring = NULL;
3727
673a394b
EA
3728#if WATCH_EXEC
3729 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3730 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3731#endif
d1b851fc
ZN
3732 if (args->flags & I915_EXEC_BSD) {
3733 if (!HAS_BSD(dev)) {
3734 DRM_ERROR("execbuf with wrong flag\n");
3735 return -EINVAL;
3736 }
3737 ring = &dev_priv->bsd_ring;
3738 } else {
3739 ring = &dev_priv->render_ring;
3740 }
3741
4f481ed2
EA
3742 if (args->buffer_count < 1) {
3743 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
3744 return -EINVAL;
3745 }
c8e0f93a 3746 object_list = drm_malloc_ab(sizeof(*object_list), args->buffer_count);
76446cac
JB
3747 if (object_list == NULL) {
3748 DRM_ERROR("Failed to allocate object list for %d buffers\n",
673a394b
EA
3749 args->buffer_count);
3750 ret = -ENOMEM;
3751 goto pre_mutex_err;
3752 }
673a394b 3753
201361a5 3754 if (args->num_cliprects != 0) {
9a298b2a
EA
3755 cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects),
3756 GFP_KERNEL);
a40e8d31
OA
3757 if (cliprects == NULL) {
3758 ret = -ENOMEM;
201361a5 3759 goto pre_mutex_err;
a40e8d31 3760 }
201361a5
EA
3761
3762 ret = copy_from_user(cliprects,
3763 (struct drm_clip_rect __user *)
3764 (uintptr_t) args->cliprects_ptr,
3765 sizeof(*cliprects) * args->num_cliprects);
3766 if (ret != 0) {
3767 DRM_ERROR("copy %d cliprects failed: %d\n",
3768 args->num_cliprects, ret);
			ret = -EFAULT;
 3769 			goto pre_mutex_err;
3770 }
3771 }
3772
40a5f0de
EA
3773 ret = i915_gem_get_relocs_from_user(exec_list, args->buffer_count,
3774 &relocs);
3775 if (ret != 0)
3776 goto pre_mutex_err;
3777
673a394b
EA
3778 mutex_lock(&dev->struct_mutex);
3779
3780 i915_verify_inactive(dev, __FILE__, __LINE__);
3781
ba1234d1 3782 if (atomic_read(&dev_priv->mm.wedged)) {
673a394b 3783 mutex_unlock(&dev->struct_mutex);
a198bc80
CW
3784 ret = -EIO;
3785 goto pre_mutex_err;
673a394b
EA
3786 }
3787
3788 if (dev_priv->mm.suspended) {
673a394b 3789 mutex_unlock(&dev->struct_mutex);
a198bc80
CW
3790 ret = -EBUSY;
3791 goto pre_mutex_err;
673a394b
EA
3792 }
3793
ac94a962 3794 /* Look up object handles */
6b95a207 3795 flips = 0;
673a394b
EA
3796 for (i = 0; i < args->buffer_count; i++) {
3797 object_list[i] = drm_gem_object_lookup(dev, file_priv,
3798 exec_list[i].handle);
3799 if (object_list[i] == NULL) {
3800 DRM_ERROR("Invalid object handle %d at index %d\n",
3801 exec_list[i].handle, i);
0ce907f8
CW
3802 /* prevent error path from reading uninitialized data */
3803 args->buffer_count = i + 1;
673a394b
EA
3804 ret = -EBADF;
3805 goto err;
3806 }
b70d11da 3807
23010e43 3808 obj_priv = to_intel_bo(object_list[i]);
b70d11da
KH
3809 if (obj_priv->in_execbuffer) {
3810 DRM_ERROR("Object %p appears more than once in object list\n",
3811 object_list[i]);
0ce907f8
CW
3812 /* prevent error path from reading uninitialized data */
3813 args->buffer_count = i + 1;
b70d11da
KH
3814 ret = -EBADF;
3815 goto err;
3816 }
3817 obj_priv->in_execbuffer = true;
6b95a207
KH
3818 flips += atomic_read(&obj_priv->pending_flip);
3819 }
3820
3821 if (flips > 0) {
3822 ret = i915_gem_wait_for_pending_flip(dev, object_list,
3823 args->buffer_count);
3824 if (ret)
3825 goto err;
ac94a962 3826 }
673a394b 3827
ac94a962
KP
3828 /* Pin and relocate */
3829 for (pin_tries = 0; ; pin_tries++) {
3830 ret = 0;
40a5f0de
EA
3831 reloc_index = 0;
3832
ac94a962
KP
3833 for (i = 0; i < args->buffer_count; i++) {
3834 object_list[i]->pending_read_domains = 0;
3835 object_list[i]->pending_write_domain = 0;
3836 ret = i915_gem_object_pin_and_relocate(object_list[i],
3837 file_priv,
40a5f0de
EA
3838 &exec_list[i],
3839 &relocs[reloc_index]);
ac94a962
KP
3840 if (ret)
3841 break;
3842 pinned = i + 1;
40a5f0de 3843 reloc_index += exec_list[i].relocation_count;
ac94a962
KP
3844 }
3845 /* success */
3846 if (ret == 0)
3847 break;
3848
3849 /* error other than GTT full, or we've already tried again */
2939e1f5 3850 if (ret != -ENOSPC || pin_tries >= 1) {
07f73f69
CW
3851 if (ret != -ERESTARTSYS) {
3852 unsigned long long total_size = 0;
3d1cc470
CW
3853 int num_fences = 0;
3854 for (i = 0; i < args->buffer_count; i++) {
43b27f40 3855 obj_priv = to_intel_bo(object_list[i]);
3d1cc470 3856
07f73f69 3857 total_size += object_list[i]->size;
3d1cc470
CW
3858 num_fences +=
3859 exec_list[i].flags & EXEC_OBJECT_NEEDS_FENCE &&
3860 obj_priv->tiling_mode != I915_TILING_NONE;
3861 }
3862 DRM_ERROR("Failed to pin buffer %d of %d, total %llu bytes, %d fences: %d\n",
07f73f69 3863 pinned+1, args->buffer_count,
3d1cc470
CW
3864 total_size, num_fences,
3865 ret);
07f73f69
CW
3866 DRM_ERROR("%d objects [%d pinned], "
3867 "%d object bytes [%d pinned], "
3868 "%d/%d gtt bytes\n",
3869 atomic_read(&dev->object_count),
3870 atomic_read(&dev->pin_count),
3871 atomic_read(&dev->object_memory),
3872 atomic_read(&dev->pin_memory),
3873 atomic_read(&dev->gtt_memory),
3874 dev->gtt_total);
3875 }
673a394b
EA
3876 goto err;
3877 }
ac94a962
KP
3878
3879 /* unpin all of our buffers */
3880 for (i = 0; i < pinned; i++)
3881 i915_gem_object_unpin(object_list[i]);
b1177636 3882 pinned = 0;
ac94a962
KP
3883
3884 /* evict everyone we can from the aperture */
3885 ret = i915_gem_evict_everything(dev);
07f73f69 3886 if (ret && ret != -ENOSPC)
ac94a962 3887 goto err;
673a394b
EA
3888 }
3889
3890 /* Set the pending read domains for the batch buffer to COMMAND */
3891 batch_obj = object_list[args->buffer_count-1];
5f26a2c7
CW
3892 if (batch_obj->pending_write_domain) {
3893 DRM_ERROR("Attempting to use self-modifying batch buffer\n");
3894 ret = -EINVAL;
3895 goto err;
3896 }
3897 batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
673a394b 3898
83d60795
CW
3899 /* Sanity check the batch buffer, prior to moving objects */
3900 exec_offset = exec_list[args->buffer_count - 1].offset;
 3901 	ret = i915_gem_check_execbuffer(args, exec_offset);
3902 if (ret != 0) {
3903 DRM_ERROR("execbuf with invalid offset/length\n");
3904 goto err;
3905 }
3906
673a394b
EA
3907 i915_verify_inactive(dev, __FILE__, __LINE__);
3908
646f0f6e
KP
3909 /* Zero the global flush/invalidate flags. These
3910 * will be modified as new domains are computed
3911 * for each object
3912 */
3913 dev->invalidate_domains = 0;
3914 dev->flush_domains = 0;
88f356b7 3915 dev_priv->flush_rings = 0;
646f0f6e 3916
673a394b
EA
3917 for (i = 0; i < args->buffer_count; i++) {
3918 struct drm_gem_object *obj = object_list[i];
673a394b 3919
646f0f6e 3920 /* Compute new gpu domains and update invalidate/flush */
8b0e378a 3921 i915_gem_object_set_to_gpu_domain(obj);
673a394b
EA
3922 }
3923
3924 i915_verify_inactive(dev, __FILE__, __LINE__);
3925
646f0f6e
KP
3926 if (dev->invalidate_domains | dev->flush_domains) {
3927#if WATCH_EXEC
3928 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
3929 __func__,
3930 dev->invalidate_domains,
3931 dev->flush_domains);
3932#endif
3933 i915_gem_flush(dev,
3934 dev->invalidate_domains,
3935 dev->flush_domains);
88f356b7 3936 if (dev_priv->flush_rings & FLUSH_RENDER_RING)
b962442e 3937 (void)i915_add_request(dev, file_priv,
88f356b7
CW
3938 dev->flush_domains,
3939 &dev_priv->render_ring);
3940 if (dev_priv->flush_rings & FLUSH_BSD_RING)
3941 (void)i915_add_request(dev, file_priv,
3942 dev->flush_domains,
3943 &dev_priv->bsd_ring);
646f0f6e 3944 }
673a394b 3945
efbeed96
EA
3946 for (i = 0; i < args->buffer_count; i++) {
3947 struct drm_gem_object *obj = object_list[i];
23010e43 3948 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
1c5d22f7 3949 uint32_t old_write_domain = obj->write_domain;
efbeed96
EA
3950
3951 obj->write_domain = obj->pending_write_domain;
99fcb766
DV
3952 if (obj->write_domain)
3953 list_move_tail(&obj_priv->gpu_write_list,
3954 &dev_priv->mm.gpu_write_list);
3955 else
3956 list_del_init(&obj_priv->gpu_write_list);
3957
1c5d22f7
CW
3958 trace_i915_gem_object_change_domain(obj,
3959 obj->read_domains,
3960 old_write_domain);
efbeed96
EA
3961 }
3962
673a394b
EA
3963 i915_verify_inactive(dev, __FILE__, __LINE__);
3964
3965#if WATCH_COHERENCY
3966 for (i = 0; i < args->buffer_count; i++) {
3967 i915_gem_object_check_coherency(object_list[i],
3968 exec_list[i].handle);
3969 }
3970#endif
3971
673a394b 3972#if WATCH_EXEC
6911a9b8 3973 i915_gem_dump_object(batch_obj,
673a394b
EA
3974 args->batch_len,
3975 __func__,
3976 ~0);
3977#endif
3978
673a394b 3979 /* Exec the batchbuffer */
852835f3
ZN
3980 ret = ring->dispatch_gem_execbuffer(dev, ring, args,
3981 cliprects, exec_offset);
673a394b
EA
3982 if (ret) {
3983 DRM_ERROR("dispatch failed %d\n", ret);
3984 goto err;
3985 }
3986
3987 /*
3988 * Ensure that the commands in the batch buffer are
3989 * finished before the interrupt fires
3990 */
852835f3 3991 flush_domains = i915_retire_commands(dev, ring);
673a394b
EA
3992
3993 i915_verify_inactive(dev, __FILE__, __LINE__);
3994
3995 /*
3996 * Get a seqno representing the execution of the current buffer,
3997 * which we can wait on. We would like to mitigate these interrupts,
3998 * likely by only creating seqnos occasionally (so that we have
3999 * *some* interrupts representing completion of buffers that we can
4000 * wait on when trying to clear up gtt space).
4001 */
852835f3 4002 seqno = i915_add_request(dev, file_priv, flush_domains, ring);
673a394b 4003 BUG_ON(seqno == 0);
673a394b
EA
4004 for (i = 0; i < args->buffer_count; i++) {
4005 struct drm_gem_object *obj = object_list[i];
852835f3 4006 obj_priv = to_intel_bo(obj);
673a394b 4007
852835f3 4008 i915_gem_object_move_to_active(obj, seqno, ring);
673a394b
EA
4009#if WATCH_LRU
4010 DRM_INFO("%s: move to exec list %p\n", __func__, obj);
4011#endif
4012 }
4013#if WATCH_LRU
4014 i915_dump_lru(dev, __func__);
4015#endif
4016
4017 i915_verify_inactive(dev, __FILE__, __LINE__);
4018
673a394b 4019err:
aad87dff
JL
4020 for (i = 0; i < pinned; i++)
4021 i915_gem_object_unpin(object_list[i]);
4022
b70d11da
KH
4023 for (i = 0; i < args->buffer_count; i++) {
4024 if (object_list[i]) {
23010e43 4025 obj_priv = to_intel_bo(object_list[i]);
b70d11da
KH
4026 obj_priv->in_execbuffer = false;
4027 }
aad87dff 4028 drm_gem_object_unreference(object_list[i]);
b70d11da 4029 }
673a394b 4030
673a394b
EA
4031 mutex_unlock(&dev->struct_mutex);
4032
93533c29 4033pre_mutex_err:
40a5f0de
EA
4034 /* Copy the updated relocations out regardless of current error
4035 * state. Failure to update the relocs would mean that the next
4036 * time userland calls execbuf, it would do so with presumed offset
4037 * state that didn't match the actual object state.
4038 */
4039 ret2 = i915_gem_put_relocs_to_user(exec_list, args->buffer_count,
4040 relocs);
4041 if (ret2 != 0) {
4042 DRM_ERROR("Failed to copy relocations back out: %d\n", ret2);
4043
4044 if (ret == 0)
4045 ret = ret2;
4046 }
4047
8e7d2b2c 4048 drm_free_large(object_list);
9a298b2a 4049 kfree(cliprects);
673a394b
EA
4050
4051 return ret;
4052}
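
/* Illustrative sketch, userspace side, of driving i915_gem_do_execbuffer():
 * one drm_i915_gem_exec_object2 per buffer with the batch listed last,
 * wrapped in a drm_i915_gem_execbuffer2 and submitted through
 * DRM_IOCTL_I915_GEM_EXECBUFFER2. The handles, lengths and helper name are
 * placeholders for this sketch.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <i915_drm.h>	/* from libdrm; include path may vary */

static int example_exec_batch(int drm_fd, uint32_t data_handle,
			      uint32_t batch_handle, uint32_t batch_len)
{
	struct drm_i915_gem_exec_object2 objs[2];
	struct drm_i915_gem_execbuffer2 execbuf;

	memset(objs, 0, sizeof(objs));
	objs[0].handle = data_handle;	/* buffer referenced by the batch */
	objs[1].handle = batch_handle;	/* batch buffer must come last */

	memset(&execbuf, 0, sizeof(execbuf));
	execbuf.buffers_ptr = (uintptr_t)objs;
	execbuf.buffer_count = 2;
	execbuf.batch_len = batch_len;
	execbuf.flags = I915_EXEC_RENDER;	/* or I915_EXEC_BSD where supported */

	return ioctl(drm_fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
}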
4053
76446cac
JB
4054/*
4055 * Legacy execbuffer just creates an exec2 list from the original exec object
4056 * list array and passes it to the real function.
4057 */
4058int
4059i915_gem_execbuffer(struct drm_device *dev, void *data,
4060 struct drm_file *file_priv)
4061{
4062 struct drm_i915_gem_execbuffer *args = data;
4063 struct drm_i915_gem_execbuffer2 exec2;
4064 struct drm_i915_gem_exec_object *exec_list = NULL;
4065 struct drm_i915_gem_exec_object2 *exec2_list = NULL;
4066 int ret, i;
4067
4068#if WATCH_EXEC
4069 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
4070 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
4071#endif
4072
4073 if (args->buffer_count < 1) {
4074 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
4075 return -EINVAL;
4076 }
4077
4078 /* Copy in the exec list from userland */
4079 exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
4080 exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
4081 if (exec_list == NULL || exec2_list == NULL) {
4082 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
4083 args->buffer_count);
4084 drm_free_large(exec_list);
4085 drm_free_large(exec2_list);
4086 return -ENOMEM;
4087 }
4088 ret = copy_from_user(exec_list,
4089 (struct drm_i915_relocation_entry __user *)
4090 (uintptr_t) args->buffers_ptr,
4091 sizeof(*exec_list) * args->buffer_count);
4092 if (ret != 0) {
4093 DRM_ERROR("copy %d exec entries failed %d\n",
4094 args->buffer_count, ret);
4095 drm_free_large(exec_list);
4096 drm_free_large(exec2_list);
4097 return -EFAULT;
4098 }
4099
4100 for (i = 0; i < args->buffer_count; i++) {
4101 exec2_list[i].handle = exec_list[i].handle;
4102 exec2_list[i].relocation_count = exec_list[i].relocation_count;
4103 exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
4104 exec2_list[i].alignment = exec_list[i].alignment;
4105 exec2_list[i].offset = exec_list[i].offset;
4106 if (!IS_I965G(dev))
4107 exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
4108 else
4109 exec2_list[i].flags = 0;
4110 }
4111
4112 exec2.buffers_ptr = args->buffers_ptr;
4113 exec2.buffer_count = args->buffer_count;
4114 exec2.batch_start_offset = args->batch_start_offset;
4115 exec2.batch_len = args->batch_len;
4116 exec2.DR1 = args->DR1;
4117 exec2.DR4 = args->DR4;
4118 exec2.num_cliprects = args->num_cliprects;
4119 exec2.cliprects_ptr = args->cliprects_ptr;
852835f3 4120 exec2.flags = I915_EXEC_RENDER;
76446cac
JB
4121
4122 ret = i915_gem_do_execbuffer(dev, data, file_priv, &exec2, exec2_list);
4123 if (!ret) {
4124 /* Copy the new buffer offsets back to the user's exec list. */
4125 for (i = 0; i < args->buffer_count; i++)
4126 exec_list[i].offset = exec2_list[i].offset;
4127 /* ... and back out to userspace */
4128 ret = copy_to_user((struct drm_i915_relocation_entry __user *)
4129 (uintptr_t) args->buffers_ptr,
4130 exec_list,
4131 sizeof(*exec_list) * args->buffer_count);
4132 if (ret) {
4133 ret = -EFAULT;
4134 DRM_ERROR("failed to copy %d exec entries "
4135 "back to user (%d)\n",
4136 args->buffer_count, ret);
4137 }
76446cac
JB
4138 }
4139
4140 drm_free_large(exec_list);
4141 drm_free_large(exec2_list);
4142 return ret;
4143}
4144
4145int
4146i915_gem_execbuffer2(struct drm_device *dev, void *data,
4147 struct drm_file *file_priv)
4148{
4149 struct drm_i915_gem_execbuffer2 *args = data;
4150 struct drm_i915_gem_exec_object2 *exec2_list = NULL;
4151 int ret;
4152
4153#if WATCH_EXEC
4154 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
4155 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
4156#endif
4157
4158 if (args->buffer_count < 1) {
4159 DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count);
4160 return -EINVAL;
4161 }
4162
4163 exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
4164 if (exec2_list == NULL) {
4165 DRM_ERROR("Failed to allocate exec list for %d buffers\n",
4166 args->buffer_count);
4167 return -ENOMEM;
4168 }
4169 ret = copy_from_user(exec2_list,
4170 (struct drm_i915_relocation_entry __user *)
4171 (uintptr_t) args->buffers_ptr,
4172 sizeof(*exec2_list) * args->buffer_count);
4173 if (ret != 0) {
4174 DRM_ERROR("copy %d exec entries failed %d\n",
4175 args->buffer_count, ret);
4176 drm_free_large(exec2_list);
4177 return -EFAULT;
4178 }
4179
4180 ret = i915_gem_do_execbuffer(dev, data, file_priv, args, exec2_list);
4181 if (!ret) {
4182 /* Copy the new buffer offsets back to the user's exec list. */
4183 ret = copy_to_user((struct drm_i915_relocation_entry __user *)
4184 (uintptr_t) args->buffers_ptr,
4185 exec2_list,
4186 sizeof(*exec2_list) * args->buffer_count);
4187 if (ret) {
4188 ret = -EFAULT;
4189 DRM_ERROR("failed to copy %d exec entries "
4190 "back to user (%d)\n",
4191 args->buffer_count, ret);
4192 }
4193 }
4194
4195 drm_free_large(exec2_list);
4196 return ret;
4197}
4198
673a394b
EA
4199int
4200i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
4201{
4202 struct drm_device *dev = obj->dev;
23010e43 4203 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b
EA
4204 int ret;
4205
778c3544
DV
4206 BUG_ON(obj_priv->pin_count == DRM_I915_GEM_OBJECT_MAX_PIN_COUNT);
4207
673a394b 4208 i915_verify_inactive(dev, __FILE__, __LINE__);
ac0c6b5a
CW
4209
4210 if (obj_priv->gtt_space != NULL) {
4211 if (alignment == 0)
4212 alignment = i915_gem_get_gtt_alignment(obj);
4213 if (obj_priv->gtt_offset & (alignment - 1)) {
ae7d49d8
CW
4214 WARN(obj_priv->pin_count,
4215 "bo is already pinned with incorrect alignment:"
4216 " offset=%x, req.alignment=%x\n",
4217 obj_priv->gtt_offset, alignment);
ac0c6b5a
CW
4218 ret = i915_gem_object_unbind(obj);
4219 if (ret)
4220 return ret;
4221 }
4222 }
4223
673a394b
EA
4224 if (obj_priv->gtt_space == NULL) {
4225 ret = i915_gem_object_bind_to_gtt(obj, alignment);
9731129c 4226 if (ret)
673a394b 4227 return ret;
22c344e9 4228 }
76446cac 4229
673a394b
EA
4230 obj_priv->pin_count++;
4231
4232 /* If the object is not active and not pending a flush,
4233 * remove it from the inactive list
4234 */
4235 if (obj_priv->pin_count == 1) {
4236 atomic_inc(&dev->pin_count);
4237 atomic_add(obj->size, &dev->pin_memory);
4238 if (!obj_priv->active &&
bf1a1092 4239 (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
673a394b
EA
4240 list_del_init(&obj_priv->list);
4241 }
4242 i915_verify_inactive(dev, __FILE__, __LINE__);
4243
4244 return 0;
4245}
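
/* Illustrative note, a worked example of the rebind check above: an object
 * already bound at gtt_offset 0x11000 but pinned with alignment 0x10000
 * fails the test (0x11000 & 0xffff == 0x1000), so it is unbound and rebound
 * at a suitably aligned offset before pin_count is raised.
 */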
4246
4247void
4248i915_gem_object_unpin(struct drm_gem_object *obj)
4249{
4250 struct drm_device *dev = obj->dev;
4251 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 4252 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
673a394b
EA
4253
4254 i915_verify_inactive(dev, __FILE__, __LINE__);
4255 obj_priv->pin_count--;
4256 BUG_ON(obj_priv->pin_count < 0);
4257 BUG_ON(obj_priv->gtt_space == NULL);
4258
4259 /* If the object is no longer pinned, and is
4260 * neither active nor being flushed, then stick it on
4261 * the inactive list
4262 */
4263 if (obj_priv->pin_count == 0) {
4264 if (!obj_priv->active &&
21d509e3 4265 (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
673a394b
EA
4266 list_move_tail(&obj_priv->list,
4267 &dev_priv->mm.inactive_list);
4268 atomic_dec(&dev->pin_count);
4269 atomic_sub(obj->size, &dev->pin_memory);
4270 }
4271 i915_verify_inactive(dev, __FILE__, __LINE__);
4272}
4273
4274int
4275i915_gem_pin_ioctl(struct drm_device *dev, void *data,
4276 struct drm_file *file_priv)
4277{
4278 struct drm_i915_gem_pin *args = data;
4279 struct drm_gem_object *obj;
4280 struct drm_i915_gem_object *obj_priv;
4281 int ret;
4282
4283 mutex_lock(&dev->struct_mutex);
4284
4285 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4286 if (obj == NULL) {
4287 DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n",
4288 args->handle);
4289 mutex_unlock(&dev->struct_mutex);
4290 return -EBADF;
4291 }
23010e43 4292 obj_priv = to_intel_bo(obj);
673a394b 4293
bb6baf76
CW
4294 if (obj_priv->madv != I915_MADV_WILLNEED) {
4295 DRM_ERROR("Attempting to pin a purgeable buffer\n");
3ef94daa
CW
4296 drm_gem_object_unreference(obj);
4297 mutex_unlock(&dev->struct_mutex);
4298 return -EINVAL;
4299 }
4300
79e53945
JB
4301 if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) {
4302 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
4303 args->handle);
96dec61d 4304 drm_gem_object_unreference(obj);
673a394b 4305 mutex_unlock(&dev->struct_mutex);
79e53945
JB
4306 return -EINVAL;
4307 }
4308
4309 obj_priv->user_pin_count++;
4310 obj_priv->pin_filp = file_priv;
4311 if (obj_priv->user_pin_count == 1) {
4312 ret = i915_gem_object_pin(obj, args->alignment);
4313 if (ret != 0) {
4314 drm_gem_object_unreference(obj);
4315 mutex_unlock(&dev->struct_mutex);
4316 return ret;
4317 }
673a394b
EA
4318 }
4319
4320 /* XXX - flush the CPU caches for pinned objects
4321 * as the X server doesn't manage domains yet
4322 */
e47c68e9 4323 i915_gem_object_flush_cpu_write_domain(obj);
673a394b
EA
4324 args->offset = obj_priv->gtt_offset;
4325 drm_gem_object_unreference(obj);
4326 mutex_unlock(&dev->struct_mutex);
4327
4328 return 0;
4329}
4330
4331int
4332i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
4333 struct drm_file *file_priv)
4334{
4335 struct drm_i915_gem_pin *args = data;
4336 struct drm_gem_object *obj;
79e53945 4337 struct drm_i915_gem_object *obj_priv;
673a394b
EA
4338
4339 mutex_lock(&dev->struct_mutex);
4340
4341 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4342 if (obj == NULL) {
4343 DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n",
4344 args->handle);
4345 mutex_unlock(&dev->struct_mutex);
4346 return -EBADF;
4347 }
4348
23010e43 4349 obj_priv = to_intel_bo(obj);
79e53945
JB
4350 if (obj_priv->pin_filp != file_priv) {
4351 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
4352 args->handle);
4353 drm_gem_object_unreference(obj);
4354 mutex_unlock(&dev->struct_mutex);
4355 return -EINVAL;
4356 }
4357 obj_priv->user_pin_count--;
4358 if (obj_priv->user_pin_count == 0) {
4359 obj_priv->pin_filp = NULL;
4360 i915_gem_object_unpin(obj);
4361 }
673a394b
EA
4362
4363 drm_gem_object_unreference(obj);
4364 mutex_unlock(&dev->struct_mutex);
4365 return 0;
4366}
4367
4368int
4369i915_gem_busy_ioctl(struct drm_device *dev, void *data,
4370 struct drm_file *file_priv)
4371{
4372 struct drm_i915_gem_busy *args = data;
4373 struct drm_gem_object *obj;
4374 struct drm_i915_gem_object *obj_priv;
4375
673a394b
EA
4376 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4377 if (obj == NULL) {
4378 DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n",
4379 args->handle);
673a394b
EA
4380 return -EBADF;
4381 }
4382
b1ce786c 4383 mutex_lock(&dev->struct_mutex);
d1b851fc 4384
0be555b6
CW
4385 /* Count all active objects as busy, even if they are currently not used
4386 * by the gpu. Users of this interface expect objects to eventually
4387 * become non-busy without any further actions, therefore emit any
4388 * necessary flushes here.
c4de0a5d 4389 */
0be555b6
CW
4390 obj_priv = to_intel_bo(obj);
4391 args->busy = obj_priv->active;
4392 if (args->busy) {
4393 /* Unconditionally flush objects, even when the gpu still uses this
4394 * object. Userspace calling this function indicates that it wants to
4395 * use this buffer rather sooner than later, so issuing the required
4396 * flush earlier is beneficial.
4397 */
4398 if (obj->write_domain) {
4399 i915_gem_flush(dev, 0, obj->write_domain);
4400 (void)i915_add_request(dev, file_priv, obj->write_domain, obj_priv->ring);
4401 }
4402
4403 /* Update the active list for the hardware's current position.
4404 * Otherwise this only updates on a delayed timer or when irqs
4405 * are actually unmasked, and our working set ends up being
4406 * larger than required.
4407 */
4408 i915_gem_retire_requests_ring(dev, obj_priv->ring);
4409
4410 args->busy = obj_priv->active;
4411 }
673a394b
EA
4412
4413 drm_gem_object_unreference(obj);
4414 mutex_unlock(&dev->struct_mutex);
4415 return 0;
4416}
4417
4418int
4419i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
4420 struct drm_file *file_priv)
4421{
4422 return i915_gem_ring_throttle(dev, file_priv);
4423}
4424
3ef94daa
CW
4425int
4426i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
4427 struct drm_file *file_priv)
4428{
4429 struct drm_i915_gem_madvise *args = data;
4430 struct drm_gem_object *obj;
4431 struct drm_i915_gem_object *obj_priv;
4432
4433 switch (args->madv) {
4434 case I915_MADV_DONTNEED:
4435 case I915_MADV_WILLNEED:
4436 break;
4437 default:
4438 return -EINVAL;
4439 }
4440
4441 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
4442 if (obj == NULL) {
4443 DRM_ERROR("Bad handle in i915_gem_madvise_ioctl(): %d\n",
4444 args->handle);
4445 return -EBADF;
4446 }
4447
4448 mutex_lock(&dev->struct_mutex);
23010e43 4449 obj_priv = to_intel_bo(obj);
3ef94daa
CW
4450
4451 if (obj_priv->pin_count) {
4452 drm_gem_object_unreference(obj);
4453 mutex_unlock(&dev->struct_mutex);
4454
4455 DRM_ERROR("Attempted i915_gem_madvise_ioctl() on a pinned object\n");
4456 return -EINVAL;
4457 }
4458
bb6baf76
CW
4459 if (obj_priv->madv != __I915_MADV_PURGED)
4460 obj_priv->madv = args->madv;
3ef94daa 4461
2d7ef395
CW
4462 /* if the object is no longer bound, discard its backing storage */
4463 if (i915_gem_object_is_purgeable(obj_priv) &&
4464 obj_priv->gtt_space == NULL)
4465 i915_gem_object_truncate(obj);
4466
bb6baf76
CW
4467 args->retained = obj_priv->madv != __I915_MADV_PURGED;
4468
3ef94daa
CW
4469 drm_gem_object_unreference(obj);
4470 mutex_unlock(&dev->struct_mutex);
4471
4472 return 0;
4473}
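
/* Illustrative sketch, userspace side, of the madvise interface above: mark
 * an idle cached buffer purgeable, and later check 'retained' before trusting
 * its contents again. The helper name is a placeholder and error handling is
 * minimal.
 */
#include <stdint.h>
#include <sys/ioctl.h>
#include <i915_drm.h>	/* from libdrm; include path may vary */

static int example_mark_purgeable(int drm_fd, uint32_t handle)
{
	struct drm_i915_gem_madvise madv;

	madv.handle = handle;
	madv.madv = I915_MADV_DONTNEED;	/* backing pages may be discarded */
	madv.retained = 0;

	if (ioctl(drm_fd, DRM_IOCTL_I915_GEM_MADVISE, &madv))
		return -1;

	/* on reuse: repeat with I915_MADV_WILLNEED and test madv.retained */
	return (int)madv.retained;
}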
4474
ac52bc56
DV
4475struct drm_gem_object * i915_gem_alloc_object(struct drm_device *dev,
4476 size_t size)
4477{
c397b908 4478 struct drm_i915_gem_object *obj;
ac52bc56 4479
c397b908
DV
4480 obj = kzalloc(sizeof(*obj), GFP_KERNEL);
4481 if (obj == NULL)
4482 return NULL;
673a394b 4483
c397b908
DV
4484 if (drm_gem_object_init(dev, &obj->base, size) != 0) {
4485 kfree(obj);
4486 return NULL;
4487 }
673a394b 4488
c397b908
DV
4489 obj->base.write_domain = I915_GEM_DOMAIN_CPU;
4490 obj->base.read_domains = I915_GEM_DOMAIN_CPU;
673a394b 4491
c397b908 4492 obj->agp_type = AGP_USER_MEMORY;
62b8b215 4493 obj->base.driver_private = NULL;
c397b908
DV
4494 obj->fence_reg = I915_FENCE_REG_NONE;
4495 INIT_LIST_HEAD(&obj->list);
4496 INIT_LIST_HEAD(&obj->gpu_write_list);
c397b908 4497 obj->madv = I915_MADV_WILLNEED;
de151cf6 4498
c397b908
DV
4499 trace_i915_gem_object_create(&obj->base);
4500
4501 return &obj->base;
4502}
4503
4504int i915_gem_init_object(struct drm_gem_object *obj)
4505{
4506 BUG();
de151cf6 4507
673a394b
EA
4508 return 0;
4509}
4510
be72615b 4511static void i915_gem_free_object_tail(struct drm_gem_object *obj)
673a394b 4512{
de151cf6 4513 struct drm_device *dev = obj->dev;
be72615b 4514 drm_i915_private_t *dev_priv = dev->dev_private;
23010e43 4515 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
be72615b 4516 int ret;
673a394b 4517
be72615b
CW
4518 ret = i915_gem_object_unbind(obj);
4519 if (ret == -ERESTARTSYS) {
4520 list_move(&obj_priv->list,
4521 &dev_priv->mm.deferred_free_list);
4522 return;
4523 }
673a394b 4524
7e616158
CW
4525 if (obj_priv->mmap_offset)
4526 i915_gem_free_mmap_offset(obj);
de151cf6 4527
c397b908
DV
4528 drm_gem_object_release(obj);
4529
9a298b2a 4530 kfree(obj_priv->page_cpu_valid);
280b713b 4531 kfree(obj_priv->bit_17);
c397b908 4532 kfree(obj_priv);
673a394b
EA
4533}
4534
be72615b
CW
4535void i915_gem_free_object(struct drm_gem_object *obj)
4536{
4537 struct drm_device *dev = obj->dev;
4538 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
4539
4540 trace_i915_gem_object_destroy(obj);
4541
4542 while (obj_priv->pin_count > 0)
4543 i915_gem_object_unpin(obj);
4544
4545 if (obj_priv->phys_obj)
4546 i915_gem_detach_phys_object(dev, obj);
4547
4548 i915_gem_free_object_tail(obj);
4549}
4550
ab5ee576 4551/** Unbinds all inactive objects. */
673a394b 4552static int
ab5ee576 4553i915_gem_evict_from_inactive_list(struct drm_device *dev)
673a394b 4554{
ab5ee576 4555 drm_i915_private_t *dev_priv = dev->dev_private;
673a394b 4556
ab5ee576
CW
4557 while (!list_empty(&dev_priv->mm.inactive_list)) {
4558 struct drm_gem_object *obj;
4559 int ret;
673a394b 4560
a8089e84
DV
4561 obj = &list_first_entry(&dev_priv->mm.inactive_list,
4562 struct drm_i915_gem_object,
4563 list)->base;
673a394b
EA
4564
4565 ret = i915_gem_object_unbind(obj);
4566 if (ret != 0) {
ab5ee576 4567 DRM_ERROR("Error unbinding object: %d\n", ret);
673a394b
EA
4568 return ret;
4569 }
4570 }
4571
673a394b
EA
4572 return 0;
4573}
4574
29105ccc
CW
4575int
4576i915_gem_idle(struct drm_device *dev)
4577{
4578 drm_i915_private_t *dev_priv = dev->dev_private;
4579 int ret;
28dfe52a 4580
29105ccc 4581 mutex_lock(&dev->struct_mutex);
1c5d22f7 4582
8187a2b7 4583 if (dev_priv->mm.suspended ||
d1b851fc
ZN
4584 (dev_priv->render_ring.gem_object == NULL) ||
4585 (HAS_BSD(dev) &&
4586 dev_priv->bsd_ring.gem_object == NULL)) {
29105ccc
CW
4587 mutex_unlock(&dev->struct_mutex);
4588 return 0;
28dfe52a
EA
4589 }
4590
29105ccc 4591 ret = i915_gpu_idle(dev);
6dbe2772
KP
4592 if (ret) {
4593 mutex_unlock(&dev->struct_mutex);
673a394b 4594 return ret;
6dbe2772 4595 }
673a394b 4596
29105ccc
CW
4597 /* Under UMS, be paranoid and evict. */
4598 if (!drm_core_check_feature(dev, DRIVER_MODESET)) {
4599 ret = i915_gem_evict_from_inactive_list(dev);
4600 if (ret) {
4601 mutex_unlock(&dev->struct_mutex);
4602 return ret;
4603 }
4604 }
4605
4606 /* Hack! Don't let anybody do execbuf while we don't control the chip.
4607 * We need to replace this with a semaphore, or something.
4608 * And not confound mm.suspended!
4609 */
4610 dev_priv->mm.suspended = 1;
4611 del_timer(&dev_priv->hangcheck_timer);
4612
4613 i915_kernel_lost_context(dev);
6dbe2772 4614 i915_gem_cleanup_ringbuffer(dev);
29105ccc 4615
6dbe2772
KP
4616 mutex_unlock(&dev->struct_mutex);
4617
29105ccc
CW
4618 /* Cancel the retire work handler, which should be idle now. */
4619 cancel_delayed_work_sync(&dev_priv->mm.retire_work);
4620
673a394b
EA
4621 return 0;
4622}
4623
e552eb70
JB
4624/*
4625 * 965+ support PIPE_CONTROL commands, which provide finer grained control
4626 * over cache flushing.
4627 */
8187a2b7 4628static int
e552eb70
JB
4629i915_gem_init_pipe_control(struct drm_device *dev)
4630{
4631 drm_i915_private_t *dev_priv = dev->dev_private;
4632 struct drm_gem_object *obj;
4633 struct drm_i915_gem_object *obj_priv;
4634 int ret;
4635
34dc4d44 4636 obj = i915_gem_alloc_object(dev, 4096);
e552eb70
JB
4637 if (obj == NULL) {
4638 DRM_ERROR("Failed to allocate seqno page\n");
4639 ret = -ENOMEM;
4640 goto err;
4641 }
4642 obj_priv = to_intel_bo(obj);
4643 obj_priv->agp_type = AGP_USER_CACHED_MEMORY;
4644
4645 ret = i915_gem_object_pin(obj, 4096);
4646 if (ret)
4647 goto err_unref;
4648
4649 dev_priv->seqno_gfx_addr = obj_priv->gtt_offset;
4650 dev_priv->seqno_page = kmap(obj_priv->pages[0]);
4651 if (dev_priv->seqno_page == NULL)
4652 goto err_unpin;
4653
4654 dev_priv->seqno_obj = obj;
4655 memset(dev_priv->seqno_page, 0, PAGE_SIZE);
4656
4657 return 0;
4658
4659err_unpin:
4660 i915_gem_object_unpin(obj);
4661err_unref:
4662 drm_gem_object_unreference(obj);
4663err:
4664 return ret;
4665}
4666
8187a2b7
ZN
4667
4668static void
e552eb70
JB
4669i915_gem_cleanup_pipe_control(struct drm_device *dev)
4670{
4671 drm_i915_private_t *dev_priv = dev->dev_private;
4672 struct drm_gem_object *obj;
4673 struct drm_i915_gem_object *obj_priv;
4674
4675 obj = dev_priv->seqno_obj;
4676 obj_priv = to_intel_bo(obj);
4677 kunmap(obj_priv->pages[0]);
4678 i915_gem_object_unpin(obj);
4679 drm_gem_object_unreference(obj);
4680 dev_priv->seqno_obj = NULL;
4681
4682 dev_priv->seqno_page = NULL;
673a394b
EA
4683}
4684
8187a2b7
ZN
4685int
4686i915_gem_init_ringbuffer(struct drm_device *dev)
4687{
4688 drm_i915_private_t *dev_priv = dev->dev_private;
4689 int ret;
68f95ba9 4690
8187a2b7 4691 dev_priv->render_ring = render_ring;
68f95ba9 4692
8187a2b7
ZN
4693 if (!I915_NEED_GFX_HWS(dev)) {
4694 dev_priv->render_ring.status_page.page_addr
4695 = dev_priv->status_page_dmah->vaddr;
4696 memset(dev_priv->render_ring.status_page.page_addr,
4697 0, PAGE_SIZE);
4698 }
68f95ba9 4699
8187a2b7
ZN
4700 if (HAS_PIPE_CONTROL(dev)) {
4701 ret = i915_gem_init_pipe_control(dev);
4702 if (ret)
4703 return ret;
4704 }
68f95ba9 4705
8187a2b7 4706 ret = intel_init_ring_buffer(dev, &dev_priv->render_ring);
68f95ba9
CW
4707 if (ret)
4708 goto cleanup_pipe_control;
4709
4710 if (HAS_BSD(dev)) {
d1b851fc
ZN
4711 dev_priv->bsd_ring = bsd_ring;
4712 ret = intel_init_ring_buffer(dev, &dev_priv->bsd_ring);
68f95ba9
CW
4713 if (ret)
4714 goto cleanup_render_ring;
d1b851fc 4715 }
68f95ba9
CW
4716
4717 return 0;
4718
4719cleanup_render_ring:
4720 intel_cleanup_ring_buffer(dev, &dev_priv->render_ring);
4721cleanup_pipe_control:
4722 if (HAS_PIPE_CONTROL(dev))
4723 i915_gem_cleanup_pipe_control(dev);
8187a2b7
ZN
4724 return ret;
4725}
4726
4727void
4728i915_gem_cleanup_ringbuffer(struct drm_device *dev)
4729{
4730 drm_i915_private_t *dev_priv = dev->dev_private;
4731
4732 intel_cleanup_ring_buffer(dev, &dev_priv->render_ring);
d1b851fc
ZN
4733 if (HAS_BSD(dev))
4734 intel_cleanup_ring_buffer(dev, &dev_priv->bsd_ring);
8187a2b7
ZN
4735 if (HAS_PIPE_CONTROL(dev))
4736 i915_gem_cleanup_pipe_control(dev);
4737}
4738
673a394b
EA
4739int
4740i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
4741 struct drm_file *file_priv)
4742{
4743 drm_i915_private_t *dev_priv = dev->dev_private;
4744 int ret;
4745
79e53945
JB
4746 if (drm_core_check_feature(dev, DRIVER_MODESET))
4747 return 0;
4748
ba1234d1 4749 if (atomic_read(&dev_priv->mm.wedged)) {
673a394b 4750 DRM_ERROR("Reenabling wedged hardware, good luck\n");
ba1234d1 4751 atomic_set(&dev_priv->mm.wedged, 0);
673a394b
EA
4752 }
4753
673a394b 4754 mutex_lock(&dev->struct_mutex);
9bb2d6f9
EA
4755 dev_priv->mm.suspended = 0;
4756
4757 ret = i915_gem_init_ringbuffer(dev);
d816f6ac
WF
4758 if (ret != 0) {
4759 mutex_unlock(&dev->struct_mutex);
9bb2d6f9 4760 return ret;
d816f6ac 4761 }
9bb2d6f9 4762
5e118f41 4763 spin_lock(&dev_priv->mm.active_list_lock);
852835f3 4764 BUG_ON(!list_empty(&dev_priv->render_ring.active_list));
d1b851fc 4765 BUG_ON(HAS_BSD(dev) && !list_empty(&dev_priv->bsd_ring.active_list));
5e118f41
CW
4766 spin_unlock(&dev_priv->mm.active_list_lock);
4767
673a394b
EA
4768 BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
4769 BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
852835f3 4770 BUG_ON(!list_empty(&dev_priv->render_ring.request_list));
d1b851fc 4771 BUG_ON(HAS_BSD(dev) && !list_empty(&dev_priv->bsd_ring.request_list));
673a394b 4772 mutex_unlock(&dev->struct_mutex);
dbb19d30 4773
5f35308b
CW
4774 ret = drm_irq_install(dev);
4775 if (ret)
4776 goto cleanup_ringbuffer;
dbb19d30 4777
673a394b 4778 return 0;
5f35308b
CW
4779
4780cleanup_ringbuffer:
4781 mutex_lock(&dev->struct_mutex);
4782 i915_gem_cleanup_ringbuffer(dev);
4783 dev_priv->mm.suspended = 1;
4784 mutex_unlock(&dev->struct_mutex);
4785
4786 return ret;
673a394b
EA
4787}
4788
4789int
4790i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
4791 struct drm_file *file_priv)
4792{
79e53945
JB
4793 if (drm_core_check_feature(dev, DRIVER_MODESET))
4794 return 0;
4795
dbb19d30 4796 drm_irq_uninstall(dev);
e6890f6f 4797 return i915_gem_idle(dev);
673a394b
EA
4798}
4799
4800void
4801i915_gem_lastclose(struct drm_device *dev)
4802{
4803 int ret;
673a394b 4804
e806b495
EA
4805 if (drm_core_check_feature(dev, DRIVER_MODESET))
4806 return;
4807
6dbe2772
KP
4808 ret = i915_gem_idle(dev);
4809 if (ret)
4810 DRM_ERROR("failed to idle hardware: %d\n", ret);
673a394b
EA
4811}
4812
4813void
4814i915_gem_load(struct drm_device *dev)
4815{
b5aa8a0f 4816 int i;
673a394b
EA
4817 drm_i915_private_t *dev_priv = dev->dev_private;
4818
5e118f41 4819 spin_lock_init(&dev_priv->mm.active_list_lock);
673a394b 4820 INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
99fcb766 4821 INIT_LIST_HEAD(&dev_priv->mm.gpu_write_list);
673a394b 4822 INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
a09ba7fa 4823 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
be72615b 4824 INIT_LIST_HEAD(&dev_priv->mm.deferred_free_list);
852835f3
ZN
4825 INIT_LIST_HEAD(&dev_priv->render_ring.active_list);
4826 INIT_LIST_HEAD(&dev_priv->render_ring.request_list);
d1b851fc
ZN
4827 if (HAS_BSD(dev)) {
4828 INIT_LIST_HEAD(&dev_priv->bsd_ring.active_list);
4829 INIT_LIST_HEAD(&dev_priv->bsd_ring.request_list);
4830 }
007cc8ac
DV
4831 for (i = 0; i < 16; i++)
4832 INIT_LIST_HEAD(&dev_priv->fence_regs[i].lru_list);
673a394b
EA
4833 INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4834 i915_gem_retire_work_handler);
31169714
CW
4835 spin_lock(&shrink_list_lock);
4836 list_add(&dev_priv->mm.shrink_list, &shrink_list);
4837 spin_unlock(&shrink_list_lock);
4838
94400120
DA
4839 /* On GEN3 we really need to make sure the ARB C3 LP bit is set */
4840 if (IS_GEN3(dev)) {
4841 u32 tmp = I915_READ(MI_ARB_STATE);
4842 if (!(tmp & MI_ARB_C3_LP_WRITE_ENABLE)) {
4843 /* arb state is a masked write, so set bit + bit in mask */
4844 tmp = MI_ARB_C3_LP_WRITE_ENABLE | (MI_ARB_C3_LP_WRITE_ENABLE << MI_ARB_MASK_SHIFT);
4845 I915_WRITE(MI_ARB_STATE, tmp);
4846 }
4847 }
4848
de151cf6 4849 /* Old X drivers will take 0-2 for front, back, depth buffers */
b397c836
EA
4850 if (!drm_core_check_feature(dev, DRIVER_MODESET))
4851 dev_priv->fence_reg_start = 3;
de151cf6 4852
0f973f27 4853 if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
de151cf6
JB
4854 dev_priv->num_fence_regs = 16;
4855 else
4856 dev_priv->num_fence_regs = 8;
4857
b5aa8a0f
GH
4858 /* Initialize fence registers to zero */
4859 if (IS_I965G(dev)) {
4860 for (i = 0; i < 16; i++)
4861 I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0);
4862 } else {
4863 for (i = 0; i < 8; i++)
4864 I915_WRITE(FENCE_REG_830_0 + (i * 4), 0);
4865 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4866 for (i = 0; i < 8; i++)
4867 I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
4868 }
673a394b 4869 i915_gem_detect_bit_6_swizzle(dev);
6b95a207 4870 init_waitqueue_head(&dev_priv->pending_flip_queue);
673a394b 4871}
71acb5eb
DA
4872
4873/*
4874 * Create a physically contiguous memory object for this object
4875 * e.g. for cursor + overlay regs
4876 */
4877int i915_gem_init_phys_object(struct drm_device *dev,
4878 int id, int size)
4879{
4880 drm_i915_private_t *dev_priv = dev->dev_private;
4881 struct drm_i915_gem_phys_object *phys_obj;
4882 int ret;
4883
4884 if (dev_priv->mm.phys_objs[id - 1] || !size)
4885 return 0;
4886
9a298b2a 4887 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
71acb5eb
DA
4888 if (!phys_obj)
4889 return -ENOMEM;
4890
4891 phys_obj->id = id;
4892
e6be8d9d 4893 phys_obj->handle = drm_pci_alloc(dev, size, 0);
71acb5eb
DA
4894 if (!phys_obj->handle) {
4895 ret = -ENOMEM;
4896 goto kfree_obj;
4897 }
4898#ifdef CONFIG_X86
4899 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4900#endif
4901
4902 dev_priv->mm.phys_objs[id - 1] = phys_obj;
4903
4904 return 0;
4905kfree_obj:
9a298b2a 4906 kfree(phys_obj);
71acb5eb
DA
4907 return ret;
4908}
4909
4910void i915_gem_free_phys_object(struct drm_device *dev, int id)
4911{
4912 drm_i915_private_t *dev_priv = dev->dev_private;
4913 struct drm_i915_gem_phys_object *phys_obj;
4914
4915 if (!dev_priv->mm.phys_objs[id - 1])
4916 return;
4917
4918 phys_obj = dev_priv->mm.phys_objs[id - 1];
4919 if (phys_obj->cur_obj) {
4920 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
4921 }
4922
4923#ifdef CONFIG_X86
4924 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4925#endif
4926 drm_pci_free(dev, phys_obj->handle);
4927 kfree(phys_obj);
4928 dev_priv->mm.phys_objs[id - 1] = NULL;
4929}
4930
4931void i915_gem_free_all_phys_object(struct drm_device *dev)
4932{
4933 int i;
4934
260883c8 4935 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
71acb5eb
DA
4936 i915_gem_free_phys_object(dev, i);
4937}
4938
4939void i915_gem_detach_phys_object(struct drm_device *dev,
4940 struct drm_gem_object *obj)
4941{
4942 struct drm_i915_gem_object *obj_priv;
4943 int i;
4944 int ret;
4945 int page_count;
4946
23010e43 4947 obj_priv = to_intel_bo(obj);
71acb5eb
DA
4948 if (!obj_priv->phys_obj)
4949 return;
4950
4bdadb97 4951 ret = i915_gem_object_get_pages(obj, 0);
71acb5eb
DA
4952 if (ret)
4953 goto out;
4954
4955 page_count = obj->size / PAGE_SIZE;
4956
4957 for (i = 0; i < page_count; i++) {
856fa198 4958 char *dst = kmap_atomic(obj_priv->pages[i], KM_USER0);
71acb5eb
DA
4959 char *src = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4960
4961 memcpy(dst, src, PAGE_SIZE);
4962 kunmap_atomic(dst, KM_USER0);
4963 }
856fa198 4964 drm_clflush_pages(obj_priv->pages, page_count);
71acb5eb 4965 drm_agp_chipset_flush(dev);
d78b47b9
CW
4966
4967 i915_gem_object_put_pages(obj);
71acb5eb
DA
4968out:
4969 obj_priv->phys_obj->cur_obj = NULL;
4970 obj_priv->phys_obj = NULL;
4971}
4972
4973int
4974i915_gem_attach_phys_object(struct drm_device *dev,
4975 struct drm_gem_object *obj, int id)
4976{
4977 drm_i915_private_t *dev_priv = dev->dev_private;
4978 struct drm_i915_gem_object *obj_priv;
4979 int ret = 0;
4980 int page_count;
4981 int i;
4982
4983 if (id > I915_MAX_PHYS_OBJECT)
4984 return -EINVAL;
4985
23010e43 4986 obj_priv = to_intel_bo(obj);
71acb5eb
DA
4987
4988 if (obj_priv->phys_obj) {
4989 if (obj_priv->phys_obj->id == id)
4990 return 0;
4991 i915_gem_detach_phys_object(dev, obj);
4992 }
4993
4994
4995 /* create a new object */
4996 if (!dev_priv->mm.phys_objs[id - 1]) {
4997 ret = i915_gem_init_phys_object(dev, id,
4998 obj->size);
4999 if (ret) {
aeb565df 5000 DRM_ERROR("failed to init phys object %d size: %zu\n", id, obj->size);
71acb5eb
DA
5001 goto out;
5002 }
5003 }
5004
5005 /* bind to the object */
5006 obj_priv->phys_obj = dev_priv->mm.phys_objs[id - 1];
5007 obj_priv->phys_obj->cur_obj = obj;
5008
4bdadb97 5009 ret = i915_gem_object_get_pages(obj, 0);
71acb5eb
DA
5010 if (ret) {
5011 DRM_ERROR("failed to get page list\n");
5012 goto out;
5013 }
5014
5015 page_count = obj->size / PAGE_SIZE;
5016
5017 for (i = 0; i < page_count; i++) {
856fa198 5018 char *src = kmap_atomic(obj_priv->pages[i], KM_USER0);
71acb5eb
DA
5019 char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
5020
5021 memcpy(dst, src, PAGE_SIZE);
5022 kunmap_atomic(src, KM_USER0);
5023 }
5024
d78b47b9
CW
5025 i915_gem_object_put_pages(obj);
5026
71acb5eb
DA
5027 return 0;
5028out:
5029 return ret;
5030}
5031
5032static int
5033i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
5034 struct drm_i915_gem_pwrite *args,
5035 struct drm_file *file_priv)
5036{
23010e43 5037 struct drm_i915_gem_object *obj_priv = to_intel_bo(obj);
71acb5eb
DA
5038 void *obj_addr;
5039 int ret;
5040 char __user *user_data;
5041
5042 user_data = (char __user *) (uintptr_t) args->data_ptr;
5043 obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset;
5044
44d98a61 5045 DRM_DEBUG_DRIVER("obj_addr %p, %lld\n", obj_addr, args->size);
71acb5eb
DA
5046 ret = copy_from_user(obj_addr, user_data, args->size);
5047 if (ret)
5048 return -EFAULT;
5049
5050 drm_agp_chipset_flush(dev);
5051 return 0;
5052}
b962442e
EA
5053
5054void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv)
5055{
5056 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
5057
5058 /* Clean up our request list when the client is going away, so that
5059 * later retire_requests won't dereference our soon-to-be-gone
5060 * file_priv.
5061 */
5062 mutex_lock(&dev->struct_mutex);
5063 while (!list_empty(&i915_file_priv->mm.request_list))
5064 list_del_init(i915_file_priv->mm.request_list.next);
5065 mutex_unlock(&dev->struct_mutex);
5066}
31169714 5067
1637ef41
CW
5068static int
5069i915_gpu_is_active(struct drm_device *dev)
5070{
5071 drm_i915_private_t *dev_priv = dev->dev_private;
5072 int lists_empty;
5073
5074 spin_lock(&dev_priv->mm.active_list_lock);
5075 lists_empty = list_empty(&dev_priv->mm.flushing_list) &&
852835f3 5076 list_empty(&dev_priv->render_ring.active_list);
d1b851fc
ZN
5077 if (HAS_BSD(dev))
5078 lists_empty &= list_empty(&dev_priv->bsd_ring.active_list);
1637ef41
CW
5079 spin_unlock(&dev_priv->mm.active_list_lock);
5080
5081 return !lists_empty;
5082}
5083
31169714 5084static int
7f8275d0 5085i915_gem_shrink(struct shrinker *shrink, int nr_to_scan, gfp_t gfp_mask)
31169714
CW
5086{
5087 drm_i915_private_t *dev_priv, *next_dev;
5088 struct drm_i915_gem_object *obj_priv, *next_obj;
5089 int cnt = 0;
5090 int would_deadlock = 1;
5091
5092 /* "fast-path" to count number of available objects */
5093 if (nr_to_scan == 0) {
5094 spin_lock(&shrink_list_lock);
5095 list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
5096 struct drm_device *dev = dev_priv->dev;
5097
5098 if (mutex_trylock(&dev->struct_mutex)) {
5099 list_for_each_entry(obj_priv,
5100 &dev_priv->mm.inactive_list,
5101 list)
5102 cnt++;
5103 mutex_unlock(&dev->struct_mutex);
5104 }
5105 }
5106 spin_unlock(&shrink_list_lock);
5107
5108 return (cnt / 100) * sysctl_vfs_cache_pressure;
5109 }
5110
5111 spin_lock(&shrink_list_lock);
5112
1637ef41 5113rescan:
31169714
CW
5114 /* first scan for clean buffers */
5115 list_for_each_entry_safe(dev_priv, next_dev,
5116 &shrink_list, mm.shrink_list) {
5117 struct drm_device *dev = dev_priv->dev;
5118
5119 if (! mutex_trylock(&dev->struct_mutex))
5120 continue;
5121
5122 spin_unlock(&shrink_list_lock);
b09a1fec 5123 i915_gem_retire_requests(dev);
31169714
CW
5124
5125 list_for_each_entry_safe(obj_priv, next_obj,
5126 &dev_priv->mm.inactive_list,
5127 list) {
5128 if (i915_gem_object_is_purgeable(obj_priv)) {
a8089e84 5129 i915_gem_object_unbind(&obj_priv->base);
31169714
CW
5130 if (--nr_to_scan <= 0)
5131 break;
5132 }
5133 }
5134
5135 spin_lock(&shrink_list_lock);
5136 mutex_unlock(&dev->struct_mutex);
5137
963b4836
CW
5138 would_deadlock = 0;
5139
31169714
CW
5140 if (nr_to_scan <= 0)
5141 break;
5142 }
5143
5144 /* second pass, evict/count anything still on the inactive list */
5145 list_for_each_entry_safe(dev_priv, next_dev,
5146 &shrink_list, mm.shrink_list) {
5147 struct drm_device *dev = dev_priv->dev;
5148
5149 if (! mutex_trylock(&dev->struct_mutex))
5150 continue;
5151
5152 spin_unlock(&shrink_list_lock);
5153
5154 list_for_each_entry_safe(obj_priv, next_obj,
5155 &dev_priv->mm.inactive_list,
5156 list) {
5157 if (nr_to_scan > 0) {
a8089e84 5158 i915_gem_object_unbind(&obj_priv->base);
31169714
CW
5159 nr_to_scan--;
5160 } else
5161 cnt++;
5162 }
5163
5164 spin_lock(&shrink_list_lock);
5165 mutex_unlock(&dev->struct_mutex);
5166
5167 would_deadlock = 0;
5168 }
5169
1637ef41
CW
5170 if (nr_to_scan) {
5171 int active = 0;
5172
5173 /*
5174 * We are desperate for pages, so as a last resort, wait
5175 * for the GPU to finish and discard whatever we can.
5176 * This has a dramatic impact to reduce the number of
5177 * OOM-killer events whilst running the GPU aggressively.
5178 */
5179 list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
5180 struct drm_device *dev = dev_priv->dev;
5181
5182 if (!mutex_trylock(&dev->struct_mutex))
5183 continue;
5184
5185 spin_unlock(&shrink_list_lock);
5186
5187 if (i915_gpu_is_active(dev)) {
5188 i915_gpu_idle(dev);
5189 active++;
5190 }
5191
5192 spin_lock(&shrink_list_lock);
5193 mutex_unlock(&dev->struct_mutex);
5194 }
5195
5196 if (active)
5197 goto rescan;
5198 }
5199
31169714
CW
5200 spin_unlock(&shrink_list_lock);
5201
5202 if (would_deadlock)
5203 return -1;
5204 else if (cnt > 0)
5205 return (cnt / 100) * sysctl_vfs_cache_pressure;
5206 else
5207 return 0;
5208}
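
/* Illustrative note on the shrinker accounting above: with 2500 inactive
 * objects and the default sysctl_vfs_cache_pressure of 100, the count-only
 * path reports (2500 / 100) * 100 = 2500 reclaimable units to the VM;
 * doubling the sysctl to 200 doubles the reported pressure without changing
 * what is actually freed per scan.
 */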
5209
5210static struct shrinker shrinker = {
5211 .shrink = i915_gem_shrink,
5212 .seeks = DEFAULT_SEEKS,
5213};
5214
5215__init void
5216i915_gem_shrinker_init(void)
5217{
5218 register_shrinker(&shrinker);
5219}
5220
5221__exit void
5222i915_gem_shrinker_exit(void)
5223{
5224 unregister_shrinker(&shrinker);
5225}