]> bbs.cooldavid.org Git - net-next-2.6.git/blame - drivers/gpu/drm/i915/i915_gem.c
drm/i915: Add ioctl to set 'purgeability' of objects
[net-next-2.6.git] / drivers / gpu / drm / i915 / i915_gem.c
CommitLineData
673a394b
EA
1/*
2 * Copyright © 2008 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * Authors:
24 * Eric Anholt <eric@anholt.net>
25 *
26 */
27
28#include "drmP.h"
29#include "drm.h"
30#include "i915_drm.h"
31#include "i915_drv.h"
652c393a 32#include "intel_drv.h"
673a394b 33#include <linux/swap.h>
79e53945 34#include <linux/pci.h>
673a394b 35
28dfe52a
EA
36#define I915_GEM_GPU_DOMAINS (~(I915_GEM_DOMAIN_CPU | I915_GEM_DOMAIN_GTT))
37
e47c68e9
EA
38static void i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj);
39static void i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj);
40static void i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj);
e47c68e9
EA
41static int i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj,
42 int write);
43static int i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
44 uint64_t offset,
45 uint64_t size);
46static void i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj);
673a394b 47static int i915_gem_object_wait_rendering(struct drm_gem_object *obj);
de151cf6
JB
48static int i915_gem_object_bind_to_gtt(struct drm_gem_object *obj,
49 unsigned alignment);
de151cf6
JB
50static void i915_gem_clear_fence_reg(struct drm_gem_object *obj);
51static int i915_gem_evict_something(struct drm_device *dev);
71acb5eb
DA
52static int i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
53 struct drm_i915_gem_pwrite *args,
54 struct drm_file *file_priv);
673a394b 55
31169714
CW
56static LIST_HEAD(shrink_list);
57static DEFINE_SPINLOCK(shrink_list_lock);
58
79e53945
JB
59int i915_gem_do_init(struct drm_device *dev, unsigned long start,
60 unsigned long end)
673a394b
EA
61{
62 drm_i915_private_t *dev_priv = dev->dev_private;
673a394b 63
79e53945
JB
64 if (start >= end ||
65 (start & (PAGE_SIZE - 1)) != 0 ||
66 (end & (PAGE_SIZE - 1)) != 0) {
673a394b
EA
67 return -EINVAL;
68 }
69
79e53945
JB
70 drm_mm_init(&dev_priv->mm.gtt_space, start,
71 end - start);
673a394b 72
79e53945
JB
73 dev->gtt_total = (uint32_t) (end - start);
74
75 return 0;
76}
673a394b 77
79e53945
JB
78int
79i915_gem_init_ioctl(struct drm_device *dev, void *data,
80 struct drm_file *file_priv)
81{
82 struct drm_i915_gem_init *args = data;
83 int ret;
84
85 mutex_lock(&dev->struct_mutex);
86 ret = i915_gem_do_init(dev, args->gtt_start, args->gtt_end);
673a394b
EA
87 mutex_unlock(&dev->struct_mutex);
88
79e53945 89 return ret;
673a394b
EA
90}
91
5a125c3c
EA
92int
93i915_gem_get_aperture_ioctl(struct drm_device *dev, void *data,
94 struct drm_file *file_priv)
95{
5a125c3c 96 struct drm_i915_gem_get_aperture *args = data;
5a125c3c
EA
97
98 if (!(dev->driver->driver_features & DRIVER_GEM))
99 return -ENODEV;
100
101 args->aper_size = dev->gtt_total;
2678d9d6
KP
102 args->aper_available_size = (args->aper_size -
103 atomic_read(&dev->pin_memory));
5a125c3c
EA
104
105 return 0;
106}
107
673a394b
EA
108
109/**
110 * Creates a new mm object and returns a handle to it.
111 */
112int
113i915_gem_create_ioctl(struct drm_device *dev, void *data,
114 struct drm_file *file_priv)
115{
116 struct drm_i915_gem_create *args = data;
117 struct drm_gem_object *obj;
a1a2d1d3
PP
118 int ret;
119 u32 handle;
673a394b
EA
120
121 args->size = roundup(args->size, PAGE_SIZE);
122
123 /* Allocate the new object */
124 obj = drm_gem_object_alloc(dev, args->size);
125 if (obj == NULL)
126 return -ENOMEM;
127
128 ret = drm_gem_handle_create(file_priv, obj, &handle);
129 mutex_lock(&dev->struct_mutex);
130 drm_gem_object_handle_unreference(obj);
131 mutex_unlock(&dev->struct_mutex);
132
133 if (ret)
134 return ret;
135
136 args->handle = handle;
137
138 return 0;
139}
140
eb01459f
EA
141static inline int
142fast_shmem_read(struct page **pages,
143 loff_t page_base, int page_offset,
144 char __user *data,
145 int length)
146{
147 char __iomem *vaddr;
2bc43b5c 148 int unwritten;
eb01459f
EA
149
150 vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
151 if (vaddr == NULL)
152 return -ENOMEM;
2bc43b5c 153 unwritten = __copy_to_user_inatomic(data, vaddr + page_offset, length);
eb01459f
EA
154 kunmap_atomic(vaddr, KM_USER0);
155
2bc43b5c
FM
156 if (unwritten)
157 return -EFAULT;
158
159 return 0;
eb01459f
EA
160}
161
280b713b
EA
162static int i915_gem_object_needs_bit17_swizzle(struct drm_gem_object *obj)
163{
164 drm_i915_private_t *dev_priv = obj->dev->dev_private;
165 struct drm_i915_gem_object *obj_priv = obj->driver_private;
166
167 return dev_priv->mm.bit_6_swizzle_x == I915_BIT_6_SWIZZLE_9_10_17 &&
168 obj_priv->tiling_mode != I915_TILING_NONE;
169}
170
40123c1f
EA
171static inline int
172slow_shmem_copy(struct page *dst_page,
173 int dst_offset,
174 struct page *src_page,
175 int src_offset,
176 int length)
177{
178 char *dst_vaddr, *src_vaddr;
179
180 dst_vaddr = kmap_atomic(dst_page, KM_USER0);
181 if (dst_vaddr == NULL)
182 return -ENOMEM;
183
184 src_vaddr = kmap_atomic(src_page, KM_USER1);
185 if (src_vaddr == NULL) {
186 kunmap_atomic(dst_vaddr, KM_USER0);
187 return -ENOMEM;
188 }
189
190 memcpy(dst_vaddr + dst_offset, src_vaddr + src_offset, length);
191
192 kunmap_atomic(src_vaddr, KM_USER1);
193 kunmap_atomic(dst_vaddr, KM_USER0);
194
195 return 0;
196}
197
280b713b
EA
198static inline int
199slow_shmem_bit17_copy(struct page *gpu_page,
200 int gpu_offset,
201 struct page *cpu_page,
202 int cpu_offset,
203 int length,
204 int is_read)
205{
206 char *gpu_vaddr, *cpu_vaddr;
207
208 /* Use the unswizzled path if this page isn't affected. */
209 if ((page_to_phys(gpu_page) & (1 << 17)) == 0) {
210 if (is_read)
211 return slow_shmem_copy(cpu_page, cpu_offset,
212 gpu_page, gpu_offset, length);
213 else
214 return slow_shmem_copy(gpu_page, gpu_offset,
215 cpu_page, cpu_offset, length);
216 }
217
218 gpu_vaddr = kmap_atomic(gpu_page, KM_USER0);
219 if (gpu_vaddr == NULL)
220 return -ENOMEM;
221
222 cpu_vaddr = kmap_atomic(cpu_page, KM_USER1);
223 if (cpu_vaddr == NULL) {
224 kunmap_atomic(gpu_vaddr, KM_USER0);
225 return -ENOMEM;
226 }
227
228 /* Copy the data, XORing A6 with A17 (1). The user already knows he's
229 * XORing with the other bits (A9 for Y, A9 and A10 for X)
230 */
231 while (length > 0) {
232 int cacheline_end = ALIGN(gpu_offset + 1, 64);
233 int this_length = min(cacheline_end - gpu_offset, length);
234 int swizzled_gpu_offset = gpu_offset ^ 64;
235
236 if (is_read) {
237 memcpy(cpu_vaddr + cpu_offset,
238 gpu_vaddr + swizzled_gpu_offset,
239 this_length);
240 } else {
241 memcpy(gpu_vaddr + swizzled_gpu_offset,
242 cpu_vaddr + cpu_offset,
243 this_length);
244 }
245 cpu_offset += this_length;
246 gpu_offset += this_length;
247 length -= this_length;
248 }
249
250 kunmap_atomic(cpu_vaddr, KM_USER1);
251 kunmap_atomic(gpu_vaddr, KM_USER0);
252
253 return 0;
254}
255
eb01459f
EA
256/**
257 * This is the fast shmem pread path, which attempts to copy_from_user directly
258 * from the backing pages of the object to the user's address space. On a
259 * fault, it fails so we can fall back to i915_gem_shmem_pwrite_slow().
260 */
261static int
262i915_gem_shmem_pread_fast(struct drm_device *dev, struct drm_gem_object *obj,
263 struct drm_i915_gem_pread *args,
264 struct drm_file *file_priv)
265{
266 struct drm_i915_gem_object *obj_priv = obj->driver_private;
267 ssize_t remain;
268 loff_t offset, page_base;
269 char __user *user_data;
270 int page_offset, page_length;
271 int ret;
272
273 user_data = (char __user *) (uintptr_t) args->data_ptr;
274 remain = args->size;
275
276 mutex_lock(&dev->struct_mutex);
277
278 ret = i915_gem_object_get_pages(obj);
279 if (ret != 0)
280 goto fail_unlock;
281
282 ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
283 args->size);
284 if (ret != 0)
285 goto fail_put_pages;
286
287 obj_priv = obj->driver_private;
288 offset = args->offset;
289
290 while (remain > 0) {
291 /* Operation in this page
292 *
293 * page_base = page offset within aperture
294 * page_offset = offset within page
295 * page_length = bytes to copy for this page
296 */
297 page_base = (offset & ~(PAGE_SIZE-1));
298 page_offset = offset & (PAGE_SIZE-1);
299 page_length = remain;
300 if ((page_offset + remain) > PAGE_SIZE)
301 page_length = PAGE_SIZE - page_offset;
302
303 ret = fast_shmem_read(obj_priv->pages,
304 page_base, page_offset,
305 user_data, page_length);
306 if (ret)
307 goto fail_put_pages;
308
309 remain -= page_length;
310 user_data += page_length;
311 offset += page_length;
312 }
313
314fail_put_pages:
315 i915_gem_object_put_pages(obj);
316fail_unlock:
317 mutex_unlock(&dev->struct_mutex);
318
319 return ret;
320}
321
322/**
323 * This is the fallback shmem pread path, which allocates temporary storage
324 * in kernel space to copy_to_user into outside of the struct_mutex, so we
325 * can copy out of the object's backing pages while holding the struct mutex
326 * and not take page faults.
327 */
328static int
329i915_gem_shmem_pread_slow(struct drm_device *dev, struct drm_gem_object *obj,
330 struct drm_i915_gem_pread *args,
331 struct drm_file *file_priv)
332{
333 struct drm_i915_gem_object *obj_priv = obj->driver_private;
334 struct mm_struct *mm = current->mm;
335 struct page **user_pages;
336 ssize_t remain;
337 loff_t offset, pinned_pages, i;
338 loff_t first_data_page, last_data_page, num_pages;
339 int shmem_page_index, shmem_page_offset;
340 int data_page_index, data_page_offset;
341 int page_length;
342 int ret;
343 uint64_t data_ptr = args->data_ptr;
280b713b 344 int do_bit17_swizzling;
eb01459f
EA
345
346 remain = args->size;
347
348 /* Pin the user pages containing the data. We can't fault while
349 * holding the struct mutex, yet we want to hold it while
350 * dereferencing the user data.
351 */
352 first_data_page = data_ptr / PAGE_SIZE;
353 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
354 num_pages = last_data_page - first_data_page + 1;
355
8e7d2b2c 356 user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
eb01459f
EA
357 if (user_pages == NULL)
358 return -ENOMEM;
359
360 down_read(&mm->mmap_sem);
361 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
e5e9ecde 362 num_pages, 1, 0, user_pages, NULL);
eb01459f
EA
363 up_read(&mm->mmap_sem);
364 if (pinned_pages < num_pages) {
365 ret = -EFAULT;
366 goto fail_put_user_pages;
367 }
368
280b713b
EA
369 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
370
eb01459f
EA
371 mutex_lock(&dev->struct_mutex);
372
373 ret = i915_gem_object_get_pages(obj);
374 if (ret != 0)
375 goto fail_unlock;
376
377 ret = i915_gem_object_set_cpu_read_domain_range(obj, args->offset,
378 args->size);
379 if (ret != 0)
380 goto fail_put_pages;
381
382 obj_priv = obj->driver_private;
383 offset = args->offset;
384
385 while (remain > 0) {
386 /* Operation in this page
387 *
388 * shmem_page_index = page number within shmem file
389 * shmem_page_offset = offset within page in shmem file
390 * data_page_index = page number in get_user_pages return
391 * data_page_offset = offset with data_page_index page.
392 * page_length = bytes to copy for this page
393 */
394 shmem_page_index = offset / PAGE_SIZE;
395 shmem_page_offset = offset & ~PAGE_MASK;
396 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
397 data_page_offset = data_ptr & ~PAGE_MASK;
398
399 page_length = remain;
400 if ((shmem_page_offset + page_length) > PAGE_SIZE)
401 page_length = PAGE_SIZE - shmem_page_offset;
402 if ((data_page_offset + page_length) > PAGE_SIZE)
403 page_length = PAGE_SIZE - data_page_offset;
404
280b713b
EA
405 if (do_bit17_swizzling) {
406 ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
407 shmem_page_offset,
408 user_pages[data_page_index],
409 data_page_offset,
410 page_length,
411 1);
412 } else {
413 ret = slow_shmem_copy(user_pages[data_page_index],
414 data_page_offset,
415 obj_priv->pages[shmem_page_index],
416 shmem_page_offset,
417 page_length);
418 }
eb01459f
EA
419 if (ret)
420 goto fail_put_pages;
421
422 remain -= page_length;
423 data_ptr += page_length;
424 offset += page_length;
425 }
426
427fail_put_pages:
428 i915_gem_object_put_pages(obj);
429fail_unlock:
430 mutex_unlock(&dev->struct_mutex);
431fail_put_user_pages:
432 for (i = 0; i < pinned_pages; i++) {
433 SetPageDirty(user_pages[i]);
434 page_cache_release(user_pages[i]);
435 }
8e7d2b2c 436 drm_free_large(user_pages);
eb01459f
EA
437
438 return ret;
439}
440
673a394b
EA
441/**
442 * Reads data from the object referenced by handle.
443 *
444 * On error, the contents of *data are undefined.
445 */
446int
447i915_gem_pread_ioctl(struct drm_device *dev, void *data,
448 struct drm_file *file_priv)
449{
450 struct drm_i915_gem_pread *args = data;
451 struct drm_gem_object *obj;
452 struct drm_i915_gem_object *obj_priv;
673a394b
EA
453 int ret;
454
455 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
456 if (obj == NULL)
457 return -EBADF;
458 obj_priv = obj->driver_private;
459
460 /* Bounds check source.
461 *
462 * XXX: This could use review for overflow issues...
463 */
464 if (args->offset > obj->size || args->size > obj->size ||
465 args->offset + args->size > obj->size) {
466 drm_gem_object_unreference(obj);
467 return -EINVAL;
468 }
469
280b713b 470 if (i915_gem_object_needs_bit17_swizzle(obj)) {
eb01459f 471 ret = i915_gem_shmem_pread_slow(dev, obj, args, file_priv);
280b713b
EA
472 } else {
473 ret = i915_gem_shmem_pread_fast(dev, obj, args, file_priv);
474 if (ret != 0)
475 ret = i915_gem_shmem_pread_slow(dev, obj, args,
476 file_priv);
477 }
673a394b
EA
478
479 drm_gem_object_unreference(obj);
673a394b 480
eb01459f 481 return ret;
673a394b
EA
482}
483
0839ccb8
KP
484/* This is the fast write path which cannot handle
485 * page faults in the source data
9b7530cc 486 */
0839ccb8
KP
487
488static inline int
489fast_user_write(struct io_mapping *mapping,
490 loff_t page_base, int page_offset,
491 char __user *user_data,
492 int length)
9b7530cc 493{
9b7530cc 494 char *vaddr_atomic;
0839ccb8 495 unsigned long unwritten;
9b7530cc 496
0839ccb8
KP
497 vaddr_atomic = io_mapping_map_atomic_wc(mapping, page_base);
498 unwritten = __copy_from_user_inatomic_nocache(vaddr_atomic + page_offset,
499 user_data, length);
500 io_mapping_unmap_atomic(vaddr_atomic);
501 if (unwritten)
502 return -EFAULT;
503 return 0;
504}
505
506/* Here's the write path which can sleep for
507 * page faults
508 */
509
510static inline int
3de09aa3
EA
511slow_kernel_write(struct io_mapping *mapping,
512 loff_t gtt_base, int gtt_offset,
513 struct page *user_page, int user_offset,
514 int length)
0839ccb8 515{
3de09aa3 516 char *src_vaddr, *dst_vaddr;
0839ccb8
KP
517 unsigned long unwritten;
518
3de09aa3
EA
519 dst_vaddr = io_mapping_map_atomic_wc(mapping, gtt_base);
520 src_vaddr = kmap_atomic(user_page, KM_USER1);
521 unwritten = __copy_from_user_inatomic_nocache(dst_vaddr + gtt_offset,
522 src_vaddr + user_offset,
523 length);
524 kunmap_atomic(src_vaddr, KM_USER1);
525 io_mapping_unmap_atomic(dst_vaddr);
0839ccb8
KP
526 if (unwritten)
527 return -EFAULT;
9b7530cc 528 return 0;
9b7530cc
LT
529}
530
40123c1f
EA
531static inline int
532fast_shmem_write(struct page **pages,
533 loff_t page_base, int page_offset,
534 char __user *data,
535 int length)
536{
537 char __iomem *vaddr;
d0088775 538 unsigned long unwritten;
40123c1f
EA
539
540 vaddr = kmap_atomic(pages[page_base >> PAGE_SHIFT], KM_USER0);
541 if (vaddr == NULL)
542 return -ENOMEM;
d0088775 543 unwritten = __copy_from_user_inatomic(vaddr + page_offset, data, length);
40123c1f
EA
544 kunmap_atomic(vaddr, KM_USER0);
545
d0088775
DA
546 if (unwritten)
547 return -EFAULT;
40123c1f
EA
548 return 0;
549}
550
3de09aa3
EA
551/**
552 * This is the fast pwrite path, where we copy the data directly from the
553 * user into the GTT, uncached.
554 */
673a394b 555static int
3de09aa3
EA
556i915_gem_gtt_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
557 struct drm_i915_gem_pwrite *args,
558 struct drm_file *file_priv)
673a394b
EA
559{
560 struct drm_i915_gem_object *obj_priv = obj->driver_private;
0839ccb8 561 drm_i915_private_t *dev_priv = dev->dev_private;
673a394b 562 ssize_t remain;
0839ccb8 563 loff_t offset, page_base;
673a394b 564 char __user *user_data;
0839ccb8
KP
565 int page_offset, page_length;
566 int ret;
673a394b
EA
567
568 user_data = (char __user *) (uintptr_t) args->data_ptr;
569 remain = args->size;
570 if (!access_ok(VERIFY_READ, user_data, remain))
571 return -EFAULT;
572
573
574 mutex_lock(&dev->struct_mutex);
575 ret = i915_gem_object_pin(obj, 0);
576 if (ret) {
577 mutex_unlock(&dev->struct_mutex);
578 return ret;
579 }
2ef7eeaa 580 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
673a394b
EA
581 if (ret)
582 goto fail;
583
584 obj_priv = obj->driver_private;
585 offset = obj_priv->gtt_offset + args->offset;
673a394b
EA
586
587 while (remain > 0) {
588 /* Operation in this page
589 *
0839ccb8
KP
590 * page_base = page offset within aperture
591 * page_offset = offset within page
592 * page_length = bytes to copy for this page
673a394b 593 */
0839ccb8
KP
594 page_base = (offset & ~(PAGE_SIZE-1));
595 page_offset = offset & (PAGE_SIZE-1);
596 page_length = remain;
597 if ((page_offset + remain) > PAGE_SIZE)
598 page_length = PAGE_SIZE - page_offset;
599
600 ret = fast_user_write (dev_priv->mm.gtt_mapping, page_base,
601 page_offset, user_data, page_length);
602
603 /* If we get a fault while copying data, then (presumably) our
3de09aa3
EA
604 * source page isn't available. Return the error and we'll
605 * retry in the slow path.
0839ccb8 606 */
3de09aa3
EA
607 if (ret)
608 goto fail;
673a394b 609
0839ccb8
KP
610 remain -= page_length;
611 user_data += page_length;
612 offset += page_length;
673a394b 613 }
673a394b
EA
614
615fail:
616 i915_gem_object_unpin(obj);
617 mutex_unlock(&dev->struct_mutex);
618
619 return ret;
620}
621
3de09aa3
EA
622/**
623 * This is the fallback GTT pwrite path, which uses get_user_pages to pin
624 * the memory and maps it using kmap_atomic for copying.
625 *
626 * This code resulted in x11perf -rgb10text consuming about 10% more CPU
627 * than using i915_gem_gtt_pwrite_fast on a G45 (32-bit).
628 */
3043c60c 629static int
3de09aa3
EA
630i915_gem_gtt_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
631 struct drm_i915_gem_pwrite *args,
632 struct drm_file *file_priv)
673a394b 633{
3de09aa3
EA
634 struct drm_i915_gem_object *obj_priv = obj->driver_private;
635 drm_i915_private_t *dev_priv = dev->dev_private;
636 ssize_t remain;
637 loff_t gtt_page_base, offset;
638 loff_t first_data_page, last_data_page, num_pages;
639 loff_t pinned_pages, i;
640 struct page **user_pages;
641 struct mm_struct *mm = current->mm;
642 int gtt_page_offset, data_page_offset, data_page_index, page_length;
673a394b 643 int ret;
3de09aa3
EA
644 uint64_t data_ptr = args->data_ptr;
645
646 remain = args->size;
647
648 /* Pin the user pages containing the data. We can't fault while
649 * holding the struct mutex, and all of the pwrite implementations
650 * want to hold it while dereferencing the user data.
651 */
652 first_data_page = data_ptr / PAGE_SIZE;
653 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
654 num_pages = last_data_page - first_data_page + 1;
655
8e7d2b2c 656 user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
3de09aa3
EA
657 if (user_pages == NULL)
658 return -ENOMEM;
659
660 down_read(&mm->mmap_sem);
661 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
662 num_pages, 0, 0, user_pages, NULL);
663 up_read(&mm->mmap_sem);
664 if (pinned_pages < num_pages) {
665 ret = -EFAULT;
666 goto out_unpin_pages;
667 }
673a394b
EA
668
669 mutex_lock(&dev->struct_mutex);
3de09aa3
EA
670 ret = i915_gem_object_pin(obj, 0);
671 if (ret)
672 goto out_unlock;
673
674 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
675 if (ret)
676 goto out_unpin_object;
677
678 obj_priv = obj->driver_private;
679 offset = obj_priv->gtt_offset + args->offset;
680
681 while (remain > 0) {
682 /* Operation in this page
683 *
684 * gtt_page_base = page offset within aperture
685 * gtt_page_offset = offset within page in aperture
686 * data_page_index = page number in get_user_pages return
687 * data_page_offset = offset with data_page_index page.
688 * page_length = bytes to copy for this page
689 */
690 gtt_page_base = offset & PAGE_MASK;
691 gtt_page_offset = offset & ~PAGE_MASK;
692 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
693 data_page_offset = data_ptr & ~PAGE_MASK;
694
695 page_length = remain;
696 if ((gtt_page_offset + page_length) > PAGE_SIZE)
697 page_length = PAGE_SIZE - gtt_page_offset;
698 if ((data_page_offset + page_length) > PAGE_SIZE)
699 page_length = PAGE_SIZE - data_page_offset;
700
701 ret = slow_kernel_write(dev_priv->mm.gtt_mapping,
702 gtt_page_base, gtt_page_offset,
703 user_pages[data_page_index],
704 data_page_offset,
705 page_length);
706
707 /* If we get a fault while copying data, then (presumably) our
708 * source page isn't available. Return the error and we'll
709 * retry in the slow path.
710 */
711 if (ret)
712 goto out_unpin_object;
713
714 remain -= page_length;
715 offset += page_length;
716 data_ptr += page_length;
717 }
718
719out_unpin_object:
720 i915_gem_object_unpin(obj);
721out_unlock:
722 mutex_unlock(&dev->struct_mutex);
723out_unpin_pages:
724 for (i = 0; i < pinned_pages; i++)
725 page_cache_release(user_pages[i]);
8e7d2b2c 726 drm_free_large(user_pages);
3de09aa3
EA
727
728 return ret;
729}
730
40123c1f
EA
731/**
732 * This is the fast shmem pwrite path, which attempts to directly
733 * copy_from_user into the kmapped pages backing the object.
734 */
3043c60c 735static int
40123c1f
EA
736i915_gem_shmem_pwrite_fast(struct drm_device *dev, struct drm_gem_object *obj,
737 struct drm_i915_gem_pwrite *args,
738 struct drm_file *file_priv)
673a394b 739{
40123c1f
EA
740 struct drm_i915_gem_object *obj_priv = obj->driver_private;
741 ssize_t remain;
742 loff_t offset, page_base;
743 char __user *user_data;
744 int page_offset, page_length;
673a394b 745 int ret;
40123c1f
EA
746
747 user_data = (char __user *) (uintptr_t) args->data_ptr;
748 remain = args->size;
673a394b
EA
749
750 mutex_lock(&dev->struct_mutex);
751
40123c1f
EA
752 ret = i915_gem_object_get_pages(obj);
753 if (ret != 0)
754 goto fail_unlock;
673a394b 755
e47c68e9 756 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
40123c1f
EA
757 if (ret != 0)
758 goto fail_put_pages;
759
760 obj_priv = obj->driver_private;
761 offset = args->offset;
762 obj_priv->dirty = 1;
763
764 while (remain > 0) {
765 /* Operation in this page
766 *
767 * page_base = page offset within aperture
768 * page_offset = offset within page
769 * page_length = bytes to copy for this page
770 */
771 page_base = (offset & ~(PAGE_SIZE-1));
772 page_offset = offset & (PAGE_SIZE-1);
773 page_length = remain;
774 if ((page_offset + remain) > PAGE_SIZE)
775 page_length = PAGE_SIZE - page_offset;
776
777 ret = fast_shmem_write(obj_priv->pages,
778 page_base, page_offset,
779 user_data, page_length);
780 if (ret)
781 goto fail_put_pages;
782
783 remain -= page_length;
784 user_data += page_length;
785 offset += page_length;
786 }
787
788fail_put_pages:
789 i915_gem_object_put_pages(obj);
790fail_unlock:
791 mutex_unlock(&dev->struct_mutex);
792
793 return ret;
794}
795
796/**
797 * This is the fallback shmem pwrite path, which uses get_user_pages to pin
798 * the memory and maps it using kmap_atomic for copying.
799 *
800 * This avoids taking mmap_sem for faulting on the user's address while the
801 * struct_mutex is held.
802 */
803static int
804i915_gem_shmem_pwrite_slow(struct drm_device *dev, struct drm_gem_object *obj,
805 struct drm_i915_gem_pwrite *args,
806 struct drm_file *file_priv)
807{
808 struct drm_i915_gem_object *obj_priv = obj->driver_private;
809 struct mm_struct *mm = current->mm;
810 struct page **user_pages;
811 ssize_t remain;
812 loff_t offset, pinned_pages, i;
813 loff_t first_data_page, last_data_page, num_pages;
814 int shmem_page_index, shmem_page_offset;
815 int data_page_index, data_page_offset;
816 int page_length;
817 int ret;
818 uint64_t data_ptr = args->data_ptr;
280b713b 819 int do_bit17_swizzling;
40123c1f
EA
820
821 remain = args->size;
822
823 /* Pin the user pages containing the data. We can't fault while
824 * holding the struct mutex, and all of the pwrite implementations
825 * want to hold it while dereferencing the user data.
826 */
827 first_data_page = data_ptr / PAGE_SIZE;
828 last_data_page = (data_ptr + args->size - 1) / PAGE_SIZE;
829 num_pages = last_data_page - first_data_page + 1;
830
8e7d2b2c 831 user_pages = drm_calloc_large(num_pages, sizeof(struct page *));
40123c1f
EA
832 if (user_pages == NULL)
833 return -ENOMEM;
834
835 down_read(&mm->mmap_sem);
836 pinned_pages = get_user_pages(current, mm, (uintptr_t)args->data_ptr,
837 num_pages, 0, 0, user_pages, NULL);
838 up_read(&mm->mmap_sem);
839 if (pinned_pages < num_pages) {
840 ret = -EFAULT;
841 goto fail_put_user_pages;
673a394b
EA
842 }
843
280b713b
EA
844 do_bit17_swizzling = i915_gem_object_needs_bit17_swizzle(obj);
845
40123c1f
EA
846 mutex_lock(&dev->struct_mutex);
847
848 ret = i915_gem_object_get_pages(obj);
849 if (ret != 0)
850 goto fail_unlock;
851
852 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
853 if (ret != 0)
854 goto fail_put_pages;
855
856 obj_priv = obj->driver_private;
673a394b 857 offset = args->offset;
40123c1f 858 obj_priv->dirty = 1;
673a394b 859
40123c1f
EA
860 while (remain > 0) {
861 /* Operation in this page
862 *
863 * shmem_page_index = page number within shmem file
864 * shmem_page_offset = offset within page in shmem file
865 * data_page_index = page number in get_user_pages return
866 * data_page_offset = offset with data_page_index page.
867 * page_length = bytes to copy for this page
868 */
869 shmem_page_index = offset / PAGE_SIZE;
870 shmem_page_offset = offset & ~PAGE_MASK;
871 data_page_index = data_ptr / PAGE_SIZE - first_data_page;
872 data_page_offset = data_ptr & ~PAGE_MASK;
873
874 page_length = remain;
875 if ((shmem_page_offset + page_length) > PAGE_SIZE)
876 page_length = PAGE_SIZE - shmem_page_offset;
877 if ((data_page_offset + page_length) > PAGE_SIZE)
878 page_length = PAGE_SIZE - data_page_offset;
879
280b713b
EA
880 if (do_bit17_swizzling) {
881 ret = slow_shmem_bit17_copy(obj_priv->pages[shmem_page_index],
882 shmem_page_offset,
883 user_pages[data_page_index],
884 data_page_offset,
885 page_length,
886 0);
887 } else {
888 ret = slow_shmem_copy(obj_priv->pages[shmem_page_index],
889 shmem_page_offset,
890 user_pages[data_page_index],
891 data_page_offset,
892 page_length);
893 }
40123c1f
EA
894 if (ret)
895 goto fail_put_pages;
896
897 remain -= page_length;
898 data_ptr += page_length;
899 offset += page_length;
673a394b
EA
900 }
901
40123c1f
EA
902fail_put_pages:
903 i915_gem_object_put_pages(obj);
904fail_unlock:
673a394b 905 mutex_unlock(&dev->struct_mutex);
40123c1f
EA
906fail_put_user_pages:
907 for (i = 0; i < pinned_pages; i++)
908 page_cache_release(user_pages[i]);
8e7d2b2c 909 drm_free_large(user_pages);
673a394b 910
40123c1f 911 return ret;
673a394b
EA
912}
913
914/**
915 * Writes data to the object referenced by handle.
916 *
917 * On error, the contents of the buffer that were to be modified are undefined.
918 */
919int
920i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
921 struct drm_file *file_priv)
922{
923 struct drm_i915_gem_pwrite *args = data;
924 struct drm_gem_object *obj;
925 struct drm_i915_gem_object *obj_priv;
926 int ret = 0;
927
928 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
929 if (obj == NULL)
930 return -EBADF;
931 obj_priv = obj->driver_private;
932
933 /* Bounds check destination.
934 *
935 * XXX: This could use review for overflow issues...
936 */
937 if (args->offset > obj->size || args->size > obj->size ||
938 args->offset + args->size > obj->size) {
939 drm_gem_object_unreference(obj);
940 return -EINVAL;
941 }
942
943 /* We can only do the GTT pwrite on untiled buffers, as otherwise
944 * it would end up going through the fenced access, and we'll get
945 * different detiling behavior between reading and writing.
946 * pread/pwrite currently are reading and writing from the CPU
947 * perspective, requiring manual detiling by the client.
948 */
71acb5eb
DA
949 if (obj_priv->phys_obj)
950 ret = i915_gem_phys_pwrite(dev, obj, args, file_priv);
951 else if (obj_priv->tiling_mode == I915_TILING_NONE &&
3de09aa3
EA
952 dev->gtt_total != 0) {
953 ret = i915_gem_gtt_pwrite_fast(dev, obj, args, file_priv);
954 if (ret == -EFAULT) {
955 ret = i915_gem_gtt_pwrite_slow(dev, obj, args,
956 file_priv);
957 }
280b713b
EA
958 } else if (i915_gem_object_needs_bit17_swizzle(obj)) {
959 ret = i915_gem_shmem_pwrite_slow(dev, obj, args, file_priv);
40123c1f
EA
960 } else {
961 ret = i915_gem_shmem_pwrite_fast(dev, obj, args, file_priv);
962 if (ret == -EFAULT) {
963 ret = i915_gem_shmem_pwrite_slow(dev, obj, args,
964 file_priv);
965 }
966 }
673a394b
EA
967
968#if WATCH_PWRITE
969 if (ret)
970 DRM_INFO("pwrite failed %d\n", ret);
971#endif
972
973 drm_gem_object_unreference(obj);
974
975 return ret;
976}
977
978/**
2ef7eeaa
EA
979 * Called when user space prepares to use an object with the CPU, either
980 * through the mmap ioctl's mapping or a GTT mapping.
673a394b
EA
981 */
982int
983i915_gem_set_domain_ioctl(struct drm_device *dev, void *data,
984 struct drm_file *file_priv)
985{
a09ba7fa 986 struct drm_i915_private *dev_priv = dev->dev_private;
673a394b
EA
987 struct drm_i915_gem_set_domain *args = data;
988 struct drm_gem_object *obj;
652c393a 989 struct drm_i915_gem_object *obj_priv;
2ef7eeaa
EA
990 uint32_t read_domains = args->read_domains;
991 uint32_t write_domain = args->write_domain;
673a394b
EA
992 int ret;
993
994 if (!(dev->driver->driver_features & DRIVER_GEM))
995 return -ENODEV;
996
2ef7eeaa 997 /* Only handle setting domains to types used by the CPU. */
21d509e3 998 if (write_domain & I915_GEM_GPU_DOMAINS)
2ef7eeaa
EA
999 return -EINVAL;
1000
21d509e3 1001 if (read_domains & I915_GEM_GPU_DOMAINS)
2ef7eeaa
EA
1002 return -EINVAL;
1003
1004 /* Having something in the write domain implies it's in the read
1005 * domain, and only that read domain. Enforce that in the request.
1006 */
1007 if (write_domain != 0 && read_domains != write_domain)
1008 return -EINVAL;
1009
673a394b
EA
1010 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1011 if (obj == NULL)
1012 return -EBADF;
652c393a 1013 obj_priv = obj->driver_private;
673a394b
EA
1014
1015 mutex_lock(&dev->struct_mutex);
652c393a
JB
1016
1017 intel_mark_busy(dev, obj);
1018
673a394b 1019#if WATCH_BUF
cfd43c02 1020 DRM_INFO("set_domain_ioctl %p(%zd), %08x %08x\n",
2ef7eeaa 1021 obj, obj->size, read_domains, write_domain);
673a394b 1022#endif
2ef7eeaa
EA
1023 if (read_domains & I915_GEM_DOMAIN_GTT) {
1024 ret = i915_gem_object_set_to_gtt_domain(obj, write_domain != 0);
02354392 1025
a09ba7fa
EA
1026 /* Update the LRU on the fence for the CPU access that's
1027 * about to occur.
1028 */
1029 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
1030 list_move_tail(&obj_priv->fence_list,
1031 &dev_priv->mm.fence_list);
1032 }
1033
02354392
EA
1034 /* Silently promote "you're not bound, there was nothing to do"
1035 * to success, since the client was just asking us to
1036 * make sure everything was done.
1037 */
1038 if (ret == -EINVAL)
1039 ret = 0;
2ef7eeaa 1040 } else {
e47c68e9 1041 ret = i915_gem_object_set_to_cpu_domain(obj, write_domain != 0);
2ef7eeaa
EA
1042 }
1043
673a394b
EA
1044 drm_gem_object_unreference(obj);
1045 mutex_unlock(&dev->struct_mutex);
1046 return ret;
1047}
1048
1049/**
1050 * Called when user space has done writes to this buffer
1051 */
1052int
1053i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data,
1054 struct drm_file *file_priv)
1055{
1056 struct drm_i915_gem_sw_finish *args = data;
1057 struct drm_gem_object *obj;
1058 struct drm_i915_gem_object *obj_priv;
1059 int ret = 0;
1060
1061 if (!(dev->driver->driver_features & DRIVER_GEM))
1062 return -ENODEV;
1063
1064 mutex_lock(&dev->struct_mutex);
1065 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1066 if (obj == NULL) {
1067 mutex_unlock(&dev->struct_mutex);
1068 return -EBADF;
1069 }
1070
1071#if WATCH_BUF
cfd43c02 1072 DRM_INFO("%s: sw_finish %d (%p %zd)\n",
673a394b
EA
1073 __func__, args->handle, obj, obj->size);
1074#endif
1075 obj_priv = obj->driver_private;
1076
1077 /* Pinned buffers may be scanout, so flush the cache */
e47c68e9
EA
1078 if (obj_priv->pin_count)
1079 i915_gem_object_flush_cpu_write_domain(obj);
1080
673a394b
EA
1081 drm_gem_object_unreference(obj);
1082 mutex_unlock(&dev->struct_mutex);
1083 return ret;
1084}
1085
1086/**
1087 * Maps the contents of an object, returning the address it is mapped
1088 * into.
1089 *
1090 * While the mapping holds a reference on the contents of the object, it doesn't
1091 * imply a ref on the object itself.
1092 */
1093int
1094i915_gem_mmap_ioctl(struct drm_device *dev, void *data,
1095 struct drm_file *file_priv)
1096{
1097 struct drm_i915_gem_mmap *args = data;
1098 struct drm_gem_object *obj;
1099 loff_t offset;
1100 unsigned long addr;
1101
1102 if (!(dev->driver->driver_features & DRIVER_GEM))
1103 return -ENODEV;
1104
1105 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1106 if (obj == NULL)
1107 return -EBADF;
1108
1109 offset = args->offset;
1110
1111 down_write(&current->mm->mmap_sem);
1112 addr = do_mmap(obj->filp, 0, args->size,
1113 PROT_READ | PROT_WRITE, MAP_SHARED,
1114 args->offset);
1115 up_write(&current->mm->mmap_sem);
1116 mutex_lock(&dev->struct_mutex);
1117 drm_gem_object_unreference(obj);
1118 mutex_unlock(&dev->struct_mutex);
1119 if (IS_ERR((void *)addr))
1120 return addr;
1121
1122 args->addr_ptr = (uint64_t) addr;
1123
1124 return 0;
1125}
1126
de151cf6
JB
1127/**
1128 * i915_gem_fault - fault a page into the GTT
1129 * vma: VMA in question
1130 * vmf: fault info
1131 *
1132 * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
1133 * from userspace. The fault handler takes care of binding the object to
1134 * the GTT (if needed), allocating and programming a fence register (again,
1135 * only if needed based on whether the old reg is still valid or the object
1136 * is tiled) and inserting a new PTE into the faulting process.
1137 *
1138 * Note that the faulting process may involve evicting existing objects
1139 * from the GTT and/or fence registers to make room. So performance may
1140 * suffer if the GTT working set is large or there are few fence registers
1141 * left.
1142 */
1143int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
1144{
1145 struct drm_gem_object *obj = vma->vm_private_data;
1146 struct drm_device *dev = obj->dev;
1147 struct drm_i915_private *dev_priv = dev->dev_private;
1148 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1149 pgoff_t page_offset;
1150 unsigned long pfn;
1151 int ret = 0;
0f973f27 1152 bool write = !!(vmf->flags & FAULT_FLAG_WRITE);
de151cf6
JB
1153
1154 /* We don't use vmf->pgoff since that has the fake offset */
1155 page_offset = ((unsigned long)vmf->virtual_address - vma->vm_start) >>
1156 PAGE_SHIFT;
1157
1158 /* Now bind it into the GTT if needed */
1159 mutex_lock(&dev->struct_mutex);
1160 if (!obj_priv->gtt_space) {
e67b8ce1 1161 ret = i915_gem_object_bind_to_gtt(obj, 0);
de151cf6
JB
1162 if (ret) {
1163 mutex_unlock(&dev->struct_mutex);
1164 return VM_FAULT_SIGBUS;
1165 }
4960aaca 1166 list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
07f4f3e8
KH
1167
1168 ret = i915_gem_object_set_to_gtt_domain(obj, write);
1169 if (ret) {
1170 mutex_unlock(&dev->struct_mutex);
1171 return VM_FAULT_SIGBUS;
1172 }
de151cf6
JB
1173 }
1174
1175 /* Need a new fence register? */
a09ba7fa 1176 if (obj_priv->tiling_mode != I915_TILING_NONE) {
8c4b8c3f 1177 ret = i915_gem_object_get_fence_reg(obj);
7d8d58b2
CW
1178 if (ret) {
1179 mutex_unlock(&dev->struct_mutex);
d9ddcb96 1180 return VM_FAULT_SIGBUS;
7d8d58b2 1181 }
d9ddcb96 1182 }
de151cf6
JB
1183
1184 pfn = ((dev->agp->base + obj_priv->gtt_offset) >> PAGE_SHIFT) +
1185 page_offset;
1186
1187 /* Finally, remap it using the new GTT offset */
1188 ret = vm_insert_pfn(vma, (unsigned long)vmf->virtual_address, pfn);
1189
1190 mutex_unlock(&dev->struct_mutex);
1191
1192 switch (ret) {
1193 case -ENOMEM:
1194 case -EAGAIN:
1195 return VM_FAULT_OOM;
1196 case -EFAULT:
959b887c 1197 case -EINVAL:
de151cf6
JB
1198 return VM_FAULT_SIGBUS;
1199 default:
1200 return VM_FAULT_NOPAGE;
1201 }
1202}
1203
1204/**
1205 * i915_gem_create_mmap_offset - create a fake mmap offset for an object
1206 * @obj: obj in question
1207 *
1208 * GEM memory mapping works by handing back to userspace a fake mmap offset
1209 * it can use in a subsequent mmap(2) call. The DRM core code then looks
1210 * up the object based on the offset and sets up the various memory mapping
1211 * structures.
1212 *
1213 * This routine allocates and attaches a fake offset for @obj.
1214 */
1215static int
1216i915_gem_create_mmap_offset(struct drm_gem_object *obj)
1217{
1218 struct drm_device *dev = obj->dev;
1219 struct drm_gem_mm *mm = dev->mm_private;
1220 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1221 struct drm_map_list *list;
f77d390c 1222 struct drm_local_map *map;
de151cf6
JB
1223 int ret = 0;
1224
1225 /* Set the object up for mmap'ing */
1226 list = &obj->map_list;
9a298b2a 1227 list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
de151cf6
JB
1228 if (!list->map)
1229 return -ENOMEM;
1230
1231 map = list->map;
1232 map->type = _DRM_GEM;
1233 map->size = obj->size;
1234 map->handle = obj;
1235
1236 /* Get a DRM GEM mmap offset allocated... */
1237 list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
1238 obj->size / PAGE_SIZE, 0, 0);
1239 if (!list->file_offset_node) {
1240 DRM_ERROR("failed to allocate offset for bo %d\n", obj->name);
1241 ret = -ENOMEM;
1242 goto out_free_list;
1243 }
1244
1245 list->file_offset_node = drm_mm_get_block(list->file_offset_node,
1246 obj->size / PAGE_SIZE, 0);
1247 if (!list->file_offset_node) {
1248 ret = -ENOMEM;
1249 goto out_free_list;
1250 }
1251
1252 list->hash.key = list->file_offset_node->start;
1253 if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) {
1254 DRM_ERROR("failed to add to map hash\n");
1255 goto out_free_mm;
1256 }
1257
1258 /* By now we should be all set, any drm_mmap request on the offset
1259 * below will get to our mmap & fault handler */
1260 obj_priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;
1261
1262 return 0;
1263
1264out_free_mm:
1265 drm_mm_put_block(list->file_offset_node);
1266out_free_list:
9a298b2a 1267 kfree(list->map);
de151cf6
JB
1268
1269 return ret;
1270}
1271
901782b2
CW
1272/**
1273 * i915_gem_release_mmap - remove physical page mappings
1274 * @obj: obj in question
1275 *
1276 * Preserve the reservation of the mmaping with the DRM core code, but
1277 * relinquish ownership of the pages back to the system.
1278 *
1279 * It is vital that we remove the page mapping if we have mapped a tiled
1280 * object through the GTT and then lose the fence register due to
1281 * resource pressure. Similarly if the object has been moved out of the
1282 * aperture, than pages mapped into userspace must be revoked. Removing the
1283 * mapping will then trigger a page fault on the next user access, allowing
1284 * fixup by i915_gem_fault().
1285 */
d05ca301 1286void
901782b2
CW
1287i915_gem_release_mmap(struct drm_gem_object *obj)
1288{
1289 struct drm_device *dev = obj->dev;
1290 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1291
1292 if (dev->dev_mapping)
1293 unmap_mapping_range(dev->dev_mapping,
1294 obj_priv->mmap_offset, obj->size, 1);
1295}
1296
ab00b3e5
JB
1297static void
1298i915_gem_free_mmap_offset(struct drm_gem_object *obj)
1299{
1300 struct drm_device *dev = obj->dev;
1301 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1302 struct drm_gem_mm *mm = dev->mm_private;
1303 struct drm_map_list *list;
1304
1305 list = &obj->map_list;
1306 drm_ht_remove_item(&mm->offset_hash, &list->hash);
1307
1308 if (list->file_offset_node) {
1309 drm_mm_put_block(list->file_offset_node);
1310 list->file_offset_node = NULL;
1311 }
1312
1313 if (list->map) {
9a298b2a 1314 kfree(list->map);
ab00b3e5
JB
1315 list->map = NULL;
1316 }
1317
1318 obj_priv->mmap_offset = 0;
1319}
1320
de151cf6
JB
1321/**
1322 * i915_gem_get_gtt_alignment - return required GTT alignment for an object
1323 * @obj: object to check
1324 *
1325 * Return the required GTT alignment for an object, taking into account
1326 * potential fence register mapping if needed.
1327 */
1328static uint32_t
1329i915_gem_get_gtt_alignment(struct drm_gem_object *obj)
1330{
1331 struct drm_device *dev = obj->dev;
1332 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1333 int start, i;
1334
1335 /*
1336 * Minimum alignment is 4k (GTT page size), but might be greater
1337 * if a fence register is needed for the object.
1338 */
1339 if (IS_I965G(dev) || obj_priv->tiling_mode == I915_TILING_NONE)
1340 return 4096;
1341
1342 /*
1343 * Previous chips need to be aligned to the size of the smallest
1344 * fence register that can contain the object.
1345 */
1346 if (IS_I9XX(dev))
1347 start = 1024*1024;
1348 else
1349 start = 512*1024;
1350
1351 for (i = start; i < obj->size; i <<= 1)
1352 ;
1353
1354 return i;
1355}
1356
1357/**
1358 * i915_gem_mmap_gtt_ioctl - prepare an object for GTT mmap'ing
1359 * @dev: DRM device
1360 * @data: GTT mapping ioctl data
1361 * @file_priv: GEM object info
1362 *
1363 * Simply returns the fake offset to userspace so it can mmap it.
1364 * The mmap call will end up in drm_gem_mmap(), which will set things
1365 * up so we can get faults in the handler above.
1366 *
1367 * The fault handler will take care of binding the object into the GTT
1368 * (since it may have been evicted to make room for something), allocating
1369 * a fence register, and mapping the appropriate aperture address into
1370 * userspace.
1371 */
1372int
1373i915_gem_mmap_gtt_ioctl(struct drm_device *dev, void *data,
1374 struct drm_file *file_priv)
1375{
1376 struct drm_i915_gem_mmap_gtt *args = data;
1377 struct drm_i915_private *dev_priv = dev->dev_private;
1378 struct drm_gem_object *obj;
1379 struct drm_i915_gem_object *obj_priv;
1380 int ret;
1381
1382 if (!(dev->driver->driver_features & DRIVER_GEM))
1383 return -ENODEV;
1384
1385 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
1386 if (obj == NULL)
1387 return -EBADF;
1388
1389 mutex_lock(&dev->struct_mutex);
1390
1391 obj_priv = obj->driver_private;
1392
1393 if (!obj_priv->mmap_offset) {
1394 ret = i915_gem_create_mmap_offset(obj);
13af1062
CW
1395 if (ret) {
1396 drm_gem_object_unreference(obj);
1397 mutex_unlock(&dev->struct_mutex);
de151cf6 1398 return ret;
13af1062 1399 }
de151cf6
JB
1400 }
1401
1402 args->offset = obj_priv->mmap_offset;
1403
de151cf6
JB
1404 /*
1405 * Pull it into the GTT so that we have a page list (makes the
1406 * initial fault faster and any subsequent flushing possible).
1407 */
1408 if (!obj_priv->agp_mem) {
e67b8ce1 1409 ret = i915_gem_object_bind_to_gtt(obj, 0);
de151cf6
JB
1410 if (ret) {
1411 drm_gem_object_unreference(obj);
1412 mutex_unlock(&dev->struct_mutex);
1413 return ret;
1414 }
14b60391 1415 list_add_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
de151cf6
JB
1416 }
1417
1418 drm_gem_object_unreference(obj);
1419 mutex_unlock(&dev->struct_mutex);
1420
1421 return 0;
1422}
1423
6911a9b8 1424void
856fa198 1425i915_gem_object_put_pages(struct drm_gem_object *obj)
673a394b
EA
1426{
1427 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1428 int page_count = obj->size / PAGE_SIZE;
1429 int i;
1430
856fa198 1431 BUG_ON(obj_priv->pages_refcount == 0);
673a394b 1432
856fa198
EA
1433 if (--obj_priv->pages_refcount != 0)
1434 return;
673a394b 1435
280b713b
EA
1436 if (obj_priv->tiling_mode != I915_TILING_NONE)
1437 i915_gem_object_save_bit_17_swizzle(obj);
1438
3ef94daa
CW
1439 if (obj_priv->madv == I915_MADV_DONTNEED)
1440 obj_priv->dirty = 0;
1441
1442 for (i = 0; i < page_count; i++) {
1443 if (obj_priv->pages[i] == NULL)
1444 break;
1445
1446 if (obj_priv->dirty)
1447 set_page_dirty(obj_priv->pages[i]);
1448
1449 if (obj_priv->madv == I915_MADV_WILLNEED)
1450 mark_page_accessed(obj_priv->pages[i]);
1451
1452 page_cache_release(obj_priv->pages[i]);
1453 }
673a394b
EA
1454 obj_priv->dirty = 0;
1455
8e7d2b2c 1456 drm_free_large(obj_priv->pages);
856fa198 1457 obj_priv->pages = NULL;
673a394b
EA
1458}
1459
1460static void
ce44b0ea 1461i915_gem_object_move_to_active(struct drm_gem_object *obj, uint32_t seqno)
673a394b
EA
1462{
1463 struct drm_device *dev = obj->dev;
1464 drm_i915_private_t *dev_priv = dev->dev_private;
1465 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1466
1467 /* Add a reference if we're newly entering the active list. */
1468 if (!obj_priv->active) {
1469 drm_gem_object_reference(obj);
1470 obj_priv->active = 1;
1471 }
1472 /* Move from whatever list we were on to the tail of execution. */
5e118f41 1473 spin_lock(&dev_priv->mm.active_list_lock);
673a394b
EA
1474 list_move_tail(&obj_priv->list,
1475 &dev_priv->mm.active_list);
5e118f41 1476 spin_unlock(&dev_priv->mm.active_list_lock);
ce44b0ea 1477 obj_priv->last_rendering_seqno = seqno;
673a394b
EA
1478}
1479
ce44b0ea
EA
1480static void
1481i915_gem_object_move_to_flushing(struct drm_gem_object *obj)
1482{
1483 struct drm_device *dev = obj->dev;
1484 drm_i915_private_t *dev_priv = dev->dev_private;
1485 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1486
1487 BUG_ON(!obj_priv->active);
1488 list_move_tail(&obj_priv->list, &dev_priv->mm.flushing_list);
1489 obj_priv->last_rendering_seqno = 0;
1490}
673a394b
EA
1491
1492static void
1493i915_gem_object_move_to_inactive(struct drm_gem_object *obj)
1494{
1495 struct drm_device *dev = obj->dev;
1496 drm_i915_private_t *dev_priv = dev->dev_private;
1497 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1498
1499 i915_verify_inactive(dev, __FILE__, __LINE__);
1500 if (obj_priv->pin_count != 0)
1501 list_del_init(&obj_priv->list);
1502 else
1503 list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list);
1504
ce44b0ea 1505 obj_priv->last_rendering_seqno = 0;
673a394b
EA
1506 if (obj_priv->active) {
1507 obj_priv->active = 0;
1508 drm_gem_object_unreference(obj);
1509 }
1510 i915_verify_inactive(dev, __FILE__, __LINE__);
1511}
1512
1513/**
1514 * Creates a new sequence number, emitting a write of it to the status page
1515 * plus an interrupt, which will trigger i915_user_interrupt_handler.
1516 *
1517 * Must be called with struct_lock held.
1518 *
1519 * Returned sequence numbers are nonzero on success.
1520 */
1521static uint32_t
b962442e
EA
1522i915_add_request(struct drm_device *dev, struct drm_file *file_priv,
1523 uint32_t flush_domains)
673a394b
EA
1524{
1525 drm_i915_private_t *dev_priv = dev->dev_private;
b962442e 1526 struct drm_i915_file_private *i915_file_priv = NULL;
673a394b
EA
1527 struct drm_i915_gem_request *request;
1528 uint32_t seqno;
1529 int was_empty;
1530 RING_LOCALS;
1531
b962442e
EA
1532 if (file_priv != NULL)
1533 i915_file_priv = file_priv->driver_priv;
1534
9a298b2a 1535 request = kzalloc(sizeof(*request), GFP_KERNEL);
673a394b
EA
1536 if (request == NULL)
1537 return 0;
1538
1539 /* Grab the seqno we're going to make this request be, and bump the
1540 * next (skipping 0 so it can be the reserved no-seqno value).
1541 */
1542 seqno = dev_priv->mm.next_gem_seqno;
1543 dev_priv->mm.next_gem_seqno++;
1544 if (dev_priv->mm.next_gem_seqno == 0)
1545 dev_priv->mm.next_gem_seqno++;
1546
1547 BEGIN_LP_RING(4);
1548 OUT_RING(MI_STORE_DWORD_INDEX);
1549 OUT_RING(I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
1550 OUT_RING(seqno);
1551
1552 OUT_RING(MI_USER_INTERRUPT);
1553 ADVANCE_LP_RING();
1554
1555 DRM_DEBUG("%d\n", seqno);
1556
1557 request->seqno = seqno;
1558 request->emitted_jiffies = jiffies;
673a394b
EA
1559 was_empty = list_empty(&dev_priv->mm.request_list);
1560 list_add_tail(&request->list, &dev_priv->mm.request_list);
b962442e
EA
1561 if (i915_file_priv) {
1562 list_add_tail(&request->client_list,
1563 &i915_file_priv->mm.request_list);
1564 } else {
1565 INIT_LIST_HEAD(&request->client_list);
1566 }
673a394b 1567
ce44b0ea
EA
1568 /* Associate any objects on the flushing list matching the write
1569 * domain we're flushing with our flush.
1570 */
1571 if (flush_domains != 0) {
1572 struct drm_i915_gem_object *obj_priv, *next;
1573
1574 list_for_each_entry_safe(obj_priv, next,
1575 &dev_priv->mm.flushing_list, list) {
1576 struct drm_gem_object *obj = obj_priv->obj;
1577
1578 if ((obj->write_domain & flush_domains) ==
1579 obj->write_domain) {
1580 obj->write_domain = 0;
1581 i915_gem_object_move_to_active(obj, seqno);
1582 }
1583 }
1584
1585 }
1586
f65d9421
BG
1587 if (!dev_priv->mm.suspended) {
1588 mod_timer(&dev_priv->hangcheck_timer, jiffies + DRM_I915_HANGCHECK_PERIOD);
1589 if (was_empty)
1590 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
1591 }
673a394b
EA
1592 return seqno;
1593}
1594
1595/**
1596 * Command execution barrier
1597 *
1598 * Ensures that all commands in the ring are finished
1599 * before signalling the CPU
1600 */
3043c60c 1601static uint32_t
673a394b
EA
1602i915_retire_commands(struct drm_device *dev)
1603{
1604 drm_i915_private_t *dev_priv = dev->dev_private;
1605 uint32_t cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
1606 uint32_t flush_domains = 0;
1607 RING_LOCALS;
1608
1609 /* The sampler always gets flushed on i965 (sigh) */
1610 if (IS_I965G(dev))
1611 flush_domains |= I915_GEM_DOMAIN_SAMPLER;
1612 BEGIN_LP_RING(2);
1613 OUT_RING(cmd);
1614 OUT_RING(0); /* noop */
1615 ADVANCE_LP_RING();
1616 return flush_domains;
1617}
1618
1619/**
1620 * Moves buffers associated only with the given active seqno from the active
1621 * to inactive list, potentially freeing them.
1622 */
1623static void
1624i915_gem_retire_request(struct drm_device *dev,
1625 struct drm_i915_gem_request *request)
1626{
1627 drm_i915_private_t *dev_priv = dev->dev_private;
1628
1629 /* Move any buffers on the active list that are no longer referenced
1630 * by the ringbuffer to the flushing/inactive lists as appropriate.
1631 */
5e118f41 1632 spin_lock(&dev_priv->mm.active_list_lock);
673a394b
EA
1633 while (!list_empty(&dev_priv->mm.active_list)) {
1634 struct drm_gem_object *obj;
1635 struct drm_i915_gem_object *obj_priv;
1636
1637 obj_priv = list_first_entry(&dev_priv->mm.active_list,
1638 struct drm_i915_gem_object,
1639 list);
1640 obj = obj_priv->obj;
1641
1642 /* If the seqno being retired doesn't match the oldest in the
1643 * list, then the oldest in the list must still be newer than
1644 * this seqno.
1645 */
1646 if (obj_priv->last_rendering_seqno != request->seqno)
5e118f41 1647 goto out;
de151cf6 1648
673a394b
EA
1649#if WATCH_LRU
1650 DRM_INFO("%s: retire %d moves to inactive list %p\n",
1651 __func__, request->seqno, obj);
1652#endif
1653
ce44b0ea
EA
1654 if (obj->write_domain != 0)
1655 i915_gem_object_move_to_flushing(obj);
68c84342
SL
1656 else {
1657 /* Take a reference on the object so it won't be
1658 * freed while the spinlock is held. The list
1659 * protection for this spinlock is safe when breaking
1660 * the lock like this since the next thing we do
1661 * is just get the head of the list again.
1662 */
1663 drm_gem_object_reference(obj);
673a394b 1664 i915_gem_object_move_to_inactive(obj);
68c84342
SL
1665 spin_unlock(&dev_priv->mm.active_list_lock);
1666 drm_gem_object_unreference(obj);
1667 spin_lock(&dev_priv->mm.active_list_lock);
1668 }
673a394b 1669 }
5e118f41
CW
1670out:
1671 spin_unlock(&dev_priv->mm.active_list_lock);
673a394b
EA
1672}
1673
1674/**
1675 * Returns true if seq1 is later than seq2.
1676 */
22be1724 1677bool
673a394b
EA
1678i915_seqno_passed(uint32_t seq1, uint32_t seq2)
1679{
1680 return (int32_t)(seq1 - seq2) >= 0;
1681}
1682
1683uint32_t
1684i915_get_gem_seqno(struct drm_device *dev)
1685{
1686 drm_i915_private_t *dev_priv = dev->dev_private;
1687
1688 return READ_HWSP(dev_priv, I915_GEM_HWS_INDEX);
1689}
1690
1691/**
1692 * This function clears the request list as sequence numbers are passed.
1693 */
1694void
1695i915_gem_retire_requests(struct drm_device *dev)
1696{
1697 drm_i915_private_t *dev_priv = dev->dev_private;
1698 uint32_t seqno;
1699
6c0594a3
KW
1700 if (!dev_priv->hw_status_page)
1701 return;
1702
673a394b
EA
1703 seqno = i915_get_gem_seqno(dev);
1704
1705 while (!list_empty(&dev_priv->mm.request_list)) {
1706 struct drm_i915_gem_request *request;
1707 uint32_t retiring_seqno;
1708
1709 request = list_first_entry(&dev_priv->mm.request_list,
1710 struct drm_i915_gem_request,
1711 list);
1712 retiring_seqno = request->seqno;
1713
1714 if (i915_seqno_passed(seqno, retiring_seqno) ||
ba1234d1 1715 atomic_read(&dev_priv->mm.wedged)) {
673a394b
EA
1716 i915_gem_retire_request(dev, request);
1717
1718 list_del(&request->list);
b962442e 1719 list_del(&request->client_list);
9a298b2a 1720 kfree(request);
673a394b
EA
1721 } else
1722 break;
1723 }
1724}
1725
1726void
1727i915_gem_retire_work_handler(struct work_struct *work)
1728{
1729 drm_i915_private_t *dev_priv;
1730 struct drm_device *dev;
1731
1732 dev_priv = container_of(work, drm_i915_private_t,
1733 mm.retire_work.work);
1734 dev = dev_priv->dev;
1735
1736 mutex_lock(&dev->struct_mutex);
1737 i915_gem_retire_requests(dev);
6dbe2772
KP
1738 if (!dev_priv->mm.suspended &&
1739 !list_empty(&dev_priv->mm.request_list))
9c9fe1f8 1740 queue_delayed_work(dev_priv->wq, &dev_priv->mm.retire_work, HZ);
673a394b
EA
1741 mutex_unlock(&dev->struct_mutex);
1742}
1743
1744/**
1745 * Waits for a sequence number to be signaled, and cleans up the
1746 * request and object lists appropriately for that event.
1747 */
3043c60c 1748static int
673a394b
EA
1749i915_wait_request(struct drm_device *dev, uint32_t seqno)
1750{
1751 drm_i915_private_t *dev_priv = dev->dev_private;
802c7eb6 1752 u32 ier;
673a394b
EA
1753 int ret = 0;
1754
1755 BUG_ON(seqno == 0);
1756
ba1234d1 1757 if (atomic_read(&dev_priv->mm.wedged))
ffed1d09
BG
1758 return -EIO;
1759
673a394b 1760 if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) {
036a4a7d
ZW
1761 if (IS_IGDNG(dev))
1762 ier = I915_READ(DEIER) | I915_READ(GTIER);
1763 else
1764 ier = I915_READ(IER);
802c7eb6
JB
1765 if (!ier) {
1766 DRM_ERROR("something (likely vbetool) disabled "
1767 "interrupts, re-enabling\n");
1768 i915_driver_irq_preinstall(dev);
1769 i915_driver_irq_postinstall(dev);
1770 }
1771
673a394b
EA
1772 dev_priv->mm.waiting_gem_seqno = seqno;
1773 i915_user_irq_get(dev);
1774 ret = wait_event_interruptible(dev_priv->irq_queue,
1775 i915_seqno_passed(i915_get_gem_seqno(dev),
1776 seqno) ||
ba1234d1 1777 atomic_read(&dev_priv->mm.wedged));
673a394b
EA
1778 i915_user_irq_put(dev);
1779 dev_priv->mm.waiting_gem_seqno = 0;
1780 }
ba1234d1 1781 if (atomic_read(&dev_priv->mm.wedged))
673a394b
EA
1782 ret = -EIO;
1783
1784 if (ret && ret != -ERESTARTSYS)
1785 DRM_ERROR("%s returns %d (awaiting %d at %d)\n",
1786 __func__, ret, seqno, i915_get_gem_seqno(dev));
1787
1788 /* Directly dispatch request retiring. While we have the work queue
1789 * to handle this, the waiter on a request often wants an associated
1790 * buffer to have made it to the inactive list, and we would need
1791 * a separate wait queue to handle that.
1792 */
1793 if (ret == 0)
1794 i915_gem_retire_requests(dev);
1795
1796 return ret;
1797}
1798
1799static void
1800i915_gem_flush(struct drm_device *dev,
1801 uint32_t invalidate_domains,
1802 uint32_t flush_domains)
1803{
1804 drm_i915_private_t *dev_priv = dev->dev_private;
1805 uint32_t cmd;
1806 RING_LOCALS;
1807
1808#if WATCH_EXEC
1809 DRM_INFO("%s: invalidate %08x flush %08x\n", __func__,
1810 invalidate_domains, flush_domains);
1811#endif
1812
1813 if (flush_domains & I915_GEM_DOMAIN_CPU)
1814 drm_agp_chipset_flush(dev);
1815
21d509e3 1816 if ((invalidate_domains | flush_domains) & I915_GEM_GPU_DOMAINS) {
673a394b
EA
1817 /*
1818 * read/write caches:
1819 *
1820 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
1821 * only flushed if MI_NO_WRITE_FLUSH is unset. On 965, it is
1822 * also flushed at 2d versus 3d pipeline switches.
1823 *
1824 * read-only caches:
1825 *
1826 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
1827 * MI_READ_FLUSH is set, and is always flushed on 965.
1828 *
1829 * I915_GEM_DOMAIN_COMMAND may not exist?
1830 *
1831 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
1832 * invalidated when MI_EXE_FLUSH is set.
1833 *
1834 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
1835 * invalidated with every MI_FLUSH.
1836 *
1837 * TLBs:
1838 *
1839 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
1840 * and I915_GEM_DOMAIN_CPU are invalidated at PTE write and
1841 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
1842 * are flushed at any MI_FLUSH.
1843 */
1844
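		/*
		 * Worked example (illustrative, pre-965): invalidating
		 * I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER with no
		 * flush_domains yields cmd == MI_FLUSH | MI_READ_FLUSH:
		 * MI_NO_WRITE_FLUSH is cleared because RENDER is involved,
		 * and MI_READ_FLUSH is set to invalidate the sampler cache.
		 */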
1845 cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
1846 if ((invalidate_domains|flush_domains) &
1847 I915_GEM_DOMAIN_RENDER)
1848 cmd &= ~MI_NO_WRITE_FLUSH;
1849 if (!IS_I965G(dev)) {
1850 /*
1851 * On the 965, the sampler cache always gets flushed
1852 * and this bit is reserved.
1853 */
1854 if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
1855 cmd |= MI_READ_FLUSH;
1856 }
1857 if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
1858 cmd |= MI_EXE_FLUSH;
1859
1860#if WATCH_EXEC
1861 DRM_INFO("%s: queue flush %08x to ring\n", __func__, cmd);
1862#endif
1863 BEGIN_LP_RING(2);
1864 OUT_RING(cmd);
1865 OUT_RING(0); /* noop */
1866 ADVANCE_LP_RING();
1867 }
1868}
1869
1870/**
1871 * Ensures that all rendering to the object has completed and the object is
1872 * safe to unbind from the GTT or access from the CPU.
1873 */
1874static int
1875i915_gem_object_wait_rendering(struct drm_gem_object *obj)
1876{
1877 struct drm_device *dev = obj->dev;
1878 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1879 int ret;
1880
e47c68e9
EA
1881 /* This function only exists to support waiting for existing rendering,
1882 * not for emitting required flushes.
673a394b 1883 */
e47c68e9 1884 BUG_ON((obj->write_domain & I915_GEM_GPU_DOMAINS) != 0);
673a394b
EA
1885
1886 /* If there is rendering queued on the buffer being evicted, wait for
1887 * it.
1888 */
1889 if (obj_priv->active) {
1890#if WATCH_BUF
1891 DRM_INFO("%s: object %p wait for seqno %08x\n",
1892 __func__, obj, obj_priv->last_rendering_seqno);
1893#endif
1894 ret = i915_wait_request(dev, obj_priv->last_rendering_seqno);
1895 if (ret != 0)
1896 return ret;
1897 }
1898
1899 return 0;
1900}
1901
1902/**
1903 * Unbinds an object from the GTT aperture.
1904 */
0f973f27 1905int
673a394b
EA
1906i915_gem_object_unbind(struct drm_gem_object *obj)
1907{
1908 struct drm_device *dev = obj->dev;
1909 struct drm_i915_gem_object *obj_priv = obj->driver_private;
1910 int ret = 0;
1911
1912#if WATCH_BUF
1913 DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj);
1914 DRM_INFO("gtt_space %p\n", obj_priv->gtt_space);
1915#endif
1916 if (obj_priv->gtt_space == NULL)
1917 return 0;
1918
1919 if (obj_priv->pin_count != 0) {
1920 DRM_ERROR("Attempting to unbind pinned buffer\n");
1921 return -EINVAL;
1922 }
1923
5323fd04
EA
1924 /* blow away mappings if mapped through GTT */
1925 i915_gem_release_mmap(obj);
1926
1927 if (obj_priv->fence_reg != I915_FENCE_REG_NONE)
1928 i915_gem_clear_fence_reg(obj);
1929
673a394b
EA
1930 /* Move the object to the CPU domain to ensure that
1931 * any possible CPU writes while it's not in the GTT
1932 * are flushed when we go to remap it. This will
1933 * also ensure that all pending GPU writes are finished
1934 * before we unbind.
1935 */
e47c68e9 1936 ret = i915_gem_object_set_to_cpu_domain(obj, 1);
673a394b 1937 if (ret) {
e47c68e9
EA
1938 if (ret != -ERESTARTSYS)
1939 DRM_ERROR("set_domain failed: %d\n", ret);
673a394b
EA
1940 return ret;
1941 }
1942
5323fd04
EA
1943 BUG_ON(obj_priv->active);
1944
673a394b
EA
1945 if (obj_priv->agp_mem != NULL) {
1946 drm_unbind_agp(obj_priv->agp_mem);
1947 drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE);
1948 obj_priv->agp_mem = NULL;
1949 }
1950
856fa198 1951 i915_gem_object_put_pages(obj);
673a394b
EA
1952
1953 if (obj_priv->gtt_space) {
1954 atomic_dec(&dev->gtt_count);
1955 atomic_sub(obj->size, &dev->gtt_memory);
1956
1957 drm_mm_put_block(obj_priv->gtt_space);
1958 obj_priv->gtt_space = NULL;
1959 }
1960
1961 /* Remove ourselves from the LRU list if present. */
1962 if (!list_empty(&obj_priv->list))
1963 list_del_init(&obj_priv->list);
1964
1965 return 0;
1966}
1967
1968static int
1969i915_gem_evict_something(struct drm_device *dev)
1970{
1971 drm_i915_private_t *dev_priv = dev->dev_private;
1972 struct drm_gem_object *obj;
1973 struct drm_i915_gem_object *obj_priv;
1974 int ret = 0;
1975
1976 for (;;) {
1977 /* If there's an inactive buffer available now, grab it
1978 * and be done.
1979 */
1980 if (!list_empty(&dev_priv->mm.inactive_list)) {
1981 obj_priv = list_first_entry(&dev_priv->mm.inactive_list,
1982 struct drm_i915_gem_object,
1983 list);
1984 obj = obj_priv->obj;
1985 BUG_ON(obj_priv->pin_count != 0);
1986#if WATCH_LRU
1987 DRM_INFO("%s: evicting %p\n", __func__, obj);
1988#endif
1989 BUG_ON(obj_priv->active);
1990
1991 /* Wait on the rendering and unbind the buffer. */
1992 ret = i915_gem_object_unbind(obj);
1993 break;
1994 }
1995
1996 /* If we didn't get anything, but the ring is still processing
1997 * things, wait for one of those things to finish and hopefully
1998 * leave us a buffer to evict.
1999 */
2000 if (!list_empty(&dev_priv->mm.request_list)) {
2001 struct drm_i915_gem_request *request;
2002
2003 request = list_first_entry(&dev_priv->mm.request_list,
2004 struct drm_i915_gem_request,
2005 list);
2006
2007 ret = i915_wait_request(dev, request->seqno);
2008 if (ret)
2009 break;
2010
2011 /* if waiting caused an object to become inactive,
2012 * then loop around and wait for it. Otherwise, we
2013 * assume that waiting freed and unbound something,
2014 * so there should now be some space in the GTT
2015 */
2016 if (!list_empty(&dev_priv->mm.inactive_list))
2017 continue;
2018 break;
2019 }
2020
2021 /* If we didn't have anything on the request list but there
2022 * are buffers awaiting a flush, emit one and try again.
2023 * When we wait on it, those buffers waiting for that flush
2024 * will get moved to inactive.
2025 */
2026 if (!list_empty(&dev_priv->mm.flushing_list)) {
2027 obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
2028 struct drm_i915_gem_object,
2029 list);
2030 obj = obj_priv->obj;
2031
2032 i915_gem_flush(dev,
2033 obj->write_domain,
2034 obj->write_domain);
b962442e 2035 i915_add_request(dev, NULL, obj->write_domain);
673a394b
EA
2036
2037 obj = NULL;
2038 continue;
2039 }
2040
2041 DRM_ERROR("inactive empty %d request empty %d "
2042 "flushing empty %d\n",
2043 list_empty(&dev_priv->mm.inactive_list),
2044 list_empty(&dev_priv->mm.request_list),
2045 list_empty(&dev_priv->mm.flushing_list));
2046 /* If we didn't do any of the above, there's nothing to be done
2047 * and we just can't fit it in.
2048 */
2939e1f5 2049 return -ENOSPC;
673a394b
EA
2050 }
2051 return ret;
2052}
2053
ac94a962
KP
2054static int
2055i915_gem_evict_everything(struct drm_device *dev)
2056{
2057 int ret;
2058
2059 for (;;) {
2060 ret = i915_gem_evict_something(dev);
2061 if (ret != 0)
2062 break;
2063 }
2939e1f5 2064 if (ret == -ENOSPC)
15c35334 2065 return 0;
ac94a962
KP
2066 return ret;
2067}
2068
6911a9b8 2069int
856fa198 2070i915_gem_object_get_pages(struct drm_gem_object *obj)
673a394b
EA
2071{
2072 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2073 int page_count, i;
2074 struct address_space *mapping;
2075 struct inode *inode;
2076 struct page *page;
2077 int ret;
2078
856fa198 2079 if (obj_priv->pages_refcount++ != 0)
673a394b
EA
2080 return 0;
2081
2082 /* Get the list of pages out of our struct file. They'll be pinned
2083 * at this point until we release them.
2084 */
2085 page_count = obj->size / PAGE_SIZE;
856fa198 2086 BUG_ON(obj_priv->pages != NULL);
8e7d2b2c 2087 obj_priv->pages = drm_calloc_large(page_count, sizeof(struct page *));
856fa198 2088 if (obj_priv->pages == NULL) {
673a394b 2089 DRM_ERROR("Failed to allocate page list\n");
856fa198 2090 obj_priv->pages_refcount--;
673a394b
EA
2091 return -ENOMEM;
2092 }
2093
2094 inode = obj->filp->f_path.dentry->d_inode;
2095 mapping = inode->i_mapping;
2096 for (i = 0; i < page_count; i++) {
2097 page = read_mapping_page(mapping, i, NULL);
2098 if (IS_ERR(page)) {
2099 ret = PTR_ERR(page);
2100 DRM_ERROR("read_mapping_page failed: %d\n", ret);
856fa198 2101 i915_gem_object_put_pages(obj);
673a394b
EA
2102 return ret;
2103 }
856fa198 2104 obj_priv->pages[i] = page;
673a394b 2105 }
280b713b
EA
2106
2107 if (obj_priv->tiling_mode != I915_TILING_NONE)
2108 i915_gem_object_do_bit_17_swizzle(obj);
2109
673a394b
EA
2110 return 0;
2111}
2112
de151cf6
JB
2113static void i965_write_fence_reg(struct drm_i915_fence_reg *reg)
2114{
2115 struct drm_gem_object *obj = reg->obj;
2116 struct drm_device *dev = obj->dev;
2117 drm_i915_private_t *dev_priv = dev->dev_private;
2118 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2119 int regnum = obj_priv->fence_reg;
2120 uint64_t val;
2121
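	/* The 965 fence is a single 64-bit value, packed below: the upper
	 * 32 bits hold the (page-aligned) address of the last page the
	 * object covers, the lower 32 bits its start address, plus the
	 * pitch in 128-byte units (minus one), the Y-tiling flag and the
	 * valid bit.
	 */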
2122 val = (uint64_t)((obj_priv->gtt_offset + obj->size - 4096) &
2123 0xfffff000) << 32;
2124 val |= obj_priv->gtt_offset & 0xfffff000;
2125 val |= ((obj_priv->stride / 128) - 1) << I965_FENCE_PITCH_SHIFT;
2126 if (obj_priv->tiling_mode == I915_TILING_Y)
2127 val |= 1 << I965_FENCE_TILING_Y_SHIFT;
2128 val |= I965_FENCE_REG_VALID;
2129
2130 I915_WRITE64(FENCE_REG_965_0 + (regnum * 8), val);
2131}
2132
2133static void i915_write_fence_reg(struct drm_i915_fence_reg *reg)
2134{
2135 struct drm_gem_object *obj = reg->obj;
2136 struct drm_device *dev = obj->dev;
2137 drm_i915_private_t *dev_priv = dev->dev_private;
2138 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2139 int regnum = obj_priv->fence_reg;
0f973f27 2140 int tile_width;
dc529a4f 2141 uint32_t fence_reg, val;
de151cf6
JB
2142 uint32_t pitch_val;
2143
2144 if ((obj_priv->gtt_offset & ~I915_FENCE_START_MASK) ||
2145 (obj_priv->gtt_offset & (obj->size - 1))) {
f06da264 2146 WARN(1, "%s: object 0x%08x not 1M or size (0x%zx) aligned\n",
0f973f27 2147 __func__, obj_priv->gtt_offset, obj->size);
de151cf6
JB
2148 return;
2149 }
2150
0f973f27
JB
2151 if (obj_priv->tiling_mode == I915_TILING_Y &&
2152 HAS_128_BYTE_Y_TILING(dev))
2153 tile_width = 128;
de151cf6 2154 else
0f973f27
JB
2155 tile_width = 512;
2156
2157 /* Note: the pitch must be a power-of-two number of tile widths */
2158 pitch_val = obj_priv->stride / tile_width;
2159 pitch_val = ffs(pitch_val) - 1;
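	/* Worked example (illustrative): a 2048-byte stride with 512-byte
	 * X tiles gives stride / tile_width == 4, so
	 * pitch_val == ffs(4) - 1 == 2 is what ends up in the fence's
	 * pitch field below.
	 */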
de151cf6
JB
2160
2161 val = obj_priv->gtt_offset;
2162 if (obj_priv->tiling_mode == I915_TILING_Y)
2163 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
2164 val |= I915_FENCE_SIZE_BITS(obj->size);
2165 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2166 val |= I830_FENCE_REG_VALID;
2167
dc529a4f
EA
2168 if (regnum < 8)
2169 fence_reg = FENCE_REG_830_0 + (regnum * 4);
2170 else
2171 fence_reg = FENCE_REG_945_8 + ((regnum - 8) * 4);
2172 I915_WRITE(fence_reg, val);
de151cf6
JB
2173}
2174
2175static void i830_write_fence_reg(struct drm_i915_fence_reg *reg)
2176{
2177 struct drm_gem_object *obj = reg->obj;
2178 struct drm_device *dev = obj->dev;
2179 drm_i915_private_t *dev_priv = dev->dev_private;
2180 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2181 int regnum = obj_priv->fence_reg;
2182 uint32_t val;
2183 uint32_t pitch_val;
8d7773a3 2184 uint32_t fence_size_bits;
de151cf6 2185
8d7773a3 2186 if ((obj_priv->gtt_offset & ~I830_FENCE_START_MASK) ||
de151cf6 2187 (obj_priv->gtt_offset & (obj->size - 1))) {
8d7773a3 2188 WARN(1, "%s: object 0x%08x not 512K or size aligned\n",
0f973f27 2189 __func__, obj_priv->gtt_offset);
de151cf6
JB
2190 return;
2191 }
2192
e76a16de
EA
2193 pitch_val = obj_priv->stride / 128;
2194 pitch_val = ffs(pitch_val) - 1;
2195 WARN_ON(pitch_val > I830_FENCE_MAX_PITCH_VAL);
2196
de151cf6
JB
2197 val = obj_priv->gtt_offset;
2198 if (obj_priv->tiling_mode == I915_TILING_Y)
2199 val |= 1 << I830_FENCE_TILING_Y_SHIFT;
8d7773a3
DV
2200 fence_size_bits = I830_FENCE_SIZE_BITS(obj->size);
2201 WARN_ON(fence_size_bits & ~0x00000f00);
2202 val |= fence_size_bits;
de151cf6
JB
2203 val |= pitch_val << I830_FENCE_PITCH_SHIFT;
2204 val |= I830_FENCE_REG_VALID;
2205
2206 I915_WRITE(FENCE_REG_830_0 + (regnum * 4), val);
de151cf6
JB
2207}
2208
2209/**
2210 * i915_gem_object_get_fence_reg - set up a fence reg for an object
2211 * @obj: object to map through a fence reg
2212 *
2213 * When mapping objects through the GTT, userspace wants to be able to write
2214 * to them without having to worry about swizzling if the object is tiled.
2215 *
2216 * This function walks the fence regs looking for a free one for @obj,
2217 * stealing one if it can't find any.
2218 *
2219 * It then sets up the reg based on the object's properties: address, pitch
2220 * and tiling format.
2221 */
8c4b8c3f
CW
2222int
2223i915_gem_object_get_fence_reg(struct drm_gem_object *obj)
de151cf6
JB
2224{
2225 struct drm_device *dev = obj->dev;
79e53945 2226 struct drm_i915_private *dev_priv = dev->dev_private;
de151cf6
JB
2227 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2228 struct drm_i915_fence_reg *reg = NULL;
fc7170ba
CW
2229 struct drm_i915_gem_object *old_obj_priv = NULL;
2230 int i, ret, avail;
de151cf6 2231
a09ba7fa
EA
2232 /* Just update our place in the LRU if our fence is getting used. */
2233 if (obj_priv->fence_reg != I915_FENCE_REG_NONE) {
2234 list_move_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list);
2235 return 0;
2236 }
2237
de151cf6
JB
2238 switch (obj_priv->tiling_mode) {
2239 case I915_TILING_NONE:
2240 WARN(1, "allocating a fence for non-tiled object?\n");
2241 break;
2242 case I915_TILING_X:
0f973f27
JB
2243 if (!obj_priv->stride)
2244 return -EINVAL;
2245 WARN((obj_priv->stride & (512 - 1)),
2246 "object 0x%08x is X tiled but has non-512B pitch\n",
2247 obj_priv->gtt_offset);
de151cf6
JB
2248 break;
2249 case I915_TILING_Y:
0f973f27
JB
2250 if (!obj_priv->stride)
2251 return -EINVAL;
2252 WARN((obj_priv->stride & (128 - 1)),
2253 "object 0x%08x is Y tiled but has non-128B pitch\n",
2254 obj_priv->gtt_offset);
de151cf6
JB
2255 break;
2256 }
2257
2258 /* First try to find a free reg */
fc7170ba 2259 avail = 0;
de151cf6
JB
2260 for (i = dev_priv->fence_reg_start; i < dev_priv->num_fence_regs; i++) {
2261 reg = &dev_priv->fence_regs[i];
2262 if (!reg->obj)
2263 break;
fc7170ba
CW
2264
2265 old_obj_priv = reg->obj->driver_private;
2266 if (!old_obj_priv->pin_count)
2267 avail++;
de151cf6
JB
2268 }
2269
2270 /* None available, try to steal one or wait for a user to finish */
2271 if (i == dev_priv->num_fence_regs) {
a09ba7fa 2272 struct drm_gem_object *old_obj = NULL;
de151cf6 2273
fc7170ba 2274 if (avail == 0)
2939e1f5 2275 return -ENOSPC;
fc7170ba 2276
a09ba7fa
EA
2277 list_for_each_entry(old_obj_priv, &dev_priv->mm.fence_list,
2278 fence_list) {
2279 old_obj = old_obj_priv->obj;
d7619c4b
CW
2280
2281 if (old_obj_priv->pin_count)
2282 continue;
2283
a09ba7fa
EA
2284 /* Take a reference, as otherwise the wait_rendering
2285 * below may cause the object to get freed out from
2286 * under us.
2287 */
2288 drm_gem_object_reference(old_obj);
2289
d7619c4b
CW
2290 /* i915 uses fences for GPU access to tiled buffers */
2291 if (IS_I965G(dev) || !old_obj_priv->active)
de151cf6 2292 break;
d7619c4b 2293
a09ba7fa
EA
2294 /* This brings the object to the head of the LRU if it
2295 * had been written to. The only way this should
2296 * result in us waiting longer than the expected
2297 * optimal amount of time is if there was a
2298 * fence-using buffer later that was read-only.
2299 */
2300 i915_gem_object_flush_gpu_write_domain(old_obj);
2301 ret = i915_gem_object_wait_rendering(old_obj);
58c2fb64
CW
2302 if (ret != 0) {
2303 drm_gem_object_unreference(old_obj);
d7619c4b 2304 return ret;
de151cf6 2305 }
d7619c4b 2306
a09ba7fa 2307 break;
de151cf6
JB
2308 }
2309
2310 /*
2311 * Zap this virtual mapping so we can set up a fence again
2312 * for this object next time we need it.
2313 */
58c2fb64
CW
2314 i915_gem_release_mmap(old_obj);
2315
a09ba7fa 2316 i = old_obj_priv->fence_reg;
58c2fb64
CW
2317 reg = &dev_priv->fence_regs[i];
2318
de151cf6 2319 old_obj_priv->fence_reg = I915_FENCE_REG_NONE;
a09ba7fa 2320 list_del_init(&old_obj_priv->fence_list);
58c2fb64 2321
a09ba7fa 2322 drm_gem_object_unreference(old_obj);
de151cf6
JB
2323 }
2324
2325 obj_priv->fence_reg = i;
a09ba7fa
EA
2326 list_add_tail(&obj_priv->fence_list, &dev_priv->mm.fence_list);
2327
de151cf6
JB
2328 reg->obj = obj;
2329
2330 if (IS_I965G(dev))
2331 i965_write_fence_reg(reg);
2332 else if (IS_I9XX(dev))
2333 i915_write_fence_reg(reg);
2334 else
2335 i830_write_fence_reg(reg);
d9ddcb96
EA
2336
2337 return 0;
de151cf6
JB
2338}
2339
2340/**
2341 * i915_gem_clear_fence_reg - clear out fence register info
2342 * @obj: object to clear
2343 *
2344 * Zeroes out the fence register itself and clears out the associated
2345 * data structures in dev_priv and obj_priv.
2346 */
2347static void
2348i915_gem_clear_fence_reg(struct drm_gem_object *obj)
2349{
2350 struct drm_device *dev = obj->dev;
79e53945 2351 drm_i915_private_t *dev_priv = dev->dev_private;
de151cf6
JB
2352 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2353
2354 if (IS_I965G(dev))
2355 I915_WRITE64(FENCE_REG_965_0 + (obj_priv->fence_reg * 8), 0);
dc529a4f
EA
2356 else {
2357 uint32_t fence_reg;
2358
2359 if (obj_priv->fence_reg < 8)
2360 fence_reg = FENCE_REG_830_0 + obj_priv->fence_reg * 4;
2361 else
2362 fence_reg = FENCE_REG_945_8 + (obj_priv->fence_reg -
2363 8) * 4;
2364
2365 I915_WRITE(fence_reg, 0);
2366 }
de151cf6
JB
2367
2368 dev_priv->fence_regs[obj_priv->fence_reg].obj = NULL;
2369 obj_priv->fence_reg = I915_FENCE_REG_NONE;
a09ba7fa 2370 list_del_init(&obj_priv->fence_list);
de151cf6
JB
2371}
2372
52dc7d32
CW
2373/**
2374 * i915_gem_object_put_fence_reg - waits on outstanding fenced access
2375 * to the buffer to finish, and then resets the fence register.
2376 * @obj: tiled object holding a fence register.
2377 *
2378 * Zeroes out the fence register itself and clears out the associated
2379 * data structures in dev_priv and obj_priv.
2380 */
2381int
2382i915_gem_object_put_fence_reg(struct drm_gem_object *obj)
2383{
2384 struct drm_device *dev = obj->dev;
2385 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2386
2387 if (obj_priv->fence_reg == I915_FENCE_REG_NONE)
2388 return 0;
2389
2390 /* On the i915, GPU access to tiled buffers is via a fence,
2391 * therefore we must wait for any outstanding access to complete
2392 * before clearing the fence.
2393 */
2394 if (!IS_I965G(dev)) {
2395 int ret;
2396
2397 i915_gem_object_flush_gpu_write_domain(obj);
2398 i915_gem_object_flush_gtt_write_domain(obj);
2399 ret = i915_gem_object_wait_rendering(obj);
2400 if (ret != 0)
2401 return ret;
2402 }
2403
2404 i915_gem_clear_fence_reg(obj);
2405
2406 return 0;
2407}
2408
673a394b
EA
2409/**
2410 * Finds free space in the GTT aperture and binds the object there.
2411 */
2412static int
2413i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
2414{
2415 struct drm_device *dev = obj->dev;
2416 drm_i915_private_t *dev_priv = dev->dev_private;
2417 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2418 struct drm_mm_node *free_space;
2419 int page_count, ret;
2420
9bb2d6f9
EA
2421 if (dev_priv->mm.suspended)
2422 return -EBUSY;
3ef94daa
CW
2423
2424 if (obj_priv->madv == I915_MADV_DONTNEED) {
2425 DRM_ERROR("Attempting to bind a purgeable object\n");
2426 return -EINVAL;
2427 }
2428
673a394b 2429 if (alignment == 0)
0f973f27 2430 alignment = i915_gem_get_gtt_alignment(obj);
8d7773a3 2431 if (alignment & (i915_gem_get_gtt_alignment(obj) - 1)) {
673a394b
EA
2432 DRM_ERROR("Invalid object alignment requested %u\n", alignment);
2433 return -EINVAL;
2434 }
2435
2436 search_free:
2437 free_space = drm_mm_search_free(&dev_priv->mm.gtt_space,
2438 obj->size, alignment, 0);
2439 if (free_space != NULL) {
2440 obj_priv->gtt_space = drm_mm_get_block(free_space, obj->size,
2441 alignment);
2442 if (obj_priv->gtt_space != NULL) {
2443 obj_priv->gtt_space->private = obj;
2444 obj_priv->gtt_offset = obj_priv->gtt_space->start;
2445 }
2446 }
2447 if (obj_priv->gtt_space == NULL) {
5e118f41
CW
2448 bool lists_empty;
2449
673a394b
EA
2450 /* If the gtt is empty and we're still having trouble
2451 * fitting our object in, we're out of memory.
2452 */
2453#if WATCH_LRU
2454 DRM_INFO("%s: GTT full, evicting something\n", __func__);
2455#endif
5e118f41
CW
2456 spin_lock(&dev_priv->mm.active_list_lock);
2457 lists_empty = (list_empty(&dev_priv->mm.inactive_list) &&
2458 list_empty(&dev_priv->mm.flushing_list) &&
2459 list_empty(&dev_priv->mm.active_list));
2460 spin_unlock(&dev_priv->mm.active_list_lock);
2461 if (lists_empty) {
673a394b 2462 DRM_ERROR("GTT full, but LRU list empty\n");
2939e1f5 2463 return -ENOSPC;
673a394b
EA
2464 }
2465
2466 ret = i915_gem_evict_something(dev);
2467 if (ret != 0) {
ac94a962
KP
2468 if (ret != -ERESTARTSYS)
2469 DRM_ERROR("Failed to evict a buffer %d\n", ret);
673a394b
EA
2470 return ret;
2471 }
2472 goto search_free;
2473 }
2474
2475#if WATCH_BUF
cfd43c02 2476 DRM_INFO("Binding object of size %zd at 0x%08x\n",
673a394b
EA
2477 obj->size, obj_priv->gtt_offset);
2478#endif
856fa198 2479 ret = i915_gem_object_get_pages(obj);
673a394b
EA
2480 if (ret) {
2481 drm_mm_put_block(obj_priv->gtt_space);
2482 obj_priv->gtt_space = NULL;
2483 return ret;
2484 }
2485
2486 page_count = obj->size / PAGE_SIZE;
2487 /* Create an AGP memory structure pointing at our pages, and bind it
2488 * into the GTT.
2489 */
2490 obj_priv->agp_mem = drm_agp_bind_pages(dev,
856fa198 2491 obj_priv->pages,
673a394b 2492 page_count,
ba1eb1d8
KP
2493 obj_priv->gtt_offset,
2494 obj_priv->agp_type);
673a394b 2495 if (obj_priv->agp_mem == NULL) {
856fa198 2496 i915_gem_object_put_pages(obj);
673a394b
EA
2497 drm_mm_put_block(obj_priv->gtt_space);
2498 obj_priv->gtt_space = NULL;
2499 return -ENOMEM;
2500 }
2501 atomic_inc(&dev->gtt_count);
2502 atomic_add(obj->size, &dev->gtt_memory);
2503
2504 /* Assert that the object is not currently in any GPU domain. As it
2505 * wasn't in the GTT, there shouldn't be any way it could have been in
2506 * a GPU cache
2507 */
21d509e3
CW
2508 BUG_ON(obj->read_domains & I915_GEM_GPU_DOMAINS);
2509 BUG_ON(obj->write_domain & I915_GEM_GPU_DOMAINS);
673a394b
EA
2510
2511 return 0;
2512}
2513
2514void
2515i915_gem_clflush_object(struct drm_gem_object *obj)
2516{
2517 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2518
2519 /* If we don't have a page list set up, then we're not pinned
2520 * to GPU, and we can ignore the cache flush because it'll happen
2521 * again at bind time.
2522 */
856fa198 2523 if (obj_priv->pages == NULL)
673a394b
EA
2524 return;
2525
856fa198 2526 drm_clflush_pages(obj_priv->pages, obj->size / PAGE_SIZE);
673a394b
EA
2527}
2528
e47c68e9
EA
2529/** Flushes any GPU write domain for the object if it's dirty. */
2530static void
2531i915_gem_object_flush_gpu_write_domain(struct drm_gem_object *obj)
2532{
2533 struct drm_device *dev = obj->dev;
2534 uint32_t seqno;
2535
2536 if ((obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
2537 return;
2538
2539 /* Queue the GPU write cache flushing we need. */
2540 i915_gem_flush(dev, 0, obj->write_domain);
b962442e 2541 seqno = i915_add_request(dev, NULL, obj->write_domain);
e47c68e9
EA
2542 obj->write_domain = 0;
2543 i915_gem_object_move_to_active(obj, seqno);
2544}
2545
2546/** Flushes the GTT write domain for the object if it's dirty. */
2547static void
2548i915_gem_object_flush_gtt_write_domain(struct drm_gem_object *obj)
2549{
2550 if (obj->write_domain != I915_GEM_DOMAIN_GTT)
2551 return;
2552
2553 /* No actual flushing is required for the GTT write domain. Writes
2554 * to it immediately go to main memory as far as we know, so there's
2555 * no chipset flush. It also doesn't land in render cache.
2556 */
2557 obj->write_domain = 0;
2558}
2559
2560/** Flushes the CPU write domain for the object if it's dirty. */
2561static void
2562i915_gem_object_flush_cpu_write_domain(struct drm_gem_object *obj)
2563{
2564 struct drm_device *dev = obj->dev;
2565
2566 if (obj->write_domain != I915_GEM_DOMAIN_CPU)
2567 return;
2568
2569 i915_gem_clflush_object(obj);
2570 drm_agp_chipset_flush(dev);
2571 obj->write_domain = 0;
2572}
2573
2ef7eeaa
EA
2574/**
2575 * Moves a single object to the GTT read, and possibly write domain.
2576 *
2577 * This function returns when the move is complete, including waiting on
2578 * flushes to occur.
2579 */
79e53945 2580int
2ef7eeaa
EA
2581i915_gem_object_set_to_gtt_domain(struct drm_gem_object *obj, int write)
2582{
2ef7eeaa 2583 struct drm_i915_gem_object *obj_priv = obj->driver_private;
e47c68e9 2584 int ret;
2ef7eeaa 2585
02354392
EA
2586 /* Not valid to be called on unbound objects. */
2587 if (obj_priv->gtt_space == NULL)
2588 return -EINVAL;
2589
e47c68e9
EA
2590 i915_gem_object_flush_gpu_write_domain(obj);
2591 /* Wait on any GPU rendering and flushing to occur. */
2592 ret = i915_gem_object_wait_rendering(obj);
2593 if (ret != 0)
2594 return ret;
2595
2596 /* If we're writing through the GTT domain, then CPU and GPU caches
2597 * will need to be invalidated at next use.
2ef7eeaa 2598 */
e47c68e9
EA
2599 if (write)
2600 obj->read_domains &= I915_GEM_DOMAIN_GTT;
2ef7eeaa 2601
e47c68e9 2602 i915_gem_object_flush_cpu_write_domain(obj);
2ef7eeaa 2603
e47c68e9
EA
2604 /* It should now be out of any other write domains, and we can update
2605 * the domain values for our changes.
2606 */
2607 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_GTT) != 0);
2608 obj->read_domains |= I915_GEM_DOMAIN_GTT;
2609 if (write) {
2610 obj->write_domain = I915_GEM_DOMAIN_GTT;
2611 obj_priv->dirty = 1;
2ef7eeaa
EA
2612 }
2613
e47c68e9
EA
2614 return 0;
2615}
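/*
 * Illustrative result (sketch, following the code above): after
 * i915_gem_object_set_to_gtt_domain(obj, 1) returns 0,
 * obj->read_domains == I915_GEM_DOMAIN_GTT and
 * obj->write_domain == I915_GEM_DOMAIN_GTT, so a later CPU access has to
 * transition back, typically by way of i915_gem_object_set_to_cpu_domain().
 */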
2616
2617/**
2618 * Moves a single object to the CPU read, and possibly write domain.
2619 *
2620 * This function returns when the move is complete, including waiting on
2621 * flushes to occur.
2622 */
2623static int
2624i915_gem_object_set_to_cpu_domain(struct drm_gem_object *obj, int write)
2625{
e47c68e9
EA
2626 int ret;
2627
2628 i915_gem_object_flush_gpu_write_domain(obj);
2ef7eeaa 2629 /* Wait on any GPU rendering and flushing to occur. */
e47c68e9
EA
2630 ret = i915_gem_object_wait_rendering(obj);
2631 if (ret != 0)
2632 return ret;
2ef7eeaa 2633
e47c68e9 2634 i915_gem_object_flush_gtt_write_domain(obj);
2ef7eeaa 2635
e47c68e9
EA
2636 /* If we have a partially-valid cache of the object in the CPU,
2637 * finish invalidating it and free the per-page flags.
2ef7eeaa 2638 */
e47c68e9 2639 i915_gem_object_set_to_full_cpu_read_domain(obj);
2ef7eeaa 2640
e47c68e9
EA
2641 /* Flush the CPU cache if it's still invalid. */
2642 if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) {
2ef7eeaa 2643 i915_gem_clflush_object(obj);
2ef7eeaa 2644
e47c68e9 2645 obj->read_domains |= I915_GEM_DOMAIN_CPU;
2ef7eeaa
EA
2646 }
2647
2648 /* It should now be out of any other write domains, and we can update
2649 * the domain values for our changes.
2650 */
e47c68e9
EA
2651 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
2652
2653 /* If we're writing through the CPU, then the GPU read domains will
2654 * need to be invalidated at next use.
2655 */
2656 if (write) {
2657 obj->read_domains &= I915_GEM_DOMAIN_CPU;
2658 obj->write_domain = I915_GEM_DOMAIN_CPU;
2659 }
2ef7eeaa
EA
2660
2661 return 0;
2662}
2663
673a394b
EA
2664/*
2665 * Set the next domain for the specified object. This
2666 * may not actually perform the necessary flushing/invaliding though,
2667 * as that may want to be batched with other set_domain operations
2668 *
2669 * This is (we hope) the only really tricky part of gem. The goal
2670 * is fairly simple -- track which caches hold bits of the object
2671 * and make sure they remain coherent. A few concrete examples may
2672 * help to explain how it works. For shorthand, we use the notation
2673 * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
2674 * a pair of read and write domain masks.
2675 *
2676 * Case 1: the batch buffer
2677 *
2678 * 1. Allocated
2679 * 2. Written by CPU
2680 * 3. Mapped to GTT
2681 * 4. Read by GPU
2682 * 5. Unmapped from GTT
2683 * 6. Freed
2684 *
2685 * Let's take these a step at a time
2686 *
2687 * 1. Allocated
2688 * Pages allocated from the kernel may still have
2689 * cache contents, so we set them to (CPU, CPU) always.
2690 * 2. Written by CPU (using pwrite)
2691 * The pwrite function calls set_domain (CPU, CPU) and
2692 * this function does nothing (as nothing changes)
2693 * 3. Mapped by GTT
2694 * This function asserts that the object is not
2695 * currently in any GPU-based read or write domains
2696 * 4. Read by GPU
2697 * i915_gem_execbuffer calls set_domain (COMMAND, 0).
2698 * As write_domain is zero, this function adds in the
2699 * current read domains (CPU+COMMAND, 0).
2700 * flush_domains is set to CPU.
2701 * invalidate_domains is set to COMMAND
2702 * clflush is run to get data out of the CPU caches
2703 * then i915_dev_set_domain calls i915_gem_flush to
2704 * emit an MI_FLUSH and drm_agp_chipset_flush
2705 * 5. Unmapped from GTT
2706 * i915_gem_object_unbind calls set_domain (CPU, CPU)
2707 * flush_domains and invalidate_domains end up both zero
2708 * so no flushing/invalidating happens
2709 * 6. Freed
2710 * yay, done
2711 *
2712 * Case 2: The shared render buffer
2713 *
2714 * 1. Allocated
2715 * 2. Mapped to GTT
2716 * 3. Read/written by GPU
2717 * 4. set_domain to (CPU,CPU)
2718 * 5. Read/written by CPU
2719 * 6. Read/written by GPU
2720 *
2721 * 1. Allocated
2722 * Same as last example, (CPU, CPU)
2723 * 2. Mapped to GTT
2724 * Nothing changes (assertions find that it is not in the GPU)
2725 * 3. Read/written by GPU
2726 * execbuffer calls set_domain (RENDER, RENDER)
2727 * flush_domains gets CPU
2728 * invalidate_domains gets GPU
2729 * clflush (obj)
2730 * MI_FLUSH and drm_agp_chipset_flush
2731 * 4. set_domain (CPU, CPU)
2732 * flush_domains gets GPU
2733 * invalidate_domains gets CPU
2734 * wait_rendering (obj) to make sure all drawing is complete.
2735 * This will include an MI_FLUSH to get the data from GPU
2736 * to memory
2737 * clflush (obj) to invalidate the CPU cache
2738 * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
2739 * 5. Read/written by CPU
2740 * cache lines are loaded and dirtied
2741 * 6. Read/written by GPU
2742 * Same as last GPU access
2743 *
2744 * Case 3: The constant buffer
2745 *
2746 * 1. Allocated
2747 * 2. Written by CPU
2748 * 3. Read by GPU
2749 * 4. Updated (written) by CPU again
2750 * 5. Read by GPU
2751 *
2752 * 1. Allocated
2753 * (CPU, CPU)
2754 * 2. Written by CPU
2755 * (CPU, CPU)
2756 * 3. Read by GPU
2757 * (CPU+RENDER, 0)
2758 * flush_domains = CPU
2759 * invalidate_domains = RENDER
2760 * clflush (obj)
2761 * MI_FLUSH
2762 * drm_agp_chipset_flush
2763 * 4. Updated (written) by CPU again
2764 * (CPU, CPU)
2765 * flush_domains = 0 (no previous write domain)
2766 * invalidate_domains = 0 (no new read domains)
2767 * 5. Read by GPU
2768 * (CPU+RENDER, 0)
2769 * flush_domains = CPU
2770 * invalidate_domains = RENDER
2771 * clflush (obj)
2772 * MI_FLUSH
2773 * drm_agp_chipset_flush
2774 */
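/*
 * Illustrative mapping to user space (sketch, hypothetical sequence):
 * case 1 above corresponds roughly to
 *
 *	GEM_CREATE                           -> (CPU, CPU)
 *	GEM_PWRITE of the batch contents     -> (CPU, CPU), nothing to flush
 *	GEM_EXECBUFFER referencing the batch -> (CPU+COMMAND, 0), clflush
 *	                                        plus MI_FLUSH emitted
 *	object retires and is freed          -> done
 *
 * with the per-object bookkeeping below computing the flush/invalidate
 * masks that i915_gem_execbuffer() hands to i915_gem_flush().
 */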
c0d90829 2775static void
8b0e378a 2776i915_gem_object_set_to_gpu_domain(struct drm_gem_object *obj)
673a394b
EA
2777{
2778 struct drm_device *dev = obj->dev;
2779 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2780 uint32_t invalidate_domains = 0;
2781 uint32_t flush_domains = 0;
e47c68e9 2782
8b0e378a
EA
2783 BUG_ON(obj->pending_read_domains & I915_GEM_DOMAIN_CPU);
2784 BUG_ON(obj->pending_write_domain == I915_GEM_DOMAIN_CPU);
673a394b 2785
652c393a
JB
2786 intel_mark_busy(dev, obj);
2787
673a394b
EA
2788#if WATCH_BUF
2789 DRM_INFO("%s: object %p read %08x -> %08x write %08x -> %08x\n",
2790 __func__, obj,
8b0e378a
EA
2791 obj->read_domains, obj->pending_read_domains,
2792 obj->write_domain, obj->pending_write_domain);
673a394b
EA
2793#endif
2794 /*
2795 * If the object isn't moving to a new write domain,
2796 * let the object stay in multiple read domains
2797 */
8b0e378a
EA
2798 if (obj->pending_write_domain == 0)
2799 obj->pending_read_domains |= obj->read_domains;
673a394b
EA
2800 else
2801 obj_priv->dirty = 1;
2802
2803 /*
2804 * Flush the current write domain if
2805 * the new read domains don't match. Invalidate
2806 * any read domains which differ from the old
2807 * write domain
2808 */
8b0e378a
EA
2809 if (obj->write_domain &&
2810 obj->write_domain != obj->pending_read_domains) {
673a394b 2811 flush_domains |= obj->write_domain;
8b0e378a
EA
2812 invalidate_domains |=
2813 obj->pending_read_domains & ~obj->write_domain;
673a394b
EA
2814 }
2815 /*
2816 * Invalidate any read caches which may have
2817 * stale data. That is, any new read domains.
2818 */
8b0e378a 2819 invalidate_domains |= obj->pending_read_domains & ~obj->read_domains;
673a394b
EA
2820 if ((flush_domains | invalidate_domains) & I915_GEM_DOMAIN_CPU) {
2821#if WATCH_BUF
2822 DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n",
2823 __func__, flush_domains, invalidate_domains);
2824#endif
673a394b
EA
2825 i915_gem_clflush_object(obj);
2826 }
2827
efbeed96
EA
2828 /* The actual obj->write_domain will be updated with
2829 * pending_write_domain after we emit the accumulated flush for all
2830 * of our domain changes in execbuffers (which clears objects'
2831 * write_domains). So if we have a current write domain that we
2832 * aren't changing, set pending_write_domain to that.
2833 */
2834 if (flush_domains == 0 && obj->pending_write_domain == 0)
2835 obj->pending_write_domain = obj->write_domain;
8b0e378a 2836 obj->read_domains = obj->pending_read_domains;
673a394b
EA
2837
2838 dev->invalidate_domains |= invalidate_domains;
2839 dev->flush_domains |= flush_domains;
2840#if WATCH_BUF
2841 DRM_INFO("%s: read %08x write %08x invalidate %08x flush %08x\n",
2842 __func__,
2843 obj->read_domains, obj->write_domain,
2844 dev->invalidate_domains, dev->flush_domains);
2845#endif
673a394b
EA
2846}
2847
2848/**
e47c68e9 2849 * Moves the object from a partially CPU read to a full one.
673a394b 2850 *
e47c68e9
EA
2851 * Note that this only resolves i915_gem_object_set_cpu_read_domain_range(),
2852 * and doesn't handle transitioning from !(read_domains & I915_GEM_DOMAIN_CPU).
673a394b 2853 */
e47c68e9
EA
2854static void
2855i915_gem_object_set_to_full_cpu_read_domain(struct drm_gem_object *obj)
673a394b
EA
2856{
2857 struct drm_i915_gem_object *obj_priv = obj->driver_private;
673a394b 2858
e47c68e9
EA
2859 if (!obj_priv->page_cpu_valid)
2860 return;
2861
2862 /* If we're partially in the CPU read domain, finish moving it in.
2863 */
2864 if (obj->read_domains & I915_GEM_DOMAIN_CPU) {
2865 int i;
2866
2867 for (i = 0; i <= (obj->size - 1) / PAGE_SIZE; i++) {
2868 if (obj_priv->page_cpu_valid[i])
2869 continue;
856fa198 2870 drm_clflush_pages(obj_priv->pages + i, 1);
e47c68e9 2871 }
e47c68e9
EA
2872 }
2873
2874 /* Free the page_cpu_valid mappings which are now stale, whether
2875 * or not we've got I915_GEM_DOMAIN_CPU.
2876 */
9a298b2a 2877 kfree(obj_priv->page_cpu_valid);
e47c68e9
EA
2878 obj_priv->page_cpu_valid = NULL;
2879}
2880
2881/**
2882 * Set the CPU read domain on a range of the object.
2883 *
2884 * The object ends up with I915_GEM_DOMAIN_CPU in its read flags although it's
2885 * not entirely valid. The page_cpu_valid member of the object flags which
2886 * pages have been flushed, and will be respected by
2887 * i915_gem_object_set_to_cpu_domain() if it's called on to get a valid mapping
2888 * of the whole object.
2889 *
2890 * This function returns when the move is complete, including waiting on
2891 * flushes to occur.
2892 */
2893static int
2894i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj,
2895 uint64_t offset, uint64_t size)
2896{
2897 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2898 int i, ret;
673a394b 2899
e47c68e9
EA
2900 if (offset == 0 && size == obj->size)
2901 return i915_gem_object_set_to_cpu_domain(obj, 0);
673a394b 2902
e47c68e9
EA
2903 i915_gem_object_flush_gpu_write_domain(obj);
2904 /* Wait on any GPU rendering and flushing to occur. */
6a47baa6 2905 ret = i915_gem_object_wait_rendering(obj);
e47c68e9 2906 if (ret != 0)
6a47baa6 2907 return ret;
e47c68e9
EA
2908 i915_gem_object_flush_gtt_write_domain(obj);
2909
2910 /* If we're already fully in the CPU read domain, we're done. */
2911 if (obj_priv->page_cpu_valid == NULL &&
2912 (obj->read_domains & I915_GEM_DOMAIN_CPU) != 0)
2913 return 0;
673a394b 2914
e47c68e9
EA
2915 /* Otherwise, create/clear the per-page CPU read domain flag if we're
2916 * newly adding I915_GEM_DOMAIN_CPU
2917 */
673a394b 2918 if (obj_priv->page_cpu_valid == NULL) {
9a298b2a
EA
2919 obj_priv->page_cpu_valid = kzalloc(obj->size / PAGE_SIZE,
2920 GFP_KERNEL);
e47c68e9
EA
2921 if (obj_priv->page_cpu_valid == NULL)
2922 return -ENOMEM;
2923 } else if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0)
2924 memset(obj_priv->page_cpu_valid, 0, obj->size / PAGE_SIZE);
673a394b
EA
2925
2926 /* Flush the cache on any pages that are still invalid from the CPU's
2927 * perspective.
2928 */
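	/* Worked example (illustrative, assuming 4K pages): offset == 4096
	 * and size == 8192 clflushes pages 1 and 2 only, since i runs from
	 * offset / PAGE_SIZE == 1 up to (offset + size - 1) / PAGE_SIZE == 2.
	 */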
e47c68e9
EA
2929 for (i = offset / PAGE_SIZE; i <= (offset + size - 1) / PAGE_SIZE;
2930 i++) {
673a394b
EA
2931 if (obj_priv->page_cpu_valid[i])
2932 continue;
2933
856fa198 2934 drm_clflush_pages(obj_priv->pages + i, 1);
673a394b
EA
2935
2936 obj_priv->page_cpu_valid[i] = 1;
2937 }
2938
e47c68e9
EA
2939 /* It should now be out of any other write domains, and we can update
2940 * the domain values for our changes.
2941 */
2942 BUG_ON((obj->write_domain & ~I915_GEM_DOMAIN_CPU) != 0);
2943
2944 obj->read_domains |= I915_GEM_DOMAIN_CPU;
2945
673a394b
EA
2946 return 0;
2947}
2948
673a394b
EA
2949/**
2950 * Pin an object to the GTT and evaluate the relocations landing in it.
2951 */
2952static int
2953i915_gem_object_pin_and_relocate(struct drm_gem_object *obj,
2954 struct drm_file *file_priv,
40a5f0de
EA
2955 struct drm_i915_gem_exec_object *entry,
2956 struct drm_i915_gem_relocation_entry *relocs)
673a394b
EA
2957{
2958 struct drm_device *dev = obj->dev;
0839ccb8 2959 drm_i915_private_t *dev_priv = dev->dev_private;
673a394b
EA
2960 struct drm_i915_gem_object *obj_priv = obj->driver_private;
2961 int i, ret;
0839ccb8 2962 void __iomem *reloc_page;
673a394b
EA
2963
2964 /* Choose the GTT offset for our buffer and put it there. */
2965 ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment);
2966 if (ret)
2967 return ret;
2968
2969 entry->offset = obj_priv->gtt_offset;
2970
673a394b
EA
2971 /* Apply the relocations, using the GTT aperture to avoid cache
2972 * flushing requirements.
2973 */
2974 for (i = 0; i < entry->relocation_count; i++) {
40a5f0de 2975 struct drm_i915_gem_relocation_entry *reloc = &relocs[i];
673a394b
EA
2976 struct drm_gem_object *target_obj;
2977 struct drm_i915_gem_object *target_obj_priv;
3043c60c
EA
2978 uint32_t reloc_val, reloc_offset;
2979 uint32_t __iomem *reloc_entry;
673a394b 2980
673a394b 2981 target_obj = drm_gem_object_lookup(obj->dev, file_priv,
40a5f0de 2982 reloc->target_handle);
673a394b
EA
2983 if (target_obj == NULL) {
2984 i915_gem_object_unpin(obj);
2985 return -EBADF;
2986 }
2987 target_obj_priv = target_obj->driver_private;
2988
2989 /* The target buffer should have appeared before us in the
2990 * exec_object list, so it should have a GTT space bound by now.
2991 */
2992 if (target_obj_priv->gtt_space == NULL) {
2993 DRM_ERROR("No GTT space found for object %d\n",
40a5f0de 2994 reloc->target_handle);
673a394b
EA
2995 drm_gem_object_unreference(target_obj);
2996 i915_gem_object_unpin(obj);
2997 return -EINVAL;
2998 }
2999
40a5f0de 3000 if (reloc->offset > obj->size - 4) {
673a394b
EA
3001 DRM_ERROR("Relocation beyond object bounds: "
3002 "obj %p target %d offset %d size %d.\n",
40a5f0de
EA
3003 obj, reloc->target_handle,
3004 (int) reloc->offset, (int) obj->size);
673a394b
EA
3005 drm_gem_object_unreference(target_obj);
3006 i915_gem_object_unpin(obj);
3007 return -EINVAL;
3008 }
40a5f0de 3009 if (reloc->offset & 3) {
673a394b
EA
3010 DRM_ERROR("Relocation not 4-byte aligned: "
3011 "obj %p target %d offset %d.\n",
40a5f0de
EA
3012 obj, reloc->target_handle,
3013 (int) reloc->offset);
673a394b
EA
3014 drm_gem_object_unreference(target_obj);
3015 i915_gem_object_unpin(obj);
3016 return -EINVAL;
3017 }
3018
40a5f0de
EA
3019 if (reloc->write_domain & I915_GEM_DOMAIN_CPU ||
3020 reloc->read_domains & I915_GEM_DOMAIN_CPU) {
e47c68e9
EA
3021 DRM_ERROR("reloc with read/write CPU domains: "
3022 "obj %p target %d offset %d "
3023 "read %08x write %08x",
40a5f0de
EA
3024 obj, reloc->target_handle,
3025 (int) reloc->offset,
3026 reloc->read_domains,
3027 reloc->write_domain);
491152b8
CW
3028 drm_gem_object_unreference(target_obj);
3029 i915_gem_object_unpin(obj);
e47c68e9
EA
3030 return -EINVAL;
3031 }
3032
40a5f0de
EA
3033 if (reloc->write_domain && target_obj->pending_write_domain &&
3034 reloc->write_domain != target_obj->pending_write_domain) {
673a394b
EA
3035 DRM_ERROR("Write domain conflict: "
3036 "obj %p target %d offset %d "
3037 "new %08x old %08x\n",
40a5f0de
EA
3038 obj, reloc->target_handle,
3039 (int) reloc->offset,
3040 reloc->write_domain,
673a394b
EA
3041 target_obj->pending_write_domain);
3042 drm_gem_object_unreference(target_obj);
3043 i915_gem_object_unpin(obj);
3044 return -EINVAL;
3045 }
3046
3047#if WATCH_RELOC
3048 DRM_INFO("%s: obj %p offset %08x target %d "
3049 "read %08x write %08x gtt %08x "
3050 "presumed %08x delta %08x\n",
3051 __func__,
3052 obj,
40a5f0de
EA
3053 (int) reloc->offset,
3054 (int) reloc->target_handle,
3055 (int) reloc->read_domains,
3056 (int) reloc->write_domain,
673a394b 3057 (int) target_obj_priv->gtt_offset,
40a5f0de
EA
3058 (int) reloc->presumed_offset,
3059 reloc->delta);
673a394b
EA
3060#endif
3061
40a5f0de
EA
3062 target_obj->pending_read_domains |= reloc->read_domains;
3063 target_obj->pending_write_domain |= reloc->write_domain;
673a394b
EA
3064
3065 /* If the relocation already has the right value in it, no
3066 * more work needs to be done.
3067 */
40a5f0de 3068 if (target_obj_priv->gtt_offset == reloc->presumed_offset) {
673a394b
EA
3069 drm_gem_object_unreference(target_obj);
3070 continue;
3071 }
3072
2ef7eeaa
EA
3073 ret = i915_gem_object_set_to_gtt_domain(obj, 1);
3074 if (ret != 0) {
3075 drm_gem_object_unreference(target_obj);
3076 i915_gem_object_unpin(obj);
3077 return -EINVAL;
673a394b
EA
3078 }
3079
3080 /* Map the page containing the relocation we're going to
3081 * perform.
3082 */
40a5f0de 3083 reloc_offset = obj_priv->gtt_offset + reloc->offset;
0839ccb8
KP
3084 reloc_page = io_mapping_map_atomic_wc(dev_priv->mm.gtt_mapping,
3085 (reloc_offset &
3086 ~(PAGE_SIZE - 1)));
3043c60c 3087 reloc_entry = (uint32_t __iomem *)(reloc_page +
0839ccb8 3088 (reloc_offset & (PAGE_SIZE - 1)));
40a5f0de 3089 reloc_val = target_obj_priv->gtt_offset + reloc->delta;
673a394b
EA
3090
3091#if WATCH_BUF
3092 DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n",
40a5f0de 3093 obj, (unsigned int) reloc->offset,
673a394b
EA
3094 readl(reloc_entry), reloc_val);
3095#endif
3096 writel(reloc_val, reloc_entry);
0839ccb8 3097 io_mapping_unmap_atomic(reloc_page);
673a394b 3098
40a5f0de
EA
3099 /* The updated presumed offset for this entry will be
3100 * copied back out to the user.
673a394b 3101 */
40a5f0de 3102 reloc->presumed_offset = target_obj_priv->gtt_offset;
673a394b
EA
3103
3104 drm_gem_object_unreference(target_obj);
3105 }
3106
673a394b
EA
3107#if WATCH_BUF
3108 if (0)
3109 i915_gem_dump_object(obj, 128, __func__, ~0);
3110#endif
3111 return 0;
3112}
3113
3114/** Dispatch a batchbuffer to the ring
3115 */
3116static int
3117i915_dispatch_gem_execbuffer(struct drm_device *dev,
3118 struct drm_i915_gem_execbuffer *exec,
201361a5 3119 struct drm_clip_rect *cliprects,
673a394b
EA
3120 uint64_t exec_offset)
3121{
3122 drm_i915_private_t *dev_priv = dev->dev_private;
673a394b
EA
3123 int nbox = exec->num_cliprects;
3124 int i = 0, count;
83d60795 3125 uint32_t exec_start, exec_len;
673a394b
EA
3126 RING_LOCALS;
3127
3128 exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
3129 exec_len = (uint32_t) exec->batch_len;
3130
673a394b
EA
3131 count = nbox ? nbox : 1;
3132
3133 for (i = 0; i < count; i++) {
3134 if (i < nbox) {
201361a5 3135 int ret = i915_emit_box(dev, cliprects, i,
673a394b
EA
3136 exec->DR1, exec->DR4);
3137 if (ret)
3138 return ret;
3139 }
3140
3141 if (IS_I830(dev) || IS_845G(dev)) {
3142 BEGIN_LP_RING(4);
3143 OUT_RING(MI_BATCH_BUFFER);
3144 OUT_RING(exec_start | MI_BATCH_NON_SECURE);
3145 OUT_RING(exec_start + exec_len - 4);
3146 OUT_RING(0);
3147 ADVANCE_LP_RING();
3148 } else {
3149 BEGIN_LP_RING(2);
3150 if (IS_I965G(dev)) {
3151 OUT_RING(MI_BATCH_BUFFER_START |
3152 (2 << 6) |
3153 MI_BATCH_NON_SECURE_I965);
3154 OUT_RING(exec_start);
3155 } else {
3156 OUT_RING(MI_BATCH_BUFFER_START |
3157 (2 << 6));
3158 OUT_RING(exec_start | MI_BATCH_NON_SECURE);
3159 }
3160 ADVANCE_LP_RING();
3161 }
3162 }
3163
3164 /* XXX breadcrumb */
3165 return 0;
3166}
3167
3168/* Throttle our rendering by waiting until the ring has completed our requests
3169 * emitted over 20 msec ago.
3170 *
b962442e
EA
3171 * Note that if we were to use the current jiffies each time around the loop,
3172 * we wouldn't escape the function with any frames outstanding if the time to
3173 * render a frame was over 20ms.
3174 *
673a394b
EA
3175 * This should get us reasonable parallelism between CPU and GPU but also
3176 * relatively low latency when blocking on a particular request to finish.
3177 */
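/*
 * Illustrative consequence: because "recent_enough" is snapshotted once at
 * entry, requests emitted during the 20 ms before the call are never waited
 * on, so up to ~20 ms of rendering can stay queued when we return.
 */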
3178static int
3179i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv)
3180{
3181 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
3182 int ret = 0;
b962442e 3183 unsigned long recent_enough = jiffies - msecs_to_jiffies(20);
673a394b
EA
3184
3185 mutex_lock(&dev->struct_mutex);
b962442e
EA
3186 while (!list_empty(&i915_file_priv->mm.request_list)) {
3187 struct drm_i915_gem_request *request;
3188
3189 request = list_first_entry(&i915_file_priv->mm.request_list,
3190 struct drm_i915_gem_request,
3191 client_list);
3192
3193 if (time_after_eq(request->emitted_jiffies, recent_enough))
3194 break;
3195
3196 ret = i915_wait_request(dev, request->seqno);
3197 if (ret != 0)
3198 break;
3199 }
673a394b 3200 mutex_unlock(&dev->struct_mutex);
b962442e 3201
673a394b
EA
3202 return ret;
3203}
3204
40a5f0de
EA
3205static int
3206i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list,
3207 uint32_t buffer_count,
3208 struct drm_i915_gem_relocation_entry **relocs)
3209{
3210 uint32_t reloc_count = 0, reloc_index = 0, i;
3211 int ret;
3212
3213 *relocs = NULL;
3214 for (i = 0; i < buffer_count; i++) {
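		/* Guard against uint32_t overflow of the total relocation
		 * count: if the sum wraps, it compares as smaller than the
		 * running total and we bail out.
		 */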
3215 if (reloc_count + exec_list[i].relocation_count < reloc_count)
3216 return -EINVAL;
3217 reloc_count += exec_list[i].relocation_count;
3218 }
3219
8e7d2b2c 3220 *relocs = drm_calloc_large(reloc_count, sizeof(**relocs));
40a5f0de
EA
3221 if (*relocs == NULL)
3222 return -ENOMEM;
3223
3224 for (i = 0; i < buffer_count; i++) {
3225 struct drm_i915_gem_relocation_entry __user *user_relocs;
3226
3227 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3228
3229 ret = copy_from_user(&(*relocs)[reloc_index],
3230 user_relocs,
3231 exec_list[i].relocation_count *
3232 sizeof(**relocs));
3233 if (ret != 0) {
8e7d2b2c 3234 drm_free_large(*relocs);
40a5f0de 3235 *relocs = NULL;
2bc43b5c 3236 return -EFAULT;
40a5f0de
EA
3237 }
3238
3239 reloc_index += exec_list[i].relocation_count;
3240 }
3241
2bc43b5c 3242 return 0;
40a5f0de
EA
3243}
3244
3245static int
3246i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object *exec_list,
3247 uint32_t buffer_count,
3248 struct drm_i915_gem_relocation_entry *relocs)
3249{
3250 uint32_t reloc_count = 0, i;
2bc43b5c 3251 int ret = 0;
40a5f0de
EA
3252
3253 for (i = 0; i < buffer_count; i++) {
3254 struct drm_i915_gem_relocation_entry __user *user_relocs;
2bc43b5c 3255 int unwritten;
40a5f0de
EA
3256
3257 user_relocs = (void __user *)(uintptr_t)exec_list[i].relocs_ptr;
3258
2bc43b5c
FM
3259 unwritten = copy_to_user(user_relocs,
3260 &relocs[reloc_count],
3261 exec_list[i].relocation_count *
3262 sizeof(*relocs));
3263
3264 if (unwritten) {
3265 ret = -EFAULT;
3266 goto err;
40a5f0de
EA
3267 }
3268
3269 reloc_count += exec_list[i].relocation_count;
3270 }
3271
2bc43b5c 3272err:
8e7d2b2c 3273 drm_free_large(relocs);
40a5f0de
EA
3274
3275 return ret;
3276}
3277
83d60795
CW
3278static int
3279i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer *exec,
3280 uint64_t exec_offset)
3281{
3282 uint32_t exec_start, exec_len;
3283
3284 exec_start = (uint32_t) exec_offset + exec->batch_start_offset;
3285 exec_len = (uint32_t) exec->batch_len;
3286
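	/* The batch must start at a non-zero offset, and both its start and
	 * its length must be 8-byte aligned; anything else is rejected.
	 */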
3287 if ((exec_start | exec_len) & 0x7)
3288 return -EINVAL;
3289
3290 if (!exec_start)
3291 return -EINVAL;
3292
3293 return 0;
3294}
3295
673a394b
EA
3296int
3297i915_gem_execbuffer(struct drm_device *dev, void *data,
3298 struct drm_file *file_priv)
3299{
3300 drm_i915_private_t *dev_priv = dev->dev_private;
673a394b
EA
3301 struct drm_i915_gem_execbuffer *args = data;
3302 struct drm_i915_gem_exec_object *exec_list = NULL;
3303 struct drm_gem_object **object_list = NULL;
3304 struct drm_gem_object *batch_obj;
b70d11da 3305 struct drm_i915_gem_object *obj_priv;
201361a5 3306 struct drm_clip_rect *cliprects = NULL;
40a5f0de
EA
3307 struct drm_i915_gem_relocation_entry *relocs;
3308 int ret, ret2, i, pinned = 0;
673a394b 3309 uint64_t exec_offset;
40a5f0de 3310 uint32_t seqno, flush_domains, reloc_index;
ac94a962 3311 int pin_tries;
673a394b
EA
3312
3313#if WATCH_EXEC
3314 DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n",
3315 (int) args->buffers_ptr, args->buffer_count, args->batch_len);
3316#endif
3317
4f481ed2
EA
3318 if (args->buffer_count < 1) {
3319 DRM_ERROR("execbuf with %d buffers\n", args->buffer_count);
3320 return -EINVAL;
3321 }
673a394b 3322 /* Copy in the exec list from userland */
8e7d2b2c
JB
3323 exec_list = drm_calloc_large(sizeof(*exec_list), args->buffer_count);
3324 object_list = drm_calloc_large(sizeof(*object_list), args->buffer_count);
673a394b
EA
3325 if (exec_list == NULL || object_list == NULL) {
3326 DRM_ERROR("Failed to allocate exec or object list "
3327 "for %d buffers\n",
3328 args->buffer_count);
3329 ret = -ENOMEM;
3330 goto pre_mutex_err;
3331 }
3332 ret = copy_from_user(exec_list,
3333 (struct drm_i915_relocation_entry __user *)
3334 (uintptr_t) args->buffers_ptr,
3335 sizeof(*exec_list) * args->buffer_count);
3336 if (ret != 0) {
3337 DRM_ERROR("copy %d exec entries failed %d\n",
3338 args->buffer_count, ret);
3339 goto pre_mutex_err;
3340 }
3341
201361a5 3342 if (args->num_cliprects != 0) {
9a298b2a
EA
3343 cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects),
3344 GFP_KERNEL);
201361a5
EA
3345 if (cliprects == NULL) {
3346 ret = -ENOMEM;
			goto pre_mutex_err;
		}
3347
3348 ret = copy_from_user(cliprects,
3349 (struct drm_clip_rect __user *)
3350 (uintptr_t) args->cliprects_ptr,
3351 sizeof(*cliprects) * args->num_cliprects);
3352 if (ret != 0) {
3353 DRM_ERROR("copy %d cliprects failed: %d\n",
3354 args->num_cliprects, ret);
3355 goto pre_mutex_err;
3356 }
3357 }
3358
40a5f0de
EA
3359 ret = i915_gem_get_relocs_from_user(exec_list, args->buffer_count,
3360 &relocs);
3361 if (ret != 0)
3362 goto pre_mutex_err;
3363
673a394b
EA
3364 mutex_lock(&dev->struct_mutex);
3365
3366 i915_verify_inactive(dev, __FILE__, __LINE__);
3367
ba1234d1 3368 if (atomic_read(&dev_priv->mm.wedged)) {
673a394b
EA
3369 DRM_ERROR("Execbuf while wedged\n");
3370 mutex_unlock(&dev->struct_mutex);
a198bc80
CW
3371 ret = -EIO;
3372 goto pre_mutex_err;
673a394b
EA
3373 }
3374
3375 if (dev_priv->mm.suspended) {
3376 DRM_ERROR("Execbuf while VT-switched.\n");
3377 mutex_unlock(&dev->struct_mutex);
a198bc80
CW
3378 ret = -EBUSY;
3379 goto pre_mutex_err;
673a394b
EA
3380 }
3381
ac94a962 3382 /* Look up object handles */
673a394b
EA
3383 for (i = 0; i < args->buffer_count; i++) {
3384 object_list[i] = drm_gem_object_lookup(dev, file_priv,
3385 exec_list[i].handle);
3386 if (object_list[i] == NULL) {
3387 DRM_ERROR("Invalid object handle %d at index %d\n",
3388 exec_list[i].handle, i);
3389 ret = -EBADF;
3390 goto err;
3391 }
b70d11da
KH
3392
3393 obj_priv = object_list[i]->driver_private;
3394 if (obj_priv->in_execbuffer) {
3395 DRM_ERROR("Object %p appears more than once in object list\n",
3396 object_list[i]);
3397 ret = -EBADF;
3398 goto err;
3399 }
3400 obj_priv->in_execbuffer = true;
ac94a962 3401 }
673a394b 3402
ac94a962
KP
3403 /* Pin and relocate */
3404 for (pin_tries = 0; ; pin_tries++) {
3405 ret = 0;
40a5f0de
EA
3406 reloc_index = 0;
3407
ac94a962
KP
3408 for (i = 0; i < args->buffer_count; i++) {
3409 object_list[i]->pending_read_domains = 0;
3410 object_list[i]->pending_write_domain = 0;
3411 ret = i915_gem_object_pin_and_relocate(object_list[i],
3412 file_priv,
40a5f0de
EA
3413 &exec_list[i],
3414 &relocs[reloc_index]);
ac94a962
KP
3415 if (ret)
3416 break;
3417 pinned = i + 1;
40a5f0de 3418 reloc_index += exec_list[i].relocation_count;
ac94a962
KP
3419 }
3420 /* success */
3421 if (ret == 0)
3422 break;
3423
3424 /* error other than GTT full, or we've already tried again */
2939e1f5 3425 if (ret != -ENOSPC || pin_tries >= 1) {
f1acec93
EA
3426 if (ret != -ERESTARTSYS)
3427 DRM_ERROR("Failed to pin buffers %d\n", ret);
673a394b
EA
3428 goto err;
3429 }
ac94a962
KP
3430
3431 /* unpin all of our buffers */
3432 for (i = 0; i < pinned; i++)
3433 i915_gem_object_unpin(object_list[i]);
b1177636 3434 pinned = 0;
ac94a962
KP
3435
3436 /* evict everyone we can from the aperture */
3437 ret = i915_gem_evict_everything(dev);
3438 if (ret)
3439 goto err;
673a394b
EA
3440 }
3441
3442 /* Set the pending read domains for the batch buffer to COMMAND */
3443 batch_obj = object_list[args->buffer_count-1];
5f26a2c7
CW
3444 if (batch_obj->pending_write_domain) {
3445 DRM_ERROR("Attempting to use self-modifying batch buffer\n");
3446 ret = -EINVAL;
3447 goto err;
3448 }
3449 batch_obj->pending_read_domains |= I915_GEM_DOMAIN_COMMAND;
673a394b 3450
83d60795
CW
3451 /* Sanity check the batch buffer, prior to moving objects */
3452 exec_offset = exec_list[args->buffer_count - 1].offset;
3453 ret = i915_gem_check_execbuffer(args, exec_offset);
3454 if (ret != 0) {
3455 DRM_ERROR("execbuf with invalid offset/length\n");
3456 goto err;
3457 }
3458
673a394b
EA
3459 i915_verify_inactive(dev, __FILE__, __LINE__);
3460
646f0f6e
KP
3461 /* Zero the global flush/invalidate flags. These
3462 * will be modified as new domains are computed
3463 * for each object
3464 */
3465 dev->invalidate_domains = 0;
3466 dev->flush_domains = 0;
3467
673a394b
EA
3468 for (i = 0; i < args->buffer_count; i++) {
3469 struct drm_gem_object *obj = object_list[i];
673a394b 3470
646f0f6e 3471 /* Compute new gpu domains and update invalidate/flush */
8b0e378a 3472 i915_gem_object_set_to_gpu_domain(obj);
673a394b
EA
3473 }
3474
3475 i915_verify_inactive(dev, __FILE__, __LINE__);
3476
646f0f6e
KP
3477 if (dev->invalidate_domains | dev->flush_domains) {
3478#if WATCH_EXEC
3479 DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n",
3480 __func__,
3481 dev->invalidate_domains,
3482 dev->flush_domains);
3483#endif
3484 i915_gem_flush(dev,
3485 dev->invalidate_domains,
3486 dev->flush_domains);
3487 if (dev->flush_domains)
b962442e
EA
3488 (void)i915_add_request(dev, file_priv,
3489 dev->flush_domains);
646f0f6e 3490 }
673a394b 3491
efbeed96
EA
3492 for (i = 0; i < args->buffer_count; i++) {
3493 struct drm_gem_object *obj = object_list[i];
3494
3495 obj->write_domain = obj->pending_write_domain;
3496 }
3497
673a394b
EA
3498 i915_verify_inactive(dev, __FILE__, __LINE__);
3499
3500#if WATCH_COHERENCY
3501 for (i = 0; i < args->buffer_count; i++) {
3502 i915_gem_object_check_coherency(object_list[i],
3503 exec_list[i].handle);
3504 }
3505#endif
3506
673a394b 3507#if WATCH_EXEC
6911a9b8 3508 i915_gem_dump_object(batch_obj,
673a394b
EA
3509 args->batch_len,
3510 __func__,
3511 ~0);
3512#endif
3513
673a394b 3514 /* Exec the batchbuffer */
201361a5 3515 ret = i915_dispatch_gem_execbuffer(dev, args, cliprects, exec_offset);
673a394b
EA
3516 if (ret) {
3517 DRM_ERROR("dispatch failed %d\n", ret);
3518 goto err;
3519 }
3520
3521 /*
3522 * Ensure that the commands in the batch buffer are
3523 * finished before the interrupt fires
3524 */
3525 flush_domains = i915_retire_commands(dev);
3526
3527 i915_verify_inactive(dev, __FILE__, __LINE__);
3528
3529 /*
3530 * Get a seqno representing the execution of the current buffer,
3531 * which we can wait on. We would like to mitigate these interrupts,
3532 * likely by only creating seqnos occasionally (so that we have
3533 * *some* interrupts representing completion of buffers that we can
3534 * wait on when trying to clear up gtt space).
3535 */
b962442e 3536 seqno = i915_add_request(dev, file_priv, flush_domains);
673a394b 3537 BUG_ON(seqno == 0);
673a394b
EA
3538 for (i = 0; i < args->buffer_count; i++) {
3539 struct drm_gem_object *obj = object_list[i];
673a394b 3540
ce44b0ea 3541 i915_gem_object_move_to_active(obj, seqno);
673a394b
EA
3542#if WATCH_LRU
3543 DRM_INFO("%s: move to exec list %p\n", __func__, obj);
3544#endif
3545 }
3546#if WATCH_LRU
3547 i915_dump_lru(dev, __func__);
3548#endif
3549
3550 i915_verify_inactive(dev, __FILE__, __LINE__);
3551
673a394b 3552err:
aad87dff
JL
3553 for (i = 0; i < pinned; i++)
3554 i915_gem_object_unpin(object_list[i]);
3555
b70d11da
KH
3556 for (i = 0; i < args->buffer_count; i++) {
3557 if (object_list[i]) {
3558 obj_priv = object_list[i]->driver_private;
3559 obj_priv->in_execbuffer = false;
3560 }
aad87dff 3561 drm_gem_object_unreference(object_list[i]);
b70d11da 3562 }
673a394b 3563
673a394b
EA
3564 mutex_unlock(&dev->struct_mutex);
3565
a35f2e2b
RD
3566 if (!ret) {
3567 /* Copy the new buffer offsets back to the user's exec list. */
3568 ret = copy_to_user((struct drm_i915_gem_exec_object __user *)
3569 (uintptr_t) args->buffers_ptr,
3570 exec_list,
3571 sizeof(*exec_list) * args->buffer_count);
2bc43b5c
FM
3572 if (ret) {
3573 ret = -EFAULT;
a35f2e2b
RD
3574 DRM_ERROR("failed to copy %d exec entries "
3575 "back to user (%d)\n",
3576 args->buffer_count, ret);
2bc43b5c 3577 }
a35f2e2b
RD
3578 }
3579
40a5f0de
EA
3580 /* Copy the updated relocations out regardless of current error
3581 * state. Failure to update the relocs would mean that the next
3582 * time userland calls execbuf, it would do so with presumed offset
3583 * state that didn't match the actual object state.
3584 */
3585 ret2 = i915_gem_put_relocs_to_user(exec_list, args->buffer_count,
3586 relocs);
3587 if (ret2 != 0) {
3588 DRM_ERROR("Failed to copy relocations back out: %d\n", ret2);
3589
3590 if (ret == 0)
3591 ret = ret2;
3592 }
3593
673a394b 3594pre_mutex_err:
8e7d2b2c
JB
3595 drm_free_large(object_list);
3596 drm_free_large(exec_list);
9a298b2a 3597 kfree(cliprects);
673a394b
EA
3598
3599 return ret;
3600}
3601
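The loop above implements the execbuffer pinning policy: try to pin and relocate every buffer, and if any pin fails with -ENOSPC, unpin everything pinned so far, evict the whole aperture, and try exactly once more. A simplified, driver-independent sketch of that retry pattern (the callbacks are hypothetical stand-ins, not functions from this file):

#include <errno.h>
#include <stddef.h>

/* Hypothetical stand-ins for i915_gem_object_pin_and_relocate(),
 * i915_gem_object_unpin() and i915_gem_evict_everything(). */
struct retry_ops {
	int  (*pin)(void *obj);
	void (*unpin)(void *obj);
	int  (*evict_everything)(void);
};

static int pin_all_with_retry(void **objs, size_t count,
			      const struct retry_ops *ops)
{
	size_t pinned = 0, i;
	int tries, ret;

	for (tries = 0; ; tries++) {
		ret = 0;
		for (i = 0; i < count; i++) {
			ret = ops->pin(objs[i]);
			if (ret)
				break;
			pinned = i + 1;
		}
		if (ret == 0)
			return 0;			/* every buffer is pinned */

		/* Only "aperture full" is worth a second attempt, and only once. */
		if (ret != -ENOSPC || tries >= 1)
			goto err;

		for (i = 0; i < pinned; i++)		/* drop what we pinned so far */
			ops->unpin(objs[i]);
		pinned = 0;

		ret = ops->evict_everything();		/* make room, then retry */
		if (ret)
			goto err;
	}

err:
	for (i = 0; i < pinned; i++)
		ops->unpin(objs[i]);
	return ret;
}
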
3602int
3603i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
3604{
3605 struct drm_device *dev = obj->dev;
3606 struct drm_i915_gem_object *obj_priv = obj->driver_private;
3607 int ret;
3608
3609 i915_verify_inactive(dev, __FILE__, __LINE__);
3610 if (obj_priv->gtt_space == NULL) {
3611 ret = i915_gem_object_bind_to_gtt(obj, alignment);
3612 if (ret != 0) {
9bb2d6f9 3613 if (ret != -EBUSY && ret != -ERESTARTSYS)
0fce81e3 3614 DRM_ERROR("Failure to bind: %d\n", ret);
673a394b
EA
3615 return ret;
3616 }
22c344e9
CW
3617 }
3618 /*
3619 * Pre-965 chips need a fence register set up in order to
3620 * properly handle tiled surfaces.
3621 */
a09ba7fa 3622 if (!IS_I965G(dev) && obj_priv->tiling_mode != I915_TILING_NONE) {
8c4b8c3f 3623 ret = i915_gem_object_get_fence_reg(obj);
22c344e9
CW
3624 if (ret != 0) {
3625 if (ret != -EBUSY && ret != -ERESTARTSYS)
3626 DRM_ERROR("Failure to install fence: %d\n",
3627 ret);
3628 return ret;
3629 }
673a394b
EA
3630 }
3631 obj_priv->pin_count++;
3632
3633 /* If the object is not active and not pending a flush,
3634 * remove it from the inactive list
3635 */
3636 if (obj_priv->pin_count == 1) {
3637 atomic_inc(&dev->pin_count);
3638 atomic_add(obj->size, &dev->pin_memory);
3639 if (!obj_priv->active &&
21d509e3 3640 (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0 &&
673a394b
EA
3641 !list_empty(&obj_priv->list))
3642 list_del_init(&obj_priv->list);
3643 }
3644 i915_verify_inactive(dev, __FILE__, __LINE__);
3645
3646 return 0;
3647}
3648
3649void
3650i915_gem_object_unpin(struct drm_gem_object *obj)
3651{
3652 struct drm_device *dev = obj->dev;
3653 drm_i915_private_t *dev_priv = dev->dev_private;
3654 struct drm_i915_gem_object *obj_priv = obj->driver_private;
3655
3656 i915_verify_inactive(dev, __FILE__, __LINE__);
3657 obj_priv->pin_count--;
3658 BUG_ON(obj_priv->pin_count < 0);
3659 BUG_ON(obj_priv->gtt_space == NULL);
3660
3661 /* If the object is no longer pinned, and is
3662 * neither active nor being flushed, then stick it on
3663 * the inactive list
3664 */
3665 if (obj_priv->pin_count == 0) {
3666 if (!obj_priv->active &&
21d509e3 3667 (obj->write_domain & I915_GEM_GPU_DOMAINS) == 0)
673a394b
EA
3668 list_move_tail(&obj_priv->list,
3669 &dev_priv->mm.inactive_list);
3670 atomic_dec(&dev->pin_count);
3671 atomic_sub(obj->size, &dev->pin_memory);
3672 }
3673 i915_verify_inactive(dev, __FILE__, __LINE__);
3674}
3675
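A usage sketch for the pair above (a hypothetical helper, not part of this file): with dev->struct_mutex held, as it is for every caller in this file, pinning gives the object a stable GTT offset until the matching unpin.

/* Hypothetical example: pin a buffer at page alignment, read back its GTT
 * offset, then drop the pin. Assumes dev->struct_mutex is already held. */
static int example_peek_gtt_offset(struct drm_gem_object *obj, uint32_t *offset)
{
	struct drm_i915_gem_object *obj_priv = obj->driver_private;
	int ret;

	ret = i915_gem_object_pin(obj, 4096);	/* binds into the GTT if needed */
	if (ret)
		return ret;

	*offset = obj_priv->gtt_offset;		/* stable while the pin is held */

	i915_gem_object_unpin(obj);		/* may return the object to the inactive list */
	return 0;
}
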
3676int
3677i915_gem_pin_ioctl(struct drm_device *dev, void *data,
3678 struct drm_file *file_priv)
3679{
3680 struct drm_i915_gem_pin *args = data;
3681 struct drm_gem_object *obj;
3682 struct drm_i915_gem_object *obj_priv;
3683 int ret;
3684
3685 mutex_lock(&dev->struct_mutex);
3686
3687 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
3688 if (obj == NULL) {
3689 DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n",
3690 args->handle);
3691 mutex_unlock(&dev->struct_mutex);
3692 return -EBADF;
3693 }
3694 obj_priv = obj->driver_private;
3695
3ef94daa
CW
3696 if (obj_priv->madv == I915_MADV_DONTNEED) {
3697 DRM_ERROR("Attempting to pin a I915_MADV_DONTNEED buffer\n");
3698 drm_gem_object_unreference(obj);
3699 mutex_unlock(&dev->struct_mutex);
3700 return -EINVAL;
3701 }
3702
79e53945
JB
3703 if (obj_priv->pin_filp != NULL && obj_priv->pin_filp != file_priv) {
3704 DRM_ERROR("Already pinned in i915_gem_pin_ioctl(): %d\n",
3705 args->handle);
96dec61d 3706 drm_gem_object_unreference(obj);
673a394b 3707 mutex_unlock(&dev->struct_mutex);
79e53945
JB
3708 return -EINVAL;
3709 }
3710
3711 obj_priv->user_pin_count++;
3712 obj_priv->pin_filp = file_priv;
3713 if (obj_priv->user_pin_count == 1) {
3714 ret = i915_gem_object_pin(obj, args->alignment);
3715 if (ret != 0) {
3716 drm_gem_object_unreference(obj);
3717 mutex_unlock(&dev->struct_mutex);
3718 return ret;
3719 }
673a394b
EA
3720 }
3721
3722 /* XXX - flush the CPU caches for pinned objects
3723 * as the X server doesn't manage domains yet
3724 */
e47c68e9 3725 i915_gem_object_flush_cpu_write_domain(obj);
673a394b
EA
3726 args->offset = obj_priv->gtt_offset;
3727 drm_gem_object_unreference(obj);
3728 mutex_unlock(&dev->struct_mutex);
3729
3730 return 0;
3731}
3732
3733int
3734i915_gem_unpin_ioctl(struct drm_device *dev, void *data,
3735 struct drm_file *file_priv)
3736{
3737 struct drm_i915_gem_pin *args = data;
3738 struct drm_gem_object *obj;
79e53945 3739 struct drm_i915_gem_object *obj_priv;
673a394b
EA
3740
3741 mutex_lock(&dev->struct_mutex);
3742
3743 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
3744 if (obj == NULL) {
3745 DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n",
3746 args->handle);
3747 mutex_unlock(&dev->struct_mutex);
3748 return -EBADF;
3749 }
3750
79e53945
JB
3751 obj_priv = obj->driver_private;
3752 if (obj_priv->pin_filp != file_priv) {
3753 DRM_ERROR("Not pinned by caller in i915_gem_pin_ioctl(): %d\n",
3754 args->handle);
3755 drm_gem_object_unreference(obj);
3756 mutex_unlock(&dev->struct_mutex);
3757 return -EINVAL;
3758 }
3759 obj_priv->user_pin_count--;
3760 if (obj_priv->user_pin_count == 0) {
3761 obj_priv->pin_filp = NULL;
3762 i915_gem_object_unpin(obj);
3763 }
673a394b
EA
3764
3765 drm_gem_object_unreference(obj);
3766 mutex_unlock(&dev->struct_mutex);
3767 return 0;
3768}
3769
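From user space the two ioctls pair up as follows. A minimal sketch, assuming the i915_drm.h UAPI header is available as <drm/i915_drm.h>, that fd is an authenticated DRM file descriptor with sufficient privileges, and that handle is an existing GEM handle; error handling is trimmed.

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>	/* struct drm_i915_gem_pin, DRM_IOCTL_I915_GEM_* */

/* Hypothetical helper: pin a GEM object into the GTT, report where it landed,
 * then release the pin again. fd and handle are assumed to come from earlier
 * DRM setup not shown here. */
static int example_pin_and_unpin(int fd, uint32_t handle, uint64_t *gtt_offset)
{
	struct drm_i915_gem_pin pin;
	struct drm_i915_gem_unpin unpin;

	memset(&pin, 0, sizeof(pin));
	pin.handle = handle;
	pin.alignment = 4096;			/* request page alignment for the binding */

	if (ioctl(fd, DRM_IOCTL_I915_GEM_PIN, &pin))
		return -1;
	*gtt_offset = pin.offset;		/* filled in by the ioctl */

	/* ... program the hardware with *gtt_offset ... */

	memset(&unpin, 0, sizeof(unpin));
	unpin.handle = handle;
	return ioctl(fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
}
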
3770int
3771i915_gem_busy_ioctl(struct drm_device *dev, void *data,
3772 struct drm_file *file_priv)
3773{
3774 struct drm_i915_gem_busy *args = data;
3775 struct drm_gem_object *obj;
3776 struct drm_i915_gem_object *obj_priv;
3777
673a394b
EA
3778 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
3779 if (obj == NULL) {
3780 DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n",
3781 args->handle);
673a394b
EA
3782 return -EBADF;
3783 }
3784
b1ce786c 3785 mutex_lock(&dev->struct_mutex);
f21289b3
EA
3786 /* Update the active list for the hardware's current position.
3787 * Otherwise this only updates on a delayed timer or when irqs are
3788 * actually unmasked, and our working set ends up being larger than
3789 * required.
3790 */
3791 i915_gem_retire_requests(dev);
3792
673a394b 3793 obj_priv = obj->driver_private;
c4de0a5d
EA
3794 /* Don't count being on the flushing list against the object being
3795 * done. Otherwise, a buffer left on the flushing list but not getting
3796 * flushed (because nobody's flushing that domain) won't ever return
3797 * unbusy and get reused by libdrm's bo cache. The other expected
3798 * consumer of this interface, OpenGL's occlusion queries, also specs
3799 * that the objects get unbusy "eventually" without any interference.
3800 */
3801 args->busy = obj_priv->active && obj_priv->last_rendering_seqno != 0;
673a394b
EA
3802
3803 drm_gem_object_unreference(obj);
3804 mutex_unlock(&dev->struct_mutex);
3805 return 0;
3806}
3807
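User space typically uses the busy ioctl to poll for idleness without blocking, e.g. before reusing a cached buffer. A minimal sketch under the same assumptions as the pin example above (hypothetical fd and handle, i915_drm.h on the include path):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Returns 1 if the GPU is still using the buffer, 0 if it is idle,
 * and -1 if the ioctl itself failed. */
static int example_bo_busy(int fd, uint32_t handle)
{
	struct drm_i915_gem_busy busy;

	memset(&busy, 0, sizeof(busy));
	busy.handle = handle;

	if (ioctl(fd, DRM_IOCTL_I915_GEM_BUSY, &busy))
		return -1;

	return busy.busy != 0;
}
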
3808int
3809i915_gem_throttle_ioctl(struct drm_device *dev, void *data,
3810 struct drm_file *file_priv)
3811{
3812 return i915_gem_ring_throttle(dev, file_priv);
3813}
3814
3ef94daa
CW
3815int
3816i915_gem_madvise_ioctl(struct drm_device *dev, void *data,
3817 struct drm_file *file_priv)
3818{
3819 struct drm_i915_gem_madvise *args = data;
3820 struct drm_gem_object *obj;
3821 struct drm_i915_gem_object *obj_priv;
3822
3823 switch (args->madv) {
3824 case I915_MADV_DONTNEED:
3825 case I915_MADV_WILLNEED:
3826 break;
3827 default:
3828 return -EINVAL;
3829 }
3830
3831 obj = drm_gem_object_lookup(dev, file_priv, args->handle);
3832 if (obj == NULL) {
3833 DRM_ERROR("Bad handle in i915_gem_madvise_ioctl(): %d\n",
3834 args->handle);
3835 return -EBADF;
3836 }
3837
3838 mutex_lock(&dev->struct_mutex);
3839 obj_priv = obj->driver_private;
3840
3841 if (obj_priv->pin_count) {
3842 drm_gem_object_unreference(obj);
3843 mutex_unlock(&dev->struct_mutex);
3844
3845 DRM_ERROR("Attempted i915_gem_madvise_ioctl() on a pinned object\n");
3846 return -EINVAL;
3847 }
3848
3849 obj_priv->madv = args->madv;
3850 args->retained = obj_priv->gtt_space != NULL;
3851
3852 drm_gem_object_unreference(obj);
3853 mutex_unlock(&dev->struct_mutex);
3854
3855 return 0;
3856}
3857
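In practice this ioctl is driven by libdrm's buffer-object cache: idle cached buffers are marked I915_MADV_DONTNEED so the shrinker below may drop their backing pages, and are switched back to I915_MADV_WILLNEED before reuse, with `retained` telling the caller whether the contents can still be trusted. A minimal sketch, assuming an i915_drm.h new enough to define struct drm_i915_gem_madvise and DRM_IOCTL_I915_GEM_MADVISE (fd and handle are hypothetical):

#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <drm/i915_drm.h>

/* Mark an idle, cached buffer as purgeable. */
static int example_bo_purgeable(int fd, uint32_t handle)
{
	struct drm_i915_gem_madvise madv;

	memset(&madv, 0, sizeof(madv));
	madv.handle = handle;
	madv.madv = I915_MADV_DONTNEED;
	return ioctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
}

/* Take a buffer back out of the cache. Returns 1 if the kernel reports the
 * buffer as retained, 0 if the caller must treat its contents as lost and
 * re-initialize it, and -1 on error. */
static int example_bo_reuse(int fd, uint32_t handle)
{
	struct drm_i915_gem_madvise madv;

	memset(&madv, 0, sizeof(madv));
	madv.handle = handle;
	madv.madv = I915_MADV_WILLNEED;

	if (ioctl(fd, DRM_IOCTL_I915_GEM_MADVISE, &madv))
		return -1;

	return madv.retained != 0;
}
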
673a394b
EA
3858int i915_gem_init_object(struct drm_gem_object *obj)
3859{
3860 struct drm_i915_gem_object *obj_priv;
3861
9a298b2a 3862 obj_priv = kzalloc(sizeof(*obj_priv), GFP_KERNEL);
673a394b
EA
3863 if (obj_priv == NULL)
3864 return -ENOMEM;
3865
3866 /*
3867 * We've just allocated pages from the kernel,
3868 * so they've just been written by the CPU with
3869 * zeros. They'll need to be clflushed before we
3870 * use them with the GPU.
3871 */
3872 obj->write_domain = I915_GEM_DOMAIN_CPU;
3873 obj->read_domains = I915_GEM_DOMAIN_CPU;
3874
ba1eb1d8
KP
3875 obj_priv->agp_type = AGP_USER_MEMORY;
3876
673a394b
EA
3877 obj->driver_private = obj_priv;
3878 obj_priv->obj = obj;
de151cf6 3879 obj_priv->fence_reg = I915_FENCE_REG_NONE;
673a394b 3880 INIT_LIST_HEAD(&obj_priv->list);
a09ba7fa 3881 INIT_LIST_HEAD(&obj_priv->fence_list);
3ef94daa 3882 obj_priv->madv = I915_MADV_WILLNEED;
de151cf6 3883
673a394b
EA
3884 return 0;
3885}
3886
3887void i915_gem_free_object(struct drm_gem_object *obj)
3888{
de151cf6 3889 struct drm_device *dev = obj->dev;
673a394b
EA
3890 struct drm_i915_gem_object *obj_priv = obj->driver_private;
3891
3892 while (obj_priv->pin_count > 0)
3893 i915_gem_object_unpin(obj);
3894
71acb5eb
DA
3895 if (obj_priv->phys_obj)
3896 i915_gem_detach_phys_object(dev, obj);
3897
673a394b
EA
3898 i915_gem_object_unbind(obj);
3899
7e616158
CW
3900 if (obj_priv->mmap_offset)
3901 i915_gem_free_mmap_offset(obj);
de151cf6 3902
9a298b2a 3903 kfree(obj_priv->page_cpu_valid);
280b713b 3904 kfree(obj_priv->bit_17);
9a298b2a 3905 kfree(obj->driver_private);
673a394b
EA
3906}
3907
673a394b
EA
3908/** Unbinds all objects that are on the given buffer list. */
3909static int
3910i915_gem_evict_from_list(struct drm_device *dev, struct list_head *head)
3911{
3912 struct drm_gem_object *obj;
3913 struct drm_i915_gem_object *obj_priv;
3914 int ret;
3915
3916 while (!list_empty(head)) {
3917 obj_priv = list_first_entry(head,
3918 struct drm_i915_gem_object,
3919 list);
3920 obj = obj_priv->obj;
3921
3922 if (obj_priv->pin_count != 0) {
3923 DRM_ERROR("Pinned object in unbind list\n");
3924 mutex_unlock(&dev->struct_mutex);
3925 return -EINVAL;
3926 }
3927
3928 ret = i915_gem_object_unbind(obj);
3929 if (ret != 0) {
3930 DRM_ERROR("Error unbinding object in LeaveVT: %d\n",
3931 ret);
3932 mutex_unlock(&dev->struct_mutex);
3933 return ret;
3934 }
3935 }
3936
3937
3938 return 0;
3939}
3940
5669fcac 3941int
673a394b
EA
3942i915_gem_idle(struct drm_device *dev)
3943{
3944 drm_i915_private_t *dev_priv = dev->dev_private;
3945 uint32_t seqno, cur_seqno, last_seqno;
3946 int stuck, ret;
3947
6dbe2772
KP
3948 mutex_lock(&dev->struct_mutex);
3949
3950 if (dev_priv->mm.suspended || dev_priv->ring.ring_obj == NULL) {
3951 mutex_unlock(&dev->struct_mutex);
673a394b 3952 return 0;
6dbe2772 3953 }
673a394b
EA
3954
3955 /* Hack! Don't let anybody do execbuf while we don't control the chip.
3956 * We need to replace this with a semaphore, or something.
3957 */
3958 dev_priv->mm.suspended = 1;
f65d9421 3959 del_timer(&dev_priv->hangcheck_timer);
673a394b 3960
6dbe2772
KP
3961 /* Cancel the retire work handler, wait for it to finish if running
3962 */
3963 mutex_unlock(&dev->struct_mutex);
3964 cancel_delayed_work_sync(&dev_priv->mm.retire_work);
3965 mutex_lock(&dev->struct_mutex);
3966
673a394b
EA
3967 i915_kernel_lost_context(dev);
3968
3969 /* Flush the GPU along with all non-CPU write domains
3970 */
21d509e3
CW
3971 i915_gem_flush(dev, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
3972 seqno = i915_add_request(dev, NULL, I915_GEM_GPU_DOMAINS);
673a394b
EA
3973
3974 if (seqno == 0) {
3975 mutex_unlock(&dev->struct_mutex);
3976 return -ENOMEM;
3977 }
3978
3979 dev_priv->mm.waiting_gem_seqno = seqno;
3980 last_seqno = 0;
3981 stuck = 0;
3982 for (;;) {
3983 cur_seqno = i915_get_gem_seqno(dev);
3984 if (i915_seqno_passed(cur_seqno, seqno))
3985 break;
3986 if (last_seqno == cur_seqno) {
3987 if (stuck++ > 100) {
3988 DRM_ERROR("hardware wedged\n");
ba1234d1 3989 atomic_set(&dev_priv->mm.wedged, 1);
673a394b
EA
3990 DRM_WAKEUP(&dev_priv->irq_queue);
3991 break;
3992 }
3993 }
3994 msleep(10);
3995 last_seqno = cur_seqno;
3996 }
3997 dev_priv->mm.waiting_gem_seqno = 0;
3998
3999 i915_gem_retire_requests(dev);
4000
5e118f41 4001 spin_lock(&dev_priv->mm.active_list_lock);
ba1234d1 4002 if (!atomic_read(&dev_priv->mm.wedged)) {
28dfe52a
EA
4003 /* Active and flushing should now be empty as we've
4004 * waited for a sequence higher than any pending execbuffer
4005 */
4006 WARN_ON(!list_empty(&dev_priv->mm.active_list));
4007 WARN_ON(!list_empty(&dev_priv->mm.flushing_list));
4008 /* Request should now be empty as we've also waited
4009 * for the last request in the list
4010 */
4011 WARN_ON(!list_empty(&dev_priv->mm.request_list));
4012 }
673a394b 4013
28dfe52a
EA
4014 /* Empty the active and flushing lists to inactive. If there's
4015 * anything left at this point, it means that we're wedged and
4016 * nothing good's going to happen by leaving them there. So strip
4017 * the GPU domains and just stuff them onto inactive.
673a394b 4018 */
28dfe52a
EA
4019 while (!list_empty(&dev_priv->mm.active_list)) {
4020 struct drm_i915_gem_object *obj_priv;
673a394b 4021
28dfe52a
EA
4022 obj_priv = list_first_entry(&dev_priv->mm.active_list,
4023 struct drm_i915_gem_object,
4024 list);
4025 obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
4026 i915_gem_object_move_to_inactive(obj_priv->obj);
4027 }
5e118f41 4028 spin_unlock(&dev_priv->mm.active_list_lock);
28dfe52a
EA
4029
4030 while (!list_empty(&dev_priv->mm.flushing_list)) {
4031 struct drm_i915_gem_object *obj_priv;
4032
151903d5 4033 obj_priv = list_first_entry(&dev_priv->mm.flushing_list,
28dfe52a
EA
4034 struct drm_i915_gem_object,
4035 list);
4036 obj_priv->obj->write_domain &= ~I915_GEM_GPU_DOMAINS;
4037 i915_gem_object_move_to_inactive(obj_priv->obj);
4038 }
4039
4040
4041 /* Move all inactive buffers out of the GTT. */
673a394b 4042 ret = i915_gem_evict_from_list(dev, &dev_priv->mm.inactive_list);
28dfe52a 4043 WARN_ON(!list_empty(&dev_priv->mm.inactive_list));
6dbe2772
KP
4044 if (ret) {
4045 mutex_unlock(&dev->struct_mutex);
673a394b 4046 return ret;
6dbe2772 4047 }
673a394b 4048
6dbe2772
KP
4049 i915_gem_cleanup_ringbuffer(dev);
4050 mutex_unlock(&dev->struct_mutex);
4051
673a394b
EA
4052 return 0;
4053}
4054
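The wait loop above compares sequence numbers with i915_seqno_passed() rather than a plain >=, because the 32-bit seqno eventually wraps. A standalone sketch of that wrap-safe comparison technique, for illustration only:

#include <stdint.h>

/* Illustration of wrap-safe seqno comparison: "a has passed b" iff the signed
 * 32-bit distance from b to a is non-negative. This stays correct across
 * wraparound as long as the two values are within 2^31 of each other. */
static int seqno_passed(uint32_t a, uint32_t b)
{
	return (int32_t)(a - b) >= 0;
}

/* Example: just after wraparound, 0x00000002 counts as later than 0xfffffffe,
 * so seqno_passed(0x00000002, 0xfffffffe) == 1. */
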
4055static int
4056i915_gem_init_hws(struct drm_device *dev)
4057{
4058 drm_i915_private_t *dev_priv = dev->dev_private;
4059 struct drm_gem_object *obj;
4060 struct drm_i915_gem_object *obj_priv;
4061 int ret;
4062
4063 /* If we need a physical address for the status page, it's already
4064 * initialized at driver load time.
4065 */
4066 if (!I915_NEED_GFX_HWS(dev))
4067 return 0;
4068
4069 obj = drm_gem_object_alloc(dev, 4096);
4070 if (obj == NULL) {
4071 DRM_ERROR("Failed to allocate status page\n");
4072 return -ENOMEM;
4073 }
4074 obj_priv = obj->driver_private;
ba1eb1d8 4075 obj_priv->agp_type = AGP_USER_CACHED_MEMORY;
673a394b
EA
4076
4077 ret = i915_gem_object_pin(obj, 4096);
4078 if (ret != 0) {
4079 drm_gem_object_unreference(obj);
4080 return ret;
4081 }
4082
4083 dev_priv->status_gfx_addr = obj_priv->gtt_offset;
673a394b 4084
856fa198 4085 dev_priv->hw_status_page = kmap(obj_priv->pages[0]);
ba1eb1d8 4086 if (dev_priv->hw_status_page == NULL) {
673a394b
EA
4087 DRM_ERROR("Failed to map status page.\n");
4088 memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
3eb2ee77 4089 i915_gem_object_unpin(obj);
673a394b
EA
4090 drm_gem_object_unreference(obj);
4091 return -EINVAL;
4092 }
4093 dev_priv->hws_obj = obj;
673a394b
EA
4094 memset(dev_priv->hw_status_page, 0, PAGE_SIZE);
4095 I915_WRITE(HWS_PGA, dev_priv->status_gfx_addr);
ba1eb1d8 4096 I915_READ(HWS_PGA); /* posting read */
673a394b
EA
4097 DRM_DEBUG("hws offset: 0x%08x\n", dev_priv->status_gfx_addr);
4098
4099 return 0;
4100}
4101
85a7bb98
CW
4102static void
4103i915_gem_cleanup_hws(struct drm_device *dev)
4104{
4105 drm_i915_private_t *dev_priv = dev->dev_private;
bab2d1f6
CW
4106 struct drm_gem_object *obj;
4107 struct drm_i915_gem_object *obj_priv;
85a7bb98
CW
4108
4109 if (dev_priv->hws_obj == NULL)
4110 return;
4111
bab2d1f6
CW
4112 obj = dev_priv->hws_obj;
4113 obj_priv = obj->driver_private;
4114
856fa198 4115 kunmap(obj_priv->pages[0]);
85a7bb98
CW
4116 i915_gem_object_unpin(obj);
4117 drm_gem_object_unreference(obj);
4118 dev_priv->hws_obj = NULL;
bab2d1f6 4119
85a7bb98
CW
4120 memset(&dev_priv->hws_map, 0, sizeof(dev_priv->hws_map));
4121 dev_priv->hw_status_page = NULL;
4122
4123 /* Write high address into HWS_PGA when disabling. */
4124 I915_WRITE(HWS_PGA, 0x1ffff000);
4125}
4126
79e53945 4127int
673a394b
EA
4128i915_gem_init_ringbuffer(struct drm_device *dev)
4129{
4130 drm_i915_private_t *dev_priv = dev->dev_private;
4131 struct drm_gem_object *obj;
4132 struct drm_i915_gem_object *obj_priv;
79e53945 4133 drm_i915_ring_buffer_t *ring = &dev_priv->ring;
673a394b 4134 int ret;
50aa253d 4135 u32 head;
673a394b
EA
4136
4137 ret = i915_gem_init_hws(dev);
4138 if (ret != 0)
4139 return ret;
4140
4141 obj = drm_gem_object_alloc(dev, 128 * 1024);
4142 if (obj == NULL) {
4143 DRM_ERROR("Failed to allocate ringbuffer\n");
85a7bb98 4144 i915_gem_cleanup_hws(dev);
673a394b
EA
4145 return -ENOMEM;
4146 }
4147 obj_priv = obj->driver_private;
4148
4149 ret = i915_gem_object_pin(obj, 4096);
4150 if (ret != 0) {
4151 drm_gem_object_unreference(obj);
85a7bb98 4152 i915_gem_cleanup_hws(dev);
673a394b
EA
4153 return ret;
4154 }
4155
4156 /* Set up the kernel mapping for the ring. */
79e53945 4157 ring->Size = obj->size;
673a394b 4158
79e53945
JB
4159 ring->map.offset = dev->agp->base + obj_priv->gtt_offset;
4160 ring->map.size = obj->size;
4161 ring->map.type = 0;
4162 ring->map.flags = 0;
4163 ring->map.mtrr = 0;
673a394b 4164
79e53945
JB
4165 drm_core_ioremap_wc(&ring->map, dev);
4166 if (ring->map.handle == NULL) {
673a394b
EA
4167 DRM_ERROR("Failed to map ringbuffer.\n");
4168 memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
47ed185a 4169 i915_gem_object_unpin(obj);
673a394b 4170 drm_gem_object_unreference(obj);
85a7bb98 4171 i915_gem_cleanup_hws(dev);
673a394b
EA
4172 return -EINVAL;
4173 }
79e53945
JB
4174 ring->ring_obj = obj;
4175 ring->virtual_start = ring->map.handle;
673a394b
EA
4176
4177 /* Stop the ring if it's running. */
4178 I915_WRITE(PRB0_CTL, 0);
673a394b 4179 I915_WRITE(PRB0_TAIL, 0);
50aa253d 4180 I915_WRITE(PRB0_HEAD, 0);
673a394b
EA
4181
4182 /* Initialize the ring. */
4183 I915_WRITE(PRB0_START, obj_priv->gtt_offset);
50aa253d
KP
4184 head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
4185
4186 /* G45 ring initialization fails to reset head to zero */
4187 if (head != 0) {
4188 DRM_ERROR("Ring head not reset to zero "
4189 "ctl %08x head %08x tail %08x start %08x\n",
4190 I915_READ(PRB0_CTL),
4191 I915_READ(PRB0_HEAD),
4192 I915_READ(PRB0_TAIL),
4193 I915_READ(PRB0_START));
4194 I915_WRITE(PRB0_HEAD, 0);
4195
4196 DRM_ERROR("Ring head forced to zero "
4197 "ctl %08x head %08x tail %08x start %08x\n",
4198 I915_READ(PRB0_CTL),
4199 I915_READ(PRB0_HEAD),
4200 I915_READ(PRB0_TAIL),
4201 I915_READ(PRB0_START));
4202 }
4203
673a394b
EA
4204 I915_WRITE(PRB0_CTL,
4205 ((obj->size - 4096) & RING_NR_PAGES) |
4206 RING_NO_REPORT |
4207 RING_VALID);
4208
50aa253d
KP
4209 head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
4210
4211 /* If the head is still not zero, the ring is dead */
4212 if (head != 0) {
4213 DRM_ERROR("Ring initialization failed "
4214 "ctl %08x head %08x tail %08x start %08x\n",
4215 I915_READ(PRB0_CTL),
4216 I915_READ(PRB0_HEAD),
4217 I915_READ(PRB0_TAIL),
4218 I915_READ(PRB0_START));
4219 return -EIO;
4220 }
4221
673a394b 4222 /* Update our cache of the ring state */
79e53945
JB
4223 if (!drm_core_check_feature(dev, DRIVER_MODESET))
4224 i915_kernel_lost_context(dev);
4225 else {
4226 ring->head = I915_READ(PRB0_HEAD) & HEAD_ADDR;
4227 ring->tail = I915_READ(PRB0_TAIL) & TAIL_ADDR;
4228 ring->space = ring->head - (ring->tail + 8);
4229 if (ring->space < 0)
4230 ring->space += ring->Size;
4231 }
673a394b
EA
4232
4233 return 0;
4234}
4235
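The ring->space computation at the end of the function treats the ring as a circular buffer: free space is the distance from the software tail back to the hardware head, wrapped into the ring size, less a small slack so that a completely full ring is never mistaken for an empty one. A standalone sketch of that calculation (hypothetical helper, not part of this file):

/* Sketch of the free-space rule used above: bytes writable between the
 * software tail and the hardware head, minus 8 bytes of slack so the tail
 * never lands exactly on the head (head == tail reads as "empty"). */
static int ring_space(unsigned int head, unsigned int tail, unsigned int size)
{
	int space = (int)head - (int)(tail + 8);

	if (space < 0)
		space += (int)size;
	return space;
}
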
79e53945 4236void
673a394b
EA
4237i915_gem_cleanup_ringbuffer(struct drm_device *dev)
4238{
4239 drm_i915_private_t *dev_priv = dev->dev_private;
4240
4241 if (dev_priv->ring.ring_obj == NULL)
4242 return;
4243
4244 drm_core_ioremapfree(&dev_priv->ring.map, dev);
4245
4246 i915_gem_object_unpin(dev_priv->ring.ring_obj);
4247 drm_gem_object_unreference(dev_priv->ring.ring_obj);
4248 dev_priv->ring.ring_obj = NULL;
4249 memset(&dev_priv->ring, 0, sizeof(dev_priv->ring));
4250
85a7bb98 4251 i915_gem_cleanup_hws(dev);
673a394b
EA
4252}
4253
4254int
4255i915_gem_entervt_ioctl(struct drm_device *dev, void *data,
4256 struct drm_file *file_priv)
4257{
4258 drm_i915_private_t *dev_priv = dev->dev_private;
4259 int ret;
4260
79e53945
JB
4261 if (drm_core_check_feature(dev, DRIVER_MODESET))
4262 return 0;
4263
ba1234d1 4264 if (atomic_read(&dev_priv->mm.wedged)) {
673a394b 4265 DRM_ERROR("Reenabling wedged hardware, good luck\n");
ba1234d1 4266 atomic_set(&dev_priv->mm.wedged, 0);
673a394b
EA
4267 }
4268
673a394b 4269 mutex_lock(&dev->struct_mutex);
9bb2d6f9
EA
4270 dev_priv->mm.suspended = 0;
4271
4272 ret = i915_gem_init_ringbuffer(dev);
d816f6ac
WF
4273 if (ret != 0) {
4274 mutex_unlock(&dev->struct_mutex);
9bb2d6f9 4275 return ret;
d816f6ac 4276 }
9bb2d6f9 4277
5e118f41 4278 spin_lock(&dev_priv->mm.active_list_lock);
673a394b 4279 BUG_ON(!list_empty(&dev_priv->mm.active_list));
5e118f41
CW
4280 spin_unlock(&dev_priv->mm.active_list_lock);
4281
673a394b
EA
4282 BUG_ON(!list_empty(&dev_priv->mm.flushing_list));
4283 BUG_ON(!list_empty(&dev_priv->mm.inactive_list));
4284 BUG_ON(!list_empty(&dev_priv->mm.request_list));
673a394b 4285 mutex_unlock(&dev->struct_mutex);
dbb19d30
KH
4286
4287 drm_irq_install(dev);
4288
673a394b
EA
4289 return 0;
4290}
4291
4292int
4293i915_gem_leavevt_ioctl(struct drm_device *dev, void *data,
4294 struct drm_file *file_priv)
4295{
4296 int ret;
4297
79e53945
JB
4298 if (drm_core_check_feature(dev, DRIVER_MODESET))
4299 return 0;
4300
673a394b 4301 ret = i915_gem_idle(dev);
dbb19d30
KH
4302 drm_irq_uninstall(dev);
4303
6dbe2772 4304 return ret;
673a394b
EA
4305}
4306
4307void
4308i915_gem_lastclose(struct drm_device *dev)
4309{
4310 int ret;
673a394b 4311
e806b495
EA
4312 if (drm_core_check_feature(dev, DRIVER_MODESET))
4313 return;
4314
6dbe2772
KP
4315 ret = i915_gem_idle(dev);
4316 if (ret)
4317 DRM_ERROR("failed to idle hardware: %d\n", ret);
673a394b
EA
4318}
4319
4320void
4321i915_gem_load(struct drm_device *dev)
4322{
b5aa8a0f 4323 int i;
673a394b
EA
4324 drm_i915_private_t *dev_priv = dev->dev_private;
4325
5e118f41 4326 spin_lock_init(&dev_priv->mm.active_list_lock);
673a394b
EA
4327 INIT_LIST_HEAD(&dev_priv->mm.active_list);
4328 INIT_LIST_HEAD(&dev_priv->mm.flushing_list);
4329 INIT_LIST_HEAD(&dev_priv->mm.inactive_list);
4330 INIT_LIST_HEAD(&dev_priv->mm.request_list);
a09ba7fa 4331 INIT_LIST_HEAD(&dev_priv->mm.fence_list);
673a394b
EA
4332 INIT_DELAYED_WORK(&dev_priv->mm.retire_work,
4333 i915_gem_retire_work_handler);
4334 dev_priv->mm.next_gem_seqno = 1;
4335
31169714
CW
4336 spin_lock(&shrink_list_lock);
4337 list_add(&dev_priv->mm.shrink_list, &shrink_list);
4338 spin_unlock(&shrink_list_lock);
4339
de151cf6
JB
4340 /* Old X drivers will take 0-2 for front, back, depth buffers */
4341 dev_priv->fence_reg_start = 3;
4342
0f973f27 4343 if (IS_I965G(dev) || IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
de151cf6
JB
4344 dev_priv->num_fence_regs = 16;
4345 else
4346 dev_priv->num_fence_regs = 8;
4347
b5aa8a0f
GH
4348 /* Initialize fence registers to zero */
4349 if (IS_I965G(dev)) {
4350 for (i = 0; i < 16; i++)
4351 I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0);
4352 } else {
4353 for (i = 0; i < 8; i++)
4354 I915_WRITE(FENCE_REG_830_0 + (i * 4), 0);
4355 if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
4356 for (i = 0; i < 8; i++)
4357 I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
4358 }
4359
673a394b
EA
4360 i915_gem_detect_bit_6_swizzle(dev);
4361}
71acb5eb
DA
4362
4363/*
4364 * Create a physically contiguous memory object for this object
4365 * e.g. for cursor + overlay regs
4366 */
4367int i915_gem_init_phys_object(struct drm_device *dev,
4368 int id, int size)
4369{
4370 drm_i915_private_t *dev_priv = dev->dev_private;
4371 struct drm_i915_gem_phys_object *phys_obj;
4372 int ret;
4373
4374 if (dev_priv->mm.phys_objs[id - 1] || !size)
4375 return 0;
4376
9a298b2a 4377 phys_obj = kzalloc(sizeof(struct drm_i915_gem_phys_object), GFP_KERNEL);
71acb5eb
DA
4378 if (!phys_obj)
4379 return -ENOMEM;
4380
4381 phys_obj->id = id;
4382
4383 phys_obj->handle = drm_pci_alloc(dev, size, 0, 0xffffffff);
4384 if (!phys_obj->handle) {
4385 ret = -ENOMEM;
4386 goto kfree_obj;
4387 }
4388#ifdef CONFIG_X86
4389 set_memory_wc((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4390#endif
4391
4392 dev_priv->mm.phys_objs[id - 1] = phys_obj;
4393
4394 return 0;
4395kfree_obj:
9a298b2a 4396 kfree(phys_obj);
71acb5eb
DA
4397 return ret;
4398}
4399
4400void i915_gem_free_phys_object(struct drm_device *dev, int id)
4401{
4402 drm_i915_private_t *dev_priv = dev->dev_private;
4403 struct drm_i915_gem_phys_object *phys_obj;
4404
4405 if (!dev_priv->mm.phys_objs[id - 1])
4406 return;
4407
4408 phys_obj = dev_priv->mm.phys_objs[id - 1];
4409 if (phys_obj->cur_obj) {
4410 i915_gem_detach_phys_object(dev, phys_obj->cur_obj);
4411 }
4412
4413#ifdef CONFIG_X86
4414 set_memory_wb((unsigned long)phys_obj->handle->vaddr, phys_obj->handle->size / PAGE_SIZE);
4415#endif
4416 drm_pci_free(dev, phys_obj->handle);
4417 kfree(phys_obj);
4418 dev_priv->mm.phys_objs[id - 1] = NULL;
4419}
4420
4421void i915_gem_free_all_phys_object(struct drm_device *dev)
4422{
4423 int i;
4424
260883c8 4425 for (i = I915_GEM_PHYS_CURSOR_0; i <= I915_MAX_PHYS_OBJECT; i++)
71acb5eb
DA
4426 i915_gem_free_phys_object(dev, i);
4427}
4428
4429void i915_gem_detach_phys_object(struct drm_device *dev,
4430 struct drm_gem_object *obj)
4431{
4432 struct drm_i915_gem_object *obj_priv;
4433 int i;
4434 int ret;
4435 int page_count;
4436
4437 obj_priv = obj->driver_private;
4438 if (!obj_priv->phys_obj)
4439 return;
4440
856fa198 4441 ret = i915_gem_object_get_pages(obj);
71acb5eb
DA
4442 if (ret)
4443 goto out;
4444
4445 page_count = obj->size / PAGE_SIZE;
4446
4447 for (i = 0; i < page_count; i++) {
856fa198 4448 char *dst = kmap_atomic(obj_priv->pages[i], KM_USER0);
71acb5eb
DA
4449 char *src = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4450
4451 memcpy(dst, src, PAGE_SIZE);
4452 kunmap_atomic(dst, KM_USER0);
4453 }
856fa198 4454 drm_clflush_pages(obj_priv->pages, page_count);
71acb5eb 4455 drm_agp_chipset_flush(dev);
d78b47b9
CW
4456
4457 i915_gem_object_put_pages(obj);
71acb5eb
DA
4458out:
4459 obj_priv->phys_obj->cur_obj = NULL;
4460 obj_priv->phys_obj = NULL;
4461}
4462
4463int
4464i915_gem_attach_phys_object(struct drm_device *dev,
4465 struct drm_gem_object *obj, int id)
4466{
4467 drm_i915_private_t *dev_priv = dev->dev_private;
4468 struct drm_i915_gem_object *obj_priv;
4469 int ret = 0;
4470 int page_count;
4471 int i;
4472
4473 if (id > I915_MAX_PHYS_OBJECT)
4474 return -EINVAL;
4475
4476 obj_priv = obj->driver_private;
4477
4478 if (obj_priv->phys_obj) {
4479 if (obj_priv->phys_obj->id == id)
4480 return 0;
4481 i915_gem_detach_phys_object(dev, obj);
4482 }
4483
4484
4485 /* create a new object */
4486 if (!dev_priv->mm.phys_objs[id - 1]) {
4487 ret = i915_gem_init_phys_object(dev, id,
4488 obj->size);
4489 if (ret) {
aeb565df 4490 DRM_ERROR("failed to init phys object %d size: %zu\n", id, obj->size);
71acb5eb
DA
4491 goto out;
4492 }
4493 }
4494
4495 /* bind to the object */
4496 obj_priv->phys_obj = dev_priv->mm.phys_objs[id - 1];
4497 obj_priv->phys_obj->cur_obj = obj;
4498
856fa198 4499 ret = i915_gem_object_get_pages(obj);
71acb5eb
DA
4500 if (ret) {
4501 DRM_ERROR("failed to get page list\n");
4502 goto out;
4503 }
4504
4505 page_count = obj->size / PAGE_SIZE;
4506
4507 for (i = 0; i < page_count; i++) {
856fa198 4508 char *src = kmap_atomic(obj_priv->pages[i], KM_USER0);
71acb5eb
DA
4509 char *dst = obj_priv->phys_obj->handle->vaddr + (i * PAGE_SIZE);
4510
4511 memcpy(dst, src, PAGE_SIZE);
4512 kunmap_atomic(src, KM_USER0);
4513 }
4514
d78b47b9
CW
4515 i915_gem_object_put_pages(obj);
4516
71acb5eb
DA
4517 return 0;
4518out:
4519 return ret;
4520}
4521
4522static int
4523i915_gem_phys_pwrite(struct drm_device *dev, struct drm_gem_object *obj,
4524 struct drm_i915_gem_pwrite *args,
4525 struct drm_file *file_priv)
4526{
4527 struct drm_i915_gem_object *obj_priv = obj->driver_private;
4528 void *obj_addr;
4529 int ret;
4530 char __user *user_data;
4531
4532 user_data = (char __user *) (uintptr_t) args->data_ptr;
4533 obj_addr = obj_priv->phys_obj->handle->vaddr + args->offset;
4534
e08fb4f6 4535 DRM_DEBUG("obj_addr %p, %lld\n", obj_addr, args->size);
71acb5eb
DA
4536 ret = copy_from_user(obj_addr, user_data, args->size);
4537 if (ret)
4538 return -EFAULT;
4539
4540 drm_agp_chipset_flush(dev);
4541 return 0;
4542}
b962442e
EA
4543
4544void i915_gem_release(struct drm_device * dev, struct drm_file *file_priv)
4545{
4546 struct drm_i915_file_private *i915_file_priv = file_priv->driver_priv;
4547
4548 /* Clean up our request list when the client is going away, so that
4549 * later retire_requests won't dereference our soon-to-be-gone
4550 * file_priv.
4551 */
4552 mutex_lock(&dev->struct_mutex);
4553 while (!list_empty(&i915_file_priv->mm.request_list))
4554 list_del_init(i915_file_priv->mm.request_list.next);
4555 mutex_unlock(&dev->struct_mutex);
4556}
31169714
CW
4557
4558/* Immediately discard the backing storage */
4559static void
4560i915_gem_object_truncate(struct drm_gem_object *obj)
4561{
4562 struct inode *inode;
4563
4564 inode = obj->filp->f_path.dentry->d_inode;
4565
4566 mutex_lock(&inode->i_mutex);
4567 truncate_inode_pages(inode->i_mapping, 0);
4568 mutex_unlock(&inode->i_mutex);
4569}
4570
4571static inline int
4572i915_gem_object_is_purgeable(struct drm_i915_gem_object *obj_priv)
4573{
3ef94daa 4574 return !obj_priv->dirty || obj_priv->madv == I915_MADV_DONTNEED;
31169714
CW
4575}
4576
4577static int
4578i915_gem_shrink(int nr_to_scan, gfp_t gfp_mask)
4579{
4580 drm_i915_private_t *dev_priv, *next_dev;
4581 struct drm_i915_gem_object *obj_priv, *next_obj;
4582 int cnt = 0;
4583 int would_deadlock = 1;
4584
4585 /* "fast-path" to count number of available objects */
4586 if (nr_to_scan == 0) {
4587 spin_lock(&shrink_list_lock);
4588 list_for_each_entry(dev_priv, &shrink_list, mm.shrink_list) {
4589 struct drm_device *dev = dev_priv->dev;
4590
4591 if (mutex_trylock(&dev->struct_mutex)) {
4592 list_for_each_entry(obj_priv,
4593 &dev_priv->mm.inactive_list,
4594 list)
4595 cnt++;
4596 mutex_unlock(&dev->struct_mutex);
4597 }
4598 }
4599 spin_unlock(&shrink_list_lock);
4600
4601 return (cnt / 100) * sysctl_vfs_cache_pressure;
4602 }
4603
4604 spin_lock(&shrink_list_lock);
4605
4606 /* first scan for clean buffers */
4607 list_for_each_entry_safe(dev_priv, next_dev,
4608 &shrink_list, mm.shrink_list) {
4609 struct drm_device *dev = dev_priv->dev;
4610
4611 if (!mutex_trylock(&dev->struct_mutex))
4612 continue;
4613
4614 spin_unlock(&shrink_list_lock);
4615
4616 i915_gem_retire_requests(dev);
4617
4618 list_for_each_entry_safe(obj_priv, next_obj,
4619 &dev_priv->mm.inactive_list,
4620 list) {
4621 if (i915_gem_object_is_purgeable(obj_priv)) {
4622 struct drm_gem_object *obj = obj_priv->obj;
4623 i915_gem_object_unbind(obj);
4624 i915_gem_object_truncate(obj);
4625
4626 if (--nr_to_scan <= 0)
4627 break;
4628 }
4629 }
4630
4631 spin_lock(&shrink_list_lock);
4632 mutex_unlock(&dev->struct_mutex);
4633
4634 if (nr_to_scan <= 0)
4635 break;
4636 }
4637
4638 /* second pass, evict/count anything still on the inactive list */
4639 list_for_each_entry_safe(dev_priv, next_dev,
4640 &shrink_list, mm.shrink_list) {
4641 struct drm_device *dev = dev_priv->dev;
4642
4643 if (!mutex_trylock(&dev->struct_mutex))
4644 continue;
4645
4646 spin_unlock(&shrink_list_lock);
4647
4648 list_for_each_entry_safe(obj_priv, next_obj,
4649 &dev_priv->mm.inactive_list,
4650 list) {
4651 if (nr_to_scan > 0) {
4652 struct drm_gem_object *obj = obj_priv->obj;
4653 i915_gem_object_unbind(obj);
4654 if (i915_gem_object_is_purgeable(obj_priv))
4655 i915_gem_object_truncate(obj);
4656
4657 nr_to_scan--;
4658 } else
4659 cnt++;
4660 }
4661
4662 spin_lock(&shrink_list_lock);
4663 mutex_unlock(&dev->struct_mutex);
4664
4665 would_deadlock = 0;
4666 }
4667
4668 spin_unlock(&shrink_list_lock);
4669
4670 if (would_deadlock)
4671 return -1;
4672 else if (cnt > 0)
4673 return (cnt / 100) * sysctl_vfs_cache_pressure;
4674 else
4675 return 0;
4676}
4677
4678static struct shrinker shrinker = {
4679 .shrink = i915_gem_shrink,
4680 .seeks = DEFAULT_SEEKS,
4681};
4682
4683__init void
4684i915_gem_shrinker_init(void)
4685{
4686 register_shrinker(&shrinker);
4687}
4688
4689__exit void
4690i915_gem_shrinker_exit(void)
4691{
4692 unregister_shrinker(&shrinker);
4693}