1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
17 * Copyright (C) Ashok Raj <ashok.raj@intel.com>
18 * Copyright (C) Shaohua Li <shaohua.li@intel.com>
19 * Copyright (C) Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
20 */
21
22#include <linux/init.h>
23#include <linux/bitmap.h>
24#include <linux/slab.h>
25#include <linux/irq.h>
26#include <linux/interrupt.h>
27#include <linux/sysdev.h>
28#include <linux/spinlock.h>
29#include <linux/pci.h>
30#include <linux/dmar.h>
31#include <linux/dma-mapping.h>
32#include <linux/mempool.h>
33#include "iova.h"
34#include "intel-iommu.h"
35#include <asm/proto.h> /* force_iommu in this header in x86-64*/
36#include <asm/cacheflush.h>
37#include <asm/gart.h>
38#include "pci.h"
39
40#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
41#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
42
43#define IOAPIC_RANGE_START (0xfee00000)
44#define IOAPIC_RANGE_END (0xfeefffff)
45#define IOVA_START_ADDR (0x1000)
46
47#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
48
49#define DMAR_OPERATION_TIMEOUT (HZ*60) /* 1m */
50
51#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
52
53static void domain_remove_dev_info(struct dmar_domain *domain);
54
55static int dmar_disabled;
56static int __initdata dmar_map_gfx = 1;
57static int dmar_forcedac;
58
59#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
60static DEFINE_SPINLOCK(device_domain_lock);
61static LIST_HEAD(device_domain_list);
62
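/*
 * "intel_iommu=" boot options are comma separated; the parser below
 * understands "off", "igfx_off" and "forcedac", e.g.
 * "intel_iommu=igfx_off,forcedac".
 */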
63static int __init intel_iommu_setup(char *str)
64{
65 if (!str)
66 return -EINVAL;
67 while (*str) {
68 if (!strncmp(str, "off", 3)) {
69 dmar_disabled = 1;
70 printk(KERN_INFO"Intel-IOMMU: disabled\n");
71 } else if (!strncmp(str, "igfx_off", 8)) {
72 dmar_map_gfx = 0;
73 printk(KERN_INFO
74 "Intel-IOMMU: disable GFX device mapping\n");
75 } else if (!strncmp(str, "forcedac", 8)) {
76 printk (KERN_INFO
77 "Intel-IOMMU: Forcing DAC for PCI devices\n");
78 dmar_forcedac = 1;
79 }
80
81 str += strcspn(str, ",");
82 while (*str == ',')
83 str++;
84 }
85 return 0;
86}
87__setup("intel_iommu=", intel_iommu_setup);
88
89static struct kmem_cache *iommu_domain_cache;
90static struct kmem_cache *iommu_devinfo_cache;
91static struct kmem_cache *iommu_iova_cache;
92
93static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
94{
95 unsigned int flags;
96 void *vaddr;
97
98 /* trying to avoid low memory issues */
99 flags = current->flags & PF_MEMALLOC;
100 current->flags |= PF_MEMALLOC;
101 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
102 current->flags &= (~PF_MEMALLOC | flags);
103 return vaddr;
104}
105
106
107static inline void *alloc_pgtable_page(void)
108{
109 unsigned int flags;
110 void *vaddr;
111
112 /* trying to avoid low memory issues */
113 flags = current->flags & PF_MEMALLOC;
114 current->flags |= PF_MEMALLOC;
115 vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
116 current->flags &= (~PF_MEMALLOC | flags);
117 return vaddr;
118}
119
120static inline void free_pgtable_page(void *vaddr)
121{
122 free_page((unsigned long)vaddr);
123}
124
125static inline void *alloc_domain_mem(void)
126{
127 return iommu_kmem_cache_alloc(iommu_domain_cache);
128}
129
130static inline void free_domain_mem(void *vaddr)
131{
132 kmem_cache_free(iommu_domain_cache, vaddr);
133}
134
135static inline void * alloc_devinfo_mem(void)
136{
137 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
138}
139
140static inline void free_devinfo_mem(void *vaddr)
141{
142 kmem_cache_free(iommu_devinfo_cache, vaddr);
143}
144
145struct iova *alloc_iova_mem(void)
146{
147 return iommu_kmem_cache_alloc(iommu_iova_cache);
148}
149
150void free_iova_mem(struct iova *iova)
151{
152 kmem_cache_free(iommu_iova_cache, iova);
153}
154
155static inline void __iommu_flush_cache(
156 struct intel_iommu *iommu, void *addr, int size)
157{
158 if (!ecap_coherent(iommu->ecap))
159 clflush_cache_range(addr, size);
160}
161
162/* Gets context entry for a given bus and devfn */
163static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
164 u8 bus, u8 devfn)
165{
166 struct root_entry *root;
167 struct context_entry *context;
168 unsigned long phy_addr;
169 unsigned long flags;
170
171 spin_lock_irqsave(&iommu->lock, flags);
172 root = &iommu->root_entry[bus];
173 context = get_context_addr_from_root(root);
174 if (!context) {
175 context = (struct context_entry *)alloc_pgtable_page();
176 if (!context) {
177 spin_unlock_irqrestore(&iommu->lock, flags);
178 return NULL;
179 }
180 __iommu_flush_cache(iommu, (void *)context, PAGE_SIZE_4K);
181 phy_addr = virt_to_phys((void *)context);
182 set_root_value(root, phy_addr);
183 set_root_present(root);
184 __iommu_flush_cache(iommu, root, sizeof(*root));
185 }
186 spin_unlock_irqrestore(&iommu->lock, flags);
187 return &context[devfn];
188}
189
190static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
191{
192 struct root_entry *root;
193 struct context_entry *context;
194 int ret;
195 unsigned long flags;
196
197 spin_lock_irqsave(&iommu->lock, flags);
198 root = &iommu->root_entry[bus];
199 context = get_context_addr_from_root(root);
200 if (!context) {
201 ret = 0;
202 goto out;
203 }
204 ret = context_present(context[devfn]);
205out:
206 spin_unlock_irqrestore(&iommu->lock, flags);
207 return ret;
208}
209
210static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
211{
212 struct root_entry *root;
213 struct context_entry *context;
214 unsigned long flags;
215
216 spin_lock_irqsave(&iommu->lock, flags);
217 root = &iommu->root_entry[bus];
218 context = get_context_addr_from_root(root);
219 if (context) {
220 context_clear_entry(context[devfn]);
221 __iommu_flush_cache(iommu, &context[devfn], \
222 sizeof(*context));
223 }
224 spin_unlock_irqrestore(&iommu->lock, flags);
225}
226
227static void free_context_table(struct intel_iommu *iommu)
228{
229 struct root_entry *root;
230 int i;
231 unsigned long flags;
232 struct context_entry *context;
233
234 spin_lock_irqsave(&iommu->lock, flags);
235 if (!iommu->root_entry) {
236 goto out;
237 }
238 for (i = 0; i < ROOT_ENTRY_NR; i++) {
239 root = &iommu->root_entry[i];
240 context = get_context_addr_from_root(root);
241 if (context)
242 free_pgtable_page(context);
243 }
244 free_pgtable_page(iommu->root_entry);
245 iommu->root_entry = NULL;
246out:
247 spin_unlock_irqrestore(&iommu->lock, flags);
248}
249
250/* page table handling */
251#define LEVEL_STRIDE (9)
252#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
253
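/*
 * Each page-table level translates LEVEL_STRIDE (9) address bits above
 * the 12-bit page offset.  For example, a 48-bit adjusted width gives
 * agaw = (48 - 30) / 9 = 2, i.e. agaw_to_level() == 4 levels.
 */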
254static inline int agaw_to_level(int agaw)
255{
256 return agaw + 2;
257}
258
259static inline int agaw_to_width(int agaw)
260{
261 return 30 + agaw * LEVEL_STRIDE;
262
263}
264
265static inline int width_to_agaw(int width)
266{
267 return (width - 30) / LEVEL_STRIDE;
268}
269
270static inline unsigned int level_to_offset_bits(int level)
271{
272 return (12 + (level - 1) * LEVEL_STRIDE);
273}
274
275static inline int address_level_offset(u64 addr, int level)
276{
277 return ((addr >> level_to_offset_bits(level)) & LEVEL_MASK);
278}
279
280static inline u64 level_mask(int level)
281{
282 return ((u64)-1 << level_to_offset_bits(level));
283}
284
285static inline u64 level_size(int level)
286{
287 return ((u64)1 << level_to_offset_bits(level));
288}
289
290static inline u64 align_to_level(u64 addr, int level)
291{
292 return ((addr + level_size(level) - 1) & level_mask(level));
293}
294
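/*
 * Walk the domain's page table down to the last-level PTE for @addr,
 * allocating (and cache-flushing) intermediate page-table pages as
 * needed.  Returns NULL if a page-table page cannot be allocated.
 */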
295static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr)
296{
297 int addr_width = agaw_to_width(domain->agaw);
298 struct dma_pte *parent, *pte = NULL;
299 int level = agaw_to_level(domain->agaw);
300 int offset;
301 unsigned long flags;
302
303 BUG_ON(!domain->pgd);
304
305 addr &= (((u64)1) << addr_width) - 1;
306 parent = domain->pgd;
307
308 spin_lock_irqsave(&domain->mapping_lock, flags);
309 while (level > 0) {
310 void *tmp_page;
311
312 offset = address_level_offset(addr, level);
313 pte = &parent[offset];
314 if (level == 1)
315 break;
316
317 if (!dma_pte_present(*pte)) {
318 tmp_page = alloc_pgtable_page();
319
320 if (!tmp_page) {
321 spin_unlock_irqrestore(&domain->mapping_lock,
322 flags);
323 return NULL;
324 }
325 __iommu_flush_cache(domain->iommu, tmp_page,
326 PAGE_SIZE_4K);
327 dma_set_pte_addr(*pte, virt_to_phys(tmp_page));
328 /*
329 * high level table always sets r/w, last level page
330 * table control read/write
331 */
332 dma_set_pte_readable(*pte);
333 dma_set_pte_writable(*pte);
334 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
335 }
336 parent = phys_to_virt(dma_pte_addr(*pte));
337 level--;
338 }
339
340 spin_unlock_irqrestore(&domain->mapping_lock, flags);
341 return pte;
342}
343
344/* return address's pte at specific level */
345static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr,
346 int level)
347{
348 struct dma_pte *parent, *pte = NULL;
349 int total = agaw_to_level(domain->agaw);
350 int offset;
351
352 parent = domain->pgd;
353 while (level <= total) {
354 offset = address_level_offset(addr, total);
355 pte = &parent[offset];
356 if (level == total)
357 return pte;
358
359 if (!dma_pte_present(*pte))
360 break;
361 parent = phys_to_virt(dma_pte_addr(*pte));
362 total--;
363 }
364 return NULL;
365}
366
367/* clear one page's page table */
368static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr)
369{
370 struct dma_pte *pte = NULL;
371
372 /* get last level pte */
373 pte = dma_addr_level_pte(domain, addr, 1);
374
375 if (pte) {
376 dma_clear_pte(*pte);
377 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
378 }
379}
380
381/* clear last level pte, a tlb flush should be followed */
382static void dma_pte_clear_range(struct dmar_domain *domain, u64 start, u64 end)
383{
384 int addr_width = agaw_to_width(domain->agaw);
385
386 start &= (((u64)1) << addr_width) - 1;
387 end &= (((u64)1) << addr_width) - 1;
388 /* in case it's partial page */
389 start = PAGE_ALIGN_4K(start);
390 end &= PAGE_MASK_4K;
391
392 /* we don't need lock here, nobody else touches the iova range */
393 while (start < end) {
394 dma_pte_clear_one(domain, start);
395 start += PAGE_SIZE_4K;
396 }
397}
398
399/* free page table pages. last level pte should already be cleared */
400static void dma_pte_free_pagetable(struct dmar_domain *domain,
401 u64 start, u64 end)
402{
403 int addr_width = agaw_to_width(domain->agaw);
404 struct dma_pte *pte;
405 int total = agaw_to_level(domain->agaw);
406 int level;
407 u64 tmp;
408
409 start &= (((u64)1) << addr_width) - 1;
410 end &= (((u64)1) << addr_width) - 1;
411
412 /* we don't need lock here, nobody else touches the iova range */
413 level = 2;
414 while (level <= total) {
415 tmp = align_to_level(start, level);
416 if (tmp >= end || (tmp + level_size(level) > end))
417 return;
418
419 while (tmp < end) {
420 pte = dma_addr_level_pte(domain, tmp, level);
421 if (pte) {
422 free_pgtable_page(
423 phys_to_virt(dma_pte_addr(*pte)));
424 dma_clear_pte(*pte);
425 __iommu_flush_cache(domain->iommu,
426 pte, sizeof(*pte));
427 }
428 tmp += level_size(level);
429 }
430 level++;
431 }
432 /* free pgd */
433 if (start == 0 && end >= ((((u64)1) << addr_width) - 1)) {
434 free_pgtable_page(domain->pgd);
435 domain->pgd = NULL;
436 }
437}
438
439/* iommu handling */
440static int iommu_alloc_root_entry(struct intel_iommu *iommu)
441{
442 struct root_entry *root;
443 unsigned long flags;
444
445 root = (struct root_entry *)alloc_pgtable_page();
446 if (!root)
447 return -ENOMEM;
448
449 __iommu_flush_cache(iommu, root, PAGE_SIZE_4K);
450
451 spin_lock_irqsave(&iommu->lock, flags);
452 iommu->root_entry = root;
453 spin_unlock_irqrestore(&iommu->lock, flags);
454
455 return 0;
456}
457
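/*
 * Poll an IOMMU register with @op until @cond is true; panic if the
 * hardware has not responded within DMAR_OPERATION_TIMEOUT.
 */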
458#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
459{\
460 unsigned long start_time = jiffies;\
461 while (1) {\
462 sts = op (iommu->reg + offset);\
463 if (cond)\
464 break;\
465 if (time_after(jiffies, start_time + DMAR_OPERATION_TIMEOUT))\
466 panic("DMAR hardware is malfunctioning\n");\
467 cpu_relax();\
468 }\
469}
470
471static void iommu_set_root_entry(struct intel_iommu *iommu)
472{
473 void *addr;
474 u32 cmd, sts;
475 unsigned long flag;
476
477 addr = iommu->root_entry;
478
479 spin_lock_irqsave(&iommu->register_lock, flag);
480 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
481
482 cmd = iommu->gcmd | DMA_GCMD_SRTP;
483 writel(cmd, iommu->reg + DMAR_GCMD_REG);
484
485 /* Make sure hardware completes it */
486 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
487 readl, (sts & DMA_GSTS_RTPS), sts);
488
489 spin_unlock_irqrestore(&iommu->register_lock, flag);
490}
491
492static void iommu_flush_write_buffer(struct intel_iommu *iommu)
493{
494 u32 val;
495 unsigned long flag;
496
497 if (!cap_rwbf(iommu->cap))
498 return;
499 val = iommu->gcmd | DMA_GCMD_WBF;
500
501 spin_lock_irqsave(&iommu->register_lock, flag);
502 writel(val, iommu->reg + DMAR_GCMD_REG);
503
504 /* Make sure hardware completes it */
505 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
506 readl, (!(val & DMA_GSTS_WBFS)), val);
507
508 spin_unlock_irqrestore(&iommu->register_lock, flag);
509}
510
511/* return value determines if we need a write buffer flush */
512static int __iommu_flush_context(struct intel_iommu *iommu,
513 u16 did, u16 source_id, u8 function_mask, u64 type,
514 int non_present_entry_flush)
515{
516 u64 val = 0;
517 unsigned long flag;
518
519 /*
520 * In the non-present entry flush case, if hardware doesn't cache
521 * non-present entry we do nothing and if hardware cache non-present
522 * entry, we flush entries of domain 0 (the domain id is used to cache
523 * any non-present entries)
524 */
525 if (non_present_entry_flush) {
526 if (!cap_caching_mode(iommu->cap))
527 return 1;
528 else
529 did = 0;
530 }
531
532 switch (type) {
533 case DMA_CCMD_GLOBAL_INVL:
534 val = DMA_CCMD_GLOBAL_INVL;
535 break;
536 case DMA_CCMD_DOMAIN_INVL:
537 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
538 break;
539 case DMA_CCMD_DEVICE_INVL:
540 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
541 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
542 break;
543 default:
544 BUG();
545 }
546 val |= DMA_CCMD_ICC;
547
548 spin_lock_irqsave(&iommu->register_lock, flag);
549 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
550
551 /* Make sure hardware completes it */
552 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
553 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
554
555 spin_unlock_irqrestore(&iommu->register_lock, flag);
556
557 /* flush context entry will implicitly flush write buffer */
558 return 0;
559}
560
561static int inline iommu_flush_context_global(struct intel_iommu *iommu,
562 int non_present_entry_flush)
563{
564 return __iommu_flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL,
565 non_present_entry_flush);
566}
567
568static int inline iommu_flush_context_domain(struct intel_iommu *iommu, u16 did,
569 int non_present_entry_flush)
570{
571 return __iommu_flush_context(iommu, did, 0, 0, DMA_CCMD_DOMAIN_INVL,
572 non_present_entry_flush);
573}
574
575static int inline iommu_flush_context_device(struct intel_iommu *iommu,
576 u16 did, u16 source_id, u8 function_mask, int non_present_entry_flush)
577{
578 return __iommu_flush_context(iommu, did, source_id, function_mask,
579 DMA_CCMD_DEVICE_INVL, non_present_entry_flush);
580}
581
582/* return value determines if we need a write buffer flush */
583static int __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
584 u64 addr, unsigned int size_order, u64 type,
585 int non_present_entry_flush)
586{
587 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
588 u64 val = 0, val_iva = 0;
589 unsigned long flag;
590
591 /*
592 * In the non-present entry flush case, if hardware doesn't cache
593 * non-present entry we do nothing and if hardware cache non-present
594 * entry, we flush entries of domain 0 (the domain id is used to cache
595 * any non-present entries)
596 */
597 if (non_present_entry_flush) {
598 if (!cap_caching_mode(iommu->cap))
599 return 1;
600 else
601 did = 0;
602 }
603
604 switch (type) {
605 case DMA_TLB_GLOBAL_FLUSH:
606 /* global flush doesn't need set IVA_REG */
607 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
608 break;
609 case DMA_TLB_DSI_FLUSH:
610 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
611 break;
612 case DMA_TLB_PSI_FLUSH:
613 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
614 /* Note: always flush non-leaf currently */
615 val_iva = size_order | addr;
616 break;
617 default:
618 BUG();
619 }
620 /* Note: set drain read/write */
621#if 0
622 /*
623 * This is probably to be super secure.. Looks like we can
624 * ignore it without any impact.
625 */
626 if (cap_read_drain(iommu->cap))
627 val |= DMA_TLB_READ_DRAIN;
628#endif
629 if (cap_write_drain(iommu->cap))
630 val |= DMA_TLB_WRITE_DRAIN;
631
632 spin_lock_irqsave(&iommu->register_lock, flag);
633 /* Note: Only uses first TLB reg currently */
634 if (val_iva)
635 dmar_writeq(iommu->reg + tlb_offset, val_iva);
636 dmar_writeq(iommu->reg + tlb_offset + 8, val);
637
638 /* Make sure hardware completes it */
639 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
640 dmar_readq, (!(val & DMA_TLB_IVT)), val);
641
642 spin_unlock_irqrestore(&iommu->register_lock, flag);
643
644 /* check IOTLB invalidation granularity */
645 if (DMA_TLB_IAIG(val) == 0)
646 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
647 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
648 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
649 DMA_TLB_IIRG(type), DMA_TLB_IAIG(val));
650 /* flush context entry will implicitly flush write buffer */
651 return 0;
652}
653
654static int inline iommu_flush_iotlb_global(struct intel_iommu *iommu,
655 int non_present_entry_flush)
656{
657 return __iommu_flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH,
658 non_present_entry_flush);
659}
660
661static int inline iommu_flush_iotlb_dsi(struct intel_iommu *iommu, u16 did,
662 int non_present_entry_flush)
663{
664 return __iommu_flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH,
665 non_present_entry_flush);
666}
667
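/*
 * Page-selective IOTLB flush.  Hardware takes the size as a power of
 * two: e.g. pages == 8 gives mask == 3, i.e. eight 4K pages starting
 * at @addr.  Falls back to a domain-selective flush when PSI is not
 * supported or the range is too large.
 */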
668static int iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
669 u64 addr, unsigned int pages, int non_present_entry_flush)
670{
671 unsigned int mask;
672
673 BUG_ON(addr & (~PAGE_MASK_4K));
674 BUG_ON(pages == 0);
675
676 /* Fallback to domain selective flush if no PSI support */
677 if (!cap_pgsel_inv(iommu->cap))
678 return iommu_flush_iotlb_dsi(iommu, did,
679 non_present_entry_flush);
680
681 /*
682 * PSI requires page size to be 2 ^ x, and the base address is naturally
683 * aligned to the size
684 */
685 mask = ilog2(__roundup_pow_of_two(pages));
686 /* Fallback to domain selective flush if size is too big */
687 if (mask > cap_max_amask_val(iommu->cap))
688 return iommu_flush_iotlb_dsi(iommu, did,
689 non_present_entry_flush);
690
691 return __iommu_flush_iotlb(iommu, did, addr, mask,
692 DMA_TLB_PSI_FLUSH, non_present_entry_flush);
693}
694
695static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
696{
697 u32 pmen;
698 unsigned long flags;
699
700 spin_lock_irqsave(&iommu->register_lock, flags);
701 pmen = readl(iommu->reg + DMAR_PMEN_REG);
702 pmen &= ~DMA_PMEN_EPM;
703 writel(pmen, iommu->reg + DMAR_PMEN_REG);
704
705 /* wait for the protected region status bit to clear */
706 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
707 readl, !(pmen & DMA_PMEN_PRS), pmen);
708
709 spin_unlock_irqrestore(&iommu->register_lock, flags);
710}
711
712static int iommu_enable_translation(struct intel_iommu *iommu)
713{
714 u32 sts;
715 unsigned long flags;
716
717 spin_lock_irqsave(&iommu->register_lock, flags);
718 writel(iommu->gcmd|DMA_GCMD_TE, iommu->reg + DMAR_GCMD_REG);
719
720 /* Make sure hardware completes it */
721 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
722 readl, (sts & DMA_GSTS_TES), sts);
723
724 iommu->gcmd |= DMA_GCMD_TE;
725 spin_unlock_irqrestore(&iommu->register_lock, flags);
726 return 0;
727}
728
729static int iommu_disable_translation(struct intel_iommu *iommu)
730{
731 u32 sts;
732 unsigned long flag;
733
734 spin_lock_irqsave(&iommu->register_lock, flag);
735 iommu->gcmd &= ~DMA_GCMD_TE;
736 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
737
738 /* Make sure hardware completes it */
739 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
740 readl, (!(sts & DMA_GSTS_TES)), sts);
741
742 spin_unlock_irqrestore(&iommu->register_lock, flag);
743 return 0;
744}
745
746/* iommu interrupt handling. Most of it is MSI-like. */
747
748static const char *fault_reason_strings[] =
749{
750 "Software",
751 "Present bit in root entry is clear",
752 "Present bit in context entry is clear",
753 "Invalid context entry",
754 "Access beyond MGAW",
755 "PTE Write access is not set",
756 "PTE Read access is not set",
757 "Next page table ptr is invalid",
758 "Root table address invalid",
759 "Context table ptr is invalid",
760 "non-zero reserved fields in RTP",
761 "non-zero reserved fields in CTP",
762 "non-zero reserved fields in PTE",
763};
764#define MAX_FAULT_REASON_IDX (ARRAY_SIZE(fault_reason_strings) - 1)
765
766const char *dmar_get_fault_reason(u8 fault_reason)
767{
768 if (fault_reason > MAX_FAULT_REASON_IDX)
769 return "Unknown";
770 else
771 return fault_reason_strings[fault_reason];
772}
773
774void dmar_msi_unmask(unsigned int irq)
775{
776 struct intel_iommu *iommu = get_irq_data(irq);
777 unsigned long flag;
778
779 /* unmask it */
780 spin_lock_irqsave(&iommu->register_lock, flag);
781 writel(0, iommu->reg + DMAR_FECTL_REG);
782 /* Read a reg to force flush the post write */
783 readl(iommu->reg + DMAR_FECTL_REG);
784 spin_unlock_irqrestore(&iommu->register_lock, flag);
785}
786
787void dmar_msi_mask(unsigned int irq)
788{
789 unsigned long flag;
790 struct intel_iommu *iommu = get_irq_data(irq);
791
792 /* mask it */
793 spin_lock_irqsave(&iommu->register_lock, flag);
794 writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
795 /* Read a reg to force flush the post write */
796 readl(iommu->reg + DMAR_FECTL_REG);
797 spin_unlock_irqrestore(&iommu->register_lock, flag);
798}
799
800void dmar_msi_write(int irq, struct msi_msg *msg)
801{
802 struct intel_iommu *iommu = get_irq_data(irq);
803 unsigned long flag;
804
805 spin_lock_irqsave(&iommu->register_lock, flag);
806 writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
807 writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
808 writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
809 spin_unlock_irqrestore(&iommu->register_lock, flag);
810}
811
812void dmar_msi_read(int irq, struct msi_msg *msg)
813{
814 struct intel_iommu *iommu = get_irq_data(irq);
815 unsigned long flag;
816
817 spin_lock_irqsave(&iommu->register_lock, flag);
818 msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
819 msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
820 msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
821 spin_unlock_irqrestore(&iommu->register_lock, flag);
822}
823
824static int iommu_page_fault_do_one(struct intel_iommu *iommu, int type,
825 u8 fault_reason, u16 source_id, u64 addr)
826{
827 const char *reason;
828
829 reason = dmar_get_fault_reason(fault_reason);
830
831 printk(KERN_ERR
832 "DMAR:[%s] Request device [%02x:%02x.%d] "
833 "fault addr %llx \n"
834 "DMAR:[fault reason %02d] %s\n",
835 (type ? "DMA Read" : "DMA Write"),
836 (source_id >> 8), PCI_SLOT(source_id & 0xFF),
837 PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
838 return 0;
839}
840
841#define PRIMARY_FAULT_REG_LEN (16)
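/*
 * Primary fault handler: walk the fault recording registers (one
 * PRIMARY_FAULT_REG_LEN-byte record each), report and clear every
 * pending fault, then clear the fault-overflow status bit.
 */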
842static irqreturn_t iommu_page_fault(int irq, void *dev_id)
843{
844 struct intel_iommu *iommu = dev_id;
845 int reg, fault_index;
846 u32 fault_status;
847 unsigned long flag;
848
849 spin_lock_irqsave(&iommu->register_lock, flag);
850 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
851
852 /* TBD: ignore advanced fault log currently */
853 if (!(fault_status & DMA_FSTS_PPF))
854 goto clear_overflow;
855
856 fault_index = dma_fsts_fault_record_index(fault_status);
857 reg = cap_fault_reg_offset(iommu->cap);
858 while (1) {
859 u8 fault_reason;
860 u16 source_id;
861 u64 guest_addr;
862 int type;
863 u32 data;
864
865 /* highest 32 bits */
866 data = readl(iommu->reg + reg +
867 fault_index * PRIMARY_FAULT_REG_LEN + 12);
868 if (!(data & DMA_FRCD_F))
869 break;
870
871 fault_reason = dma_frcd_fault_reason(data);
872 type = dma_frcd_type(data);
873
874 data = readl(iommu->reg + reg +
875 fault_index * PRIMARY_FAULT_REG_LEN + 8);
876 source_id = dma_frcd_source_id(data);
877
878 guest_addr = dmar_readq(iommu->reg + reg +
879 fault_index * PRIMARY_FAULT_REG_LEN);
880 guest_addr = dma_frcd_page_addr(guest_addr);
881 /* clear the fault */
882 writel(DMA_FRCD_F, iommu->reg + reg +
883 fault_index * PRIMARY_FAULT_REG_LEN + 12);
884
885 spin_unlock_irqrestore(&iommu->register_lock, flag);
886
887 iommu_page_fault_do_one(iommu, type, fault_reason,
888 source_id, guest_addr);
889
890 fault_index++;
891 if (fault_index > cap_num_fault_regs(iommu->cap))
892 fault_index = 0;
893 spin_lock_irqsave(&iommu->register_lock, flag);
894 }
895clear_overflow:
896 /* clear primary fault overflow */
897 fault_status = readl(iommu->reg + DMAR_FSTS_REG);
898 if (fault_status & DMA_FSTS_PFO)
899 writel(DMA_FSTS_PFO, iommu->reg + DMAR_FSTS_REG);
900
901 spin_unlock_irqrestore(&iommu->register_lock, flag);
902 return IRQ_HANDLED;
903}
904
905int dmar_set_interrupt(struct intel_iommu *iommu)
906{
907 int irq, ret;
908
909 irq = create_irq();
910 if (!irq) {
911 printk(KERN_ERR "IOMMU: no free vectors\n");
912 return -EINVAL;
913 }
914
915 set_irq_data(irq, iommu);
916 iommu->irq = irq;
917
918 ret = arch_setup_dmar_msi(irq);
919 if (ret) {
920 set_irq_data(irq, NULL);
921 iommu->irq = 0;
922 destroy_irq(irq);
923 return 0;
924 }
925
926 /* Force fault register is cleared */
927 iommu_page_fault(irq, iommu);
928
929 ret = request_irq(irq, iommu_page_fault, 0, iommu->name, iommu);
930 if (ret)
931 printk(KERN_ERR "IOMMU: can't request irq\n");
932 return ret;
933}
934
935static int iommu_init_domains(struct intel_iommu *iommu)
936{
937 unsigned long ndomains;
938 unsigned long nlongs;
939
940 ndomains = cap_ndoms(iommu->cap);
941 pr_debug("Number of Domains supported <%ld>\n", ndomains);
942 nlongs = BITS_TO_LONGS(ndomains);
943
944 /* TBD: there might be 64K domains,
945 * consider other allocation for future chip
946 */
947 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
948 if (!iommu->domain_ids) {
949 printk(KERN_ERR "Allocating domain id array failed\n");
950 return -ENOMEM;
951 }
952 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
953 GFP_KERNEL);
954 if (!iommu->domains) {
955 printk(KERN_ERR "Allocating domain array failed\n");
956 kfree(iommu->domain_ids);
957 return -ENOMEM;
958 }
959
960 /*
961 * if Caching mode is set, then invalid translations are tagged
962 * with domainid 0. Hence we need to pre-allocate it.
963 */
964 if (cap_caching_mode(iommu->cap))
965 set_bit(0, iommu->domain_ids);
966 return 0;
967}
968
969static struct intel_iommu *alloc_iommu(struct dmar_drhd_unit *drhd)
970{
971 struct intel_iommu *iommu;
972 int ret;
973 int map_size;
974 u32 ver;
975
976 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
977 if (!iommu)
978 return NULL;
979 iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
980 if (!iommu->reg) {
981 printk(KERN_ERR "IOMMU: can't map the region\n");
982 goto error;
983 }
984 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
985 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
986
987 /* the registers might be more than one page */
988 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
989 cap_max_fault_reg_offset(iommu->cap));
990 map_size = PAGE_ALIGN_4K(map_size);
991 if (map_size > PAGE_SIZE_4K) {
992 iounmap(iommu->reg);
993 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
994 if (!iommu->reg) {
995 printk(KERN_ERR "IOMMU: can't map the region\n");
996 goto error;
997 }
998 }
999
1000 ver = readl(iommu->reg + DMAR_VER_REG);
1001 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
1002 drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1003 iommu->cap, iommu->ecap);
1004 ret = iommu_init_domains(iommu);
1005 if (ret)
1006 goto error_unmap;
1007 spin_lock_init(&iommu->lock);
1008 spin_lock_init(&iommu->register_lock);
1009
1010 drhd->iommu = iommu;
1011 return iommu;
1012error_unmap:
1013 iounmap(iommu->reg);
1014error:
1015 kfree(iommu);
1016 return NULL;
1017}
1018
1019static void domain_exit(struct dmar_domain *domain);
1020static void free_iommu(struct intel_iommu *iommu)
1021{
1022 struct dmar_domain *domain;
1023 int i;
1024
1025 if (!iommu)
1026 return;
1027
1028 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1029 for (; i < cap_ndoms(iommu->cap); ) {
1030 domain = iommu->domains[i];
1031 clear_bit(i, iommu->domain_ids);
1032 domain_exit(domain);
1033 i = find_next_bit(iommu->domain_ids,
1034 cap_ndoms(iommu->cap), i+1);
1035 }
1036
1037 if (iommu->gcmd & DMA_GCMD_TE)
1038 iommu_disable_translation(iommu);
1039
1040 if (iommu->irq) {
1041 set_irq_data(iommu->irq, NULL);
1042 /* This will mask the irq */
1043 free_irq(iommu->irq, iommu);
1044 destroy_irq(iommu->irq);
1045 }
1046
1047 kfree(iommu->domains);
1048 kfree(iommu->domain_ids);
1049
1050 /* free context mapping */
1051 free_context_table(iommu);
1052
1053 if (iommu->reg)
1054 iounmap(iommu->reg);
1055 kfree(iommu);
1056}
1057
1058static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
1059{
1060 unsigned long num;
1061 unsigned long ndomains;
1062 struct dmar_domain *domain;
1063 unsigned long flags;
1064
1065 domain = alloc_domain_mem();
1066 if (!domain)
1067 return NULL;
1068
1069 ndomains = cap_ndoms(iommu->cap);
1070
1071 spin_lock_irqsave(&iommu->lock, flags);
1072 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1073 if (num >= ndomains) {
1074 spin_unlock_irqrestore(&iommu->lock, flags);
1075 free_domain_mem(domain);
1076 printk(KERN_ERR "IOMMU: no free domain ids\n");
1077 return NULL;
1078 }
1079
1080 set_bit(num, iommu->domain_ids);
1081 domain->id = num;
1082 domain->iommu = iommu;
1083 iommu->domains[num] = domain;
1084 spin_unlock_irqrestore(&iommu->lock, flags);
1085
1086 return domain;
1087}
1088
1089static void iommu_free_domain(struct dmar_domain *domain)
1090{
1091 unsigned long flags;
1092
1093 spin_lock_irqsave(&domain->iommu->lock, flags);
1094 clear_bit(domain->id, domain->iommu->domain_ids);
1095 spin_unlock_irqrestore(&domain->iommu->lock, flags);
1096}
1097
1098static struct iova_domain reserved_iova_list;
1099
1100static void dmar_init_reserved_ranges(void)
1101{
1102 struct pci_dev *pdev = NULL;
1103 struct iova *iova;
1104 int i;
1105 u64 addr, size;
1106
1107 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
1108
1109 /* IOAPIC ranges shouldn't be accessed by DMA */
1110 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1111 IOVA_PFN(IOAPIC_RANGE_END));
1112 if (!iova)
1113 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1114
1115 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1116 for_each_pci_dev(pdev) {
1117 struct resource *r;
1118
1119 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1120 r = &pdev->resource[i];
1121 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1122 continue;
1123 addr = r->start;
1124 addr &= PAGE_MASK_4K;
1125 size = r->end - addr;
1126 size = PAGE_ALIGN_4K(size);
1127 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(addr),
1128 IOVA_PFN(size + addr) - 1);
1129 if (!iova)
1130 printk(KERN_ERR "Reserve iova failed\n");
1131 }
1132 }
1133
1134}
1135
1136static void domain_reserve_special_ranges(struct dmar_domain *domain)
1137{
1138 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1139}
1140
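/*
 * Round the guest address width up so the bits above the 12-bit page
 * offset split evenly into 9-bit levels (capped at 64), e.g. a gaw of
 * 48 stays 48 while a gaw of 40 becomes 48.
 */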
1141static inline int guestwidth_to_adjustwidth(int gaw)
1142{
1143 int agaw;
1144 int r = (gaw - 12) % 9;
1145
1146 if (r == 0)
1147 agaw = gaw;
1148 else
1149 agaw = gaw + 9 - r;
1150 if (agaw > 64)
1151 agaw = 64;
1152 return agaw;
1153}
1154
1155static int domain_init(struct dmar_domain *domain, int guest_width)
1156{
1157 struct intel_iommu *iommu;
1158 int adjust_width, agaw;
1159 unsigned long sagaw;
1160
1161 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
1162 spin_lock_init(&domain->mapping_lock);
1163
1164 domain_reserve_special_ranges(domain);
1165
1166 /* calculate AGAW */
1167 iommu = domain->iommu;
1168 if (guest_width > cap_mgaw(iommu->cap))
1169 guest_width = cap_mgaw(iommu->cap);
1170 domain->gaw = guest_width;
1171 adjust_width = guestwidth_to_adjustwidth(guest_width);
1172 agaw = width_to_agaw(adjust_width);
1173 sagaw = cap_sagaw(iommu->cap);
1174 if (!test_bit(agaw, &sagaw)) {
1175 /* hardware doesn't support it, choose a bigger one */
1176 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1177 agaw = find_next_bit(&sagaw, 5, agaw);
1178 if (agaw >= 5)
1179 return -ENODEV;
1180 }
1181 domain->agaw = agaw;
1182 INIT_LIST_HEAD(&domain->devices);
1183
1184 /* always allocate the top pgd */
1185 domain->pgd = (struct dma_pte *)alloc_pgtable_page();
1186 if (!domain->pgd)
1187 return -ENOMEM;
1188 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE_4K);
1189 return 0;
1190}
1191
1192static void domain_exit(struct dmar_domain *domain)
1193{
1194 u64 end;
1195
1196 /* Domain 0 is reserved, so don't process it */
1197 if (!domain)
1198 return;
1199
1200 domain_remove_dev_info(domain);
1201 /* destroy iovas */
1202 put_iova_domain(&domain->iovad);
1203 end = DOMAIN_MAX_ADDR(domain->gaw);
1204 end = end & (~PAGE_MASK_4K);
1205
1206 /* clear ptes */
1207 dma_pte_clear_range(domain, 0, end);
1208
1209 /* free page tables */
1210 dma_pte_free_pagetable(domain, 0, end);
1211
1212 iommu_free_domain(domain);
1213 free_domain_mem(domain);
1214}
1215
1216static int domain_context_mapping_one(struct dmar_domain *domain,
1217 u8 bus, u8 devfn)
1218{
1219 struct context_entry *context;
1220 struct intel_iommu *iommu = domain->iommu;
1221 unsigned long flags;
1222
1223 pr_debug("Set context mapping for %02x:%02x.%d\n",
1224 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
1225 BUG_ON(!domain->pgd);
1226 context = device_to_context_entry(iommu, bus, devfn);
1227 if (!context)
1228 return -ENOMEM;
1229 spin_lock_irqsave(&iommu->lock, flags);
1230 if (context_present(*context)) {
1231 spin_unlock_irqrestore(&iommu->lock, flags);
1232 return 0;
1233 }
1234
1235 context_set_domain_id(*context, domain->id);
1236 context_set_address_width(*context, domain->agaw);
1237 context_set_address_root(*context, virt_to_phys(domain->pgd));
1238 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
1239 context_set_fault_enable(*context);
1240 context_set_present(*context);
1241 __iommu_flush_cache(iommu, context, sizeof(*context));
1242
1243 /* it's a non-present to present mapping */
1244 if (iommu_flush_context_device(iommu, domain->id,
1245 (((u16)bus) << 8) | devfn, DMA_CCMD_MASK_NOBIT, 1))
1246 iommu_flush_write_buffer(iommu);
1247 else
1248 iommu_flush_iotlb_dsi(iommu, 0, 0);
1249 spin_unlock_irqrestore(&iommu->lock, flags);
1250 return 0;
1251}
1252
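/*
 * Install context entries for @pdev and, if it sits behind a
 * PCIe-to-PCI bridge, for every bridge on the upstream path as well,
 * so that DMA forwarded on its behalf is translated too.
 */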
1253static int
1254domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev)
1255{
1256 int ret;
1257 struct pci_dev *tmp, *parent;
1258
1259 ret = domain_context_mapping_one(domain, pdev->bus->number,
1260 pdev->devfn);
1261 if (ret)
1262 return ret;
1263
1264 /* dependent device mapping */
1265 tmp = pci_find_upstream_pcie_bridge(pdev);
1266 if (!tmp)
1267 return 0;
1268 /* Secondary interface's bus number and devfn 0 */
1269 parent = pdev->bus->self;
1270 while (parent != tmp) {
1271 ret = domain_context_mapping_one(domain, parent->bus->number,
1272 parent->devfn);
1273 if (ret)
1274 return ret;
1275 parent = parent->bus->self;
1276 }
1277 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1278 return domain_context_mapping_one(domain,
1279 tmp->subordinate->number, 0);
1280 else /* this is a legacy PCI bridge */
1281 return domain_context_mapping_one(domain,
1282 tmp->bus->number, tmp->devfn);
1283}
1284
1285static int domain_context_mapped(struct dmar_domain *domain,
1286 struct pci_dev *pdev)
1287{
1288 int ret;
1289 struct pci_dev *tmp, *parent;
1290
1291 ret = device_context_mapped(domain->iommu,
1292 pdev->bus->number, pdev->devfn);
1293 if (!ret)
1294 return ret;
1295 /* dependent device mapping */
1296 tmp = pci_find_upstream_pcie_bridge(pdev);
1297 if (!tmp)
1298 return ret;
1299 /* Secondary interface's bus number and devfn 0 */
1300 parent = pdev->bus->self;
1301 while (parent != tmp) {
1302 ret = device_context_mapped(domain->iommu, parent->bus->number,
1303 parent->devfn);
1304 if (!ret)
1305 return ret;
1306 parent = parent->bus->self;
1307 }
1308 if (tmp->is_pcie)
1309 return device_context_mapped(domain->iommu,
1310 tmp->subordinate->number, 0);
1311 else
1312 return device_context_mapped(domain->iommu,
1313 tmp->bus->number, tmp->devfn);
1314}
1315
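/*
 * Map [hpa, hpa + size) to @iova in 4K pages with the given protection
 * bits; the target PTEs must not already be in use.
 */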
1316static int
1317domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova,
1318 u64 hpa, size_t size, int prot)
1319{
1320 u64 start_pfn, end_pfn;
1321 struct dma_pte *pte;
1322 int index;
1323
1324 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1325 return -EINVAL;
1326 iova &= PAGE_MASK_4K;
1327 start_pfn = ((u64)hpa) >> PAGE_SHIFT_4K;
1328 end_pfn = (PAGE_ALIGN_4K(((u64)hpa) + size)) >> PAGE_SHIFT_4K;
1329 index = 0;
1330 while (start_pfn < end_pfn) {
1331 pte = addr_to_dma_pte(domain, iova + PAGE_SIZE_4K * index);
1332 if (!pte)
1333 return -ENOMEM;
1334 /* We don't need lock here, nobody else
1335 * touches the iova range
1336 */
1337 BUG_ON(dma_pte_addr(*pte));
1338 dma_set_pte_addr(*pte, start_pfn << PAGE_SHIFT_4K);
1339 dma_set_pte_prot(*pte, prot);
1340 __iommu_flush_cache(domain->iommu, pte, sizeof(*pte));
1341 start_pfn++;
1342 index++;
1343 }
1344 return 0;
1345}
1346
1347static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
1348{
1349 clear_context_table(domain->iommu, bus, devfn);
1350 iommu_flush_context_global(domain->iommu, 0);
1351 iommu_flush_iotlb_global(domain->iommu, 0);
1352}
1353
1354static void domain_remove_dev_info(struct dmar_domain *domain)
1355{
1356 struct device_domain_info *info;
1357 unsigned long flags;
1358
1359 spin_lock_irqsave(&device_domain_lock, flags);
1360 while (!list_empty(&domain->devices)) {
1361 info = list_entry(domain->devices.next,
1362 struct device_domain_info, link);
1363 list_del(&info->link);
1364 list_del(&info->global);
1365 if (info->dev)
1366 info->dev->dev.archdata.iommu = NULL;
1367 spin_unlock_irqrestore(&device_domain_lock, flags);
1368
1369 detach_domain_for_dev(info->domain, info->bus, info->devfn);
1370 free_devinfo_mem(info);
1371
1372 spin_lock_irqsave(&device_domain_lock, flags);
1373 }
1374 spin_unlock_irqrestore(&device_domain_lock, flags);
1375}
1376
1377/*
1378 * find_domain
1379 * Note: we use struct pci_dev->dev.archdata.iommu to store the info
1380 */
1381struct dmar_domain *
1382find_domain(struct pci_dev *pdev)
1383{
1384 struct device_domain_info *info;
1385
1386 /* No lock here, assumes no domain exit in normal case */
1387 info = pdev->dev.archdata.iommu;
1388 if (info)
1389 return info->domain;
1390 return NULL;
1391}
1392
1393static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
1394 struct pci_dev *dev)
1395{
1396 int index;
1397
1398 while (dev) {
1399 for (index = 0; index < cnt; index ++)
1400 if (dev == devices[index])
1401 return 1;
1402
1403 /* Check our parent */
1404 dev = dev->bus->self;
1405 }
1406
1407 return 0;
1408}
1409
1410static struct dmar_drhd_unit *
1411dmar_find_matched_drhd_unit(struct pci_dev *dev)
1412{
1413 struct dmar_drhd_unit *drhd = NULL;
1414
1415 list_for_each_entry(drhd, &dmar_drhd_units, list) {
1416 if (drhd->include_all || dmar_pci_device_match(drhd->devices,
1417 drhd->devices_cnt, dev))
1418 return drhd;
1419 }
1420
1421 return NULL;
1422}
1423
1424/* domain is initialized */
1425static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1426{
1427 struct dmar_domain *domain, *found = NULL;
1428 struct intel_iommu *iommu;
1429 struct dmar_drhd_unit *drhd;
1430 struct device_domain_info *info, *tmp;
1431 struct pci_dev *dev_tmp;
1432 unsigned long flags;
1433 int bus = 0, devfn = 0;
1434
1435 domain = find_domain(pdev);
1436 if (domain)
1437 return domain;
1438
1439 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1440 if (dev_tmp) {
1441 if (dev_tmp->is_pcie) {
1442 bus = dev_tmp->subordinate->number;
1443 devfn = 0;
1444 } else {
1445 bus = dev_tmp->bus->number;
1446 devfn = dev_tmp->devfn;
1447 }
1448 spin_lock_irqsave(&device_domain_lock, flags);
1449 list_for_each_entry(info, &device_domain_list, global) {
1450 if (info->bus == bus && info->devfn == devfn) {
1451 found = info->domain;
1452 break;
1453 }
1454 }
1455 spin_unlock_irqrestore(&device_domain_lock, flags);
1456 /* pcie-pci bridge already has a domain, use it */
1457 if (found) {
1458 domain = found;
1459 goto found_domain;
1460 }
1461 }
1462
1463 /* Allocate new domain for the device */
1464 drhd = dmar_find_matched_drhd_unit(pdev);
1465 if (!drhd) {
1466 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1467 pci_name(pdev));
1468 return NULL;
1469 }
1470 iommu = drhd->iommu;
1471
1472 domain = iommu_alloc_domain(iommu);
1473 if (!domain)
1474 goto error;
1475
1476 if (domain_init(domain, gaw)) {
1477 domain_exit(domain);
1478 goto error;
1479 }
1480
1481 /* register pcie-to-pci device */
1482 if (dev_tmp) {
1483 info = alloc_devinfo_mem();
1484 if (!info) {
1485 domain_exit(domain);
1486 goto error;
1487 }
1488 info->bus = bus;
1489 info->devfn = devfn;
1490 info->dev = NULL;
1491 info->domain = domain;
1492 /* This domain is shared by devices under p2p bridge */
1493 domain->flags |= DOMAIN_FLAG_MULTIPLE_DEVICES;
1494
1495 /* pcie-to-pci bridge already has a domain, use it */
1496 found = NULL;
1497 spin_lock_irqsave(&device_domain_lock, flags);
1498 list_for_each_entry(tmp, &device_domain_list, global) {
1499 if (tmp->bus == bus && tmp->devfn == devfn) {
1500 found = tmp->domain;
1501 break;
1502 }
1503 }
1504 if (found) {
1505 free_devinfo_mem(info);
1506 domain_exit(domain);
1507 domain = found;
1508 } else {
1509 list_add(&info->link, &domain->devices);
1510 list_add(&info->global, &device_domain_list);
1511 }
1512 spin_unlock_irqrestore(&device_domain_lock, flags);
1513 }
1514
1515found_domain:
1516 info = alloc_devinfo_mem();
1517 if (!info)
1518 goto error;
1519 info->bus = pdev->bus->number;
1520 info->devfn = pdev->devfn;
1521 info->dev = pdev;
1522 info->domain = domain;
1523 spin_lock_irqsave(&device_domain_lock, flags);
1524 /* somebody is fast */
1525 found = find_domain(pdev);
1526 if (found != NULL) {
1527 spin_unlock_irqrestore(&device_domain_lock, flags);
1528 if (found != domain) {
1529 domain_exit(domain);
1530 domain = found;
1531 }
1532 free_devinfo_mem(info);
1533 return domain;
1534 }
1535 list_add(&info->link, &domain->devices);
1536 list_add(&info->global, &device_domain_list);
1537 pdev->dev.archdata.iommu = info;
1538 spin_unlock_irqrestore(&device_domain_lock, flags);
1539 return domain;
1540error:
1541 /* recheck it here, maybe others set it */
1542 return find_domain(pdev);
1543}
1544
1545static int iommu_prepare_identity_map(struct pci_dev *pdev, u64 start, u64 end)
1546{
1547 struct dmar_domain *domain;
1548 unsigned long size;
1549 u64 base;
1550 int ret;
1551
1552 printk(KERN_INFO
1553 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
1554 pci_name(pdev), start, end);
1555 /* page table init */
1556 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
1557 if (!domain)
1558 return -ENOMEM;
1559
1560 /* The address might not be aligned */
1561 base = start & PAGE_MASK_4K;
1562 size = end - base;
1563 size = PAGE_ALIGN_4K(size);
1564 if (!reserve_iova(&domain->iovad, IOVA_PFN(base),
1565 IOVA_PFN(base + size) - 1)) {
1566 printk(KERN_ERR "IOMMU: reserve iova failed\n");
1567 ret = -ENOMEM;
1568 goto error;
1569 }
1570
1571 pr_debug("Mapping reserved region %lx@%llx for %s\n",
1572 size, base, pci_name(pdev));
1573 /*
1574 * RMRR range might have overlap with physical memory range,
1575 * clear it first
1576 */
1577 dma_pte_clear_range(domain, base, base + size);
1578
1579 ret = domain_page_mapping(domain, base, base, size,
1580 DMA_PTE_READ|DMA_PTE_WRITE);
1581 if (ret)
1582 goto error;
1583
1584 /* context entry init */
1585 ret = domain_context_mapping(domain, pdev);
1586 if (!ret)
1587 return 0;
1588error:
1589 domain_exit(domain);
1590 return ret;
1591
1592}
1593
1594static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
1595 struct pci_dev *pdev)
1596{
1597 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1598 return 0;
1599 return iommu_prepare_identity_map(pdev, rmrr->base_address,
1600 rmrr->end_address + 1);
1601}
1602
1603#ifdef CONFIG_DMAR_GFX_WA
1604extern int arch_get_ram_range(int slot, u64 *addr, u64 *size);
1605static void __init iommu_prepare_gfx_mapping(void)
1606{
1607 struct pci_dev *pdev = NULL;
1608 u64 base, size;
1609 int slot;
1610 int ret;
1611
1612 for_each_pci_dev(pdev) {
1613 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO ||
1614 !IS_GFX_DEVICE(pdev))
1615 continue;
1616 printk(KERN_INFO "IOMMU: gfx device %s 1-1 mapping\n",
1617 pci_name(pdev));
1618 slot = arch_get_ram_range(0, &base, &size);
1619 while (slot >= 0) {
1620 ret = iommu_prepare_identity_map(pdev,
1621 base, base + size);
1622 if (ret)
1623 goto error;
1624 slot = arch_get_ram_range(slot, &base, &size);
1625 }
1626 continue;
1627error:
1628 printk(KERN_ERR "IOMMU: mapping reserved region failed\n");
1629 }
1630}
1631#endif
1632
1633#ifdef CONFIG_DMAR_FLOPPY_WA
1634static inline void iommu_prepare_isa(void)
1635{
1636 struct pci_dev *pdev;
1637 int ret;
1638
1639 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
1640 if (!pdev)
1641 return;
1642
1643 printk(KERN_INFO "IOMMU: Prepare 0-16M unity mapping for LPC\n");
1644 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
1645
1646 if (ret)
1647 printk("IOMMU: Failed to create 0-16M identity map, "
1648 "floppy might not work\n");
1649
1650}
1651#else
1652static inline void iommu_prepare_isa(void)
1653{
1654 return;
1655}
1656#endif /* !CONFIG_DMAR_FLOPPY_WA */
1657
1658int __init init_dmars(void)
1659{
1660 struct dmar_drhd_unit *drhd;
1661 struct dmar_rmrr_unit *rmrr;
1662 struct pci_dev *pdev;
1663 struct intel_iommu *iommu;
1664 int ret, unit = 0;
1665
1666 /*
1667 * for each drhd
1668 * allocate root
1669 * initialize and program root entry to not present
1670 * endfor
1671 */
1672 for_each_drhd_unit(drhd) {
1673 if (drhd->ignored)
1674 continue;
1675 iommu = alloc_iommu(drhd);
1676 if (!iommu) {
1677 ret = -ENOMEM;
1678 goto error;
1679 }
1680
1681 /*
1682 * TBD:
1683 * we could share the same root & context tables
1684 * among all IOMMUs. Need to split it later.
1685 */
1686 ret = iommu_alloc_root_entry(iommu);
1687 if (ret) {
1688 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
1689 goto error;
1690 }
1691 }
1692
1693 /*
1694 * For each rmrr
1695 * for each dev attached to rmrr
1696 * do
1697 * locate drhd for dev, alloc domain for dev
1698 * allocate free domain
1699 * allocate page table entries for rmrr
1700 * if context not allocated for bus
1701 * allocate and init context
1702 * set present in root table for this bus
1703 * init context with domain, translation etc
1704 * endfor
1705 * endfor
1706 */
1707 for_each_rmrr_units(rmrr) {
1708 int i;
1709 for (i = 0; i < rmrr->devices_cnt; i++) {
1710 pdev = rmrr->devices[i];
1711 /* some BIOSes list non-existent devices in the DMAR table */
1712 if (!pdev)
1713 continue;
1714 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
1715 if (ret)
1716 printk(KERN_ERR
1717 "IOMMU: mapping reserved region failed\n");
1718 }
1719 }
1720
1721 iommu_prepare_gfx_mapping();
1722
1723 iommu_prepare_isa();
1724
1725 /*
1726 * for each drhd
1727 * enable fault log
1728 * global invalidate context cache
1729 * global invalidate iotlb
1730 * enable translation
1731 */
1732 for_each_drhd_unit(drhd) {
1733 if (drhd->ignored)
1734 continue;
1735 iommu = drhd->iommu;
1736 sprintf (iommu->name, "dmar%d", unit++);
1737
1738 iommu_flush_write_buffer(iommu);
1739
1740 ret = dmar_set_interrupt(iommu);
1741 if (ret)
1742 goto error;
1743
1744 iommu_set_root_entry(iommu);
1745
1746 iommu_flush_context_global(iommu, 0);
1747 iommu_flush_iotlb_global(iommu, 0);
1748
1749 iommu_disable_protect_mem_regions(iommu);
1750
1751 ret = iommu_enable_translation(iommu);
1752 if (ret)
1753 goto error;
1754 }
1755
1756 return 0;
1757error:
1758 for_each_drhd_unit(drhd) {
1759 if (drhd->ignored)
1760 continue;
1761 iommu = drhd->iommu;
1762 free_iommu(iommu);
1763 }
1764 return ret;
1765}
1766
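/*
 * Bytes needed to cover [host_addr, host_addr + size) with whole 4K
 * pages: e.g. an 8-byte buffer starting at page offset 0xffc crosses a
 * page boundary, so aligned_size() returns 0x2000.
 */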
1767static inline u64 aligned_size(u64 host_addr, size_t size)
1768{
1769 u64 addr;
1770 addr = (host_addr & (~PAGE_MASK_4K)) + size;
1771 return PAGE_ALIGN_4K(addr);
1772}
1773
1774struct iova *
1775iommu_alloc_iova(struct dmar_domain *domain, size_t size, u64 end)
1776{
1777 struct iova *piova;
1778
1779 /* Make sure it's in range */
1780 end = min_t(u64, DOMAIN_MAX_ADDR(domain->gaw), end);
1781 if (!size || (IOVA_START_ADDR + size > end))
1782 return NULL;
1783
1784 piova = alloc_iova(&domain->iovad,
1785 size >> PAGE_SHIFT_4K, IOVA_PFN(end), 1);
1786 return piova;
1787}
1788
1789static struct iova *
1790__intel_alloc_iova(struct device *dev, struct dmar_domain *domain,
1791 size_t size)
1792{
1793 struct pci_dev *pdev = to_pci_dev(dev);
1794 struct iova *iova = NULL;
1795
1796 if ((pdev->dma_mask <= DMA_32BIT_MASK) || (dmar_forcedac)) {
1797 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1798 } else {
1799 /*
1800 * First try to allocate an io virtual address in
1801 * DMA_32BIT_MASK and if that fails then try allocating
1802 * from higher range
1803 */
1804 iova = iommu_alloc_iova(domain, size, DMA_32BIT_MASK);
1805 if (!iova)
1806 iova = iommu_alloc_iova(domain, size, pdev->dma_mask);
1807 }
1808
1809 if (!iova) {
1810 printk(KERN_ERR"Allocating iova for %s failed", pci_name(pdev));
1811 return NULL;
1812 }
1813
1814 return iova;
1815}
1816
1817static struct dmar_domain *
1818get_valid_domain_for_dev(struct pci_dev *pdev)
1819{
1820 struct dmar_domain *domain;
1821 int ret;
1822
1823 domain = get_domain_for_dev(pdev,
1824 DEFAULT_DOMAIN_ADDRESS_WIDTH);
1825 if (!domain) {
1826 printk(KERN_ERR
1827 "Allocating domain for %s failed", pci_name(pdev));
1828 return NULL;
1829 }
1830
1831 /* make sure context mapping is ok */
1832 if (unlikely(!domain_context_mapped(domain, pdev))) {
1833 ret = domain_context_mapping(domain, pdev);
1834 if (ret) {
1835 printk(KERN_ERR
1836 "Domain context map for %s failed",
1837 pci_name(pdev));
1838 return NULL;
1839 }
1840 }
1841
1842 return domain;
1843}
1844
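/*
 * Single-buffer DMA map path: look up (or create) the device's domain,
 * allocate an IOVA range below the device's DMA mask, map whole 4K
 * pages with the requested protection, then flush the IOTLB (or just
 * the write buffer when no invalidation is needed) and return the bus
 * address plus the offset within the first page.
 */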
1845static dma_addr_t intel_map_single(struct device *hwdev, void *addr,
1846 size_t size, int dir)
1847{
1848 struct pci_dev *pdev = to_pci_dev(hwdev);
1849 int ret;
1850 struct dmar_domain *domain;
1851 unsigned long start_addr;
1852 struct iova *iova;
1853 int prot = 0;
1854
1855 BUG_ON(dir == DMA_NONE);
1856 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1857 return virt_to_bus(addr);
1858
1859 domain = get_valid_domain_for_dev(pdev);
1860 if (!domain)
1861 return 0;
1862
1863 addr = (void *)virt_to_phys(addr);
1864 size = aligned_size((u64)addr, size);
1865
1866 iova = __intel_alloc_iova(hwdev, domain, size);
1867 if (!iova)
1868 goto error;
1869
1870 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1871
1872 /*
1873 * Check if DMAR supports zero-length reads on write only
1874 * mappings..
1875 */
1876 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
1877 !cap_zlr(domain->iommu->cap))
1878 prot |= DMA_PTE_READ;
1879 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
1880 prot |= DMA_PTE_WRITE;
1881 /*
1882 * addr - (addr + size) might be partial page, we should map the whole
1883 * page. Note: if two part of one page are separately mapped, we
1884 * might have two guest_addr mapping to the same host addr, but this
1885 * is not a big problem
1886 */
1887 ret = domain_page_mapping(domain, start_addr,
1888 ((u64)addr) & PAGE_MASK_4K, size, prot);
1889 if (ret)
1890 goto error;
1891
1892 pr_debug("Device %s request: %lx@%llx mapping: %lx@%llx, dir %d\n",
1893 pci_name(pdev), size, (u64)addr,
1894 size, (u64)start_addr, dir);
1895
1896 /* it's a non-present to present mapping */
1897 ret = iommu_flush_iotlb_psi(domain->iommu, domain->id,
1898 start_addr, size >> PAGE_SHIFT_4K, 1);
1899 if (ret)
1900 iommu_flush_write_buffer(domain->iommu);
1901
1902 return (start_addr + ((u64)addr & (~PAGE_MASK_4K)));
1903
1904error:
1905 if (iova)
1906 __free_iova(&domain->iovad, iova);
1907 printk(KERN_ERR"Device %s request: %lx@%llx dir %d --- failed\n",
1908 pci_name(pdev), size, (u64)addr, dir);
1909 return 0;
1910}
1911
1912static void intel_unmap_single(struct device *dev, dma_addr_t dev_addr,
1913 size_t size, int dir)
1914{
1915 struct pci_dev *pdev = to_pci_dev(dev);
1916 struct dmar_domain *domain;
1917 unsigned long start_addr;
1918 struct iova *iova;
1919
1920 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1921 return;
1922 domain = find_domain(pdev);
1923 BUG_ON(!domain);
1924
1925 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
1926 if (!iova)
1927 return;
1928
1929 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
1930 size = aligned_size((u64)dev_addr, size);
1931
1932 pr_debug("Device %s unmapping: %lx@%llx\n",
1933 pci_name(pdev), size, (u64)start_addr);
1934
1935 /* clear the whole page */
1936 dma_pte_clear_range(domain, start_addr, start_addr + size);
1937 /* free page tables */
1938 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
1939
1940 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
1941 size >> PAGE_SHIFT_4K, 0))
1942 iommu_flush_write_buffer(domain->iommu);
1943
1944 /* free iova */
1945 __free_iova(&domain->iovad, iova);
1946}
1947
1948static void * intel_alloc_coherent(struct device *hwdev, size_t size,
1949 dma_addr_t *dma_handle, gfp_t flags)
1950{
1951 void *vaddr;
1952 int order;
1953
1954 size = PAGE_ALIGN_4K(size);
1955 order = get_order(size);
1956 flags &= ~(GFP_DMA | GFP_DMA32);
1957
1958 vaddr = (void *)__get_free_pages(flags, order);
1959 if (!vaddr)
1960 return NULL;
1961 memset(vaddr, 0, size);
1962
1963 *dma_handle = intel_map_single(hwdev, vaddr, size, DMA_BIDIRECTIONAL);
1964 if (*dma_handle)
1965 return vaddr;
1966 free_pages((unsigned long)vaddr, order);
1967 return NULL;
1968}
1969
1970static void intel_free_coherent(struct device *hwdev, size_t size,
1971 void *vaddr, dma_addr_t dma_handle)
1972{
1973 int order;
1974
1975 size = PAGE_ALIGN_4K(size);
1976 order = get_order(size);
1977
1978 intel_unmap_single(hwdev, dma_handle, size, DMA_BIDIRECTIONAL);
1979 free_pages((unsigned long)vaddr, order);
1980}
1981
12d4d40e 1982#define SG_ENT_VIRT_ADDRESS(sg) (sg_virt((sg)))
c03ab37c 1983static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
1984 int nelems, int dir)
1985{
1986 int i;
1987 struct pci_dev *pdev = to_pci_dev(hwdev);
1988 struct dmar_domain *domain;
1989 unsigned long start_addr;
1990 struct iova *iova;
1991 size_t size = 0;
1992 void *addr;
c03ab37c 1993 struct scatterlist *sg;
ba395927 1994
358dd8ac 1995 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
1996 return;
1997
1998 domain = find_domain(pdev);
ba395927 1999
c03ab37c 2000 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
2001 if (!iova)
2002 return;
c03ab37c 2003 for_each_sg(sglist, sg, nelems, i) {
2004 addr = SG_ENT_VIRT_ADDRESS(sg);
2005 size += aligned_size((u64)addr, sg->length);
2006 }
2007
2008 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
2009
2010 /* clear the whole page */
2011 dma_pte_clear_range(domain, start_addr, start_addr + size);
2012 /* free page tables */
2013 dma_pte_free_pagetable(domain, start_addr, start_addr + size);
2014
2015 if (iommu_flush_iotlb_psi(domain->iommu, domain->id, start_addr,
2016 size >> PAGE_SHIFT_4K, 0))
ba395927 2017 iommu_flush_write_buffer(domain->iommu);
2018
2019 /* free iova */
2020 __free_iova(&domain->iovad, iova);
2021}
2022
ba395927 2023static int intel_nontranslate_map_sg(struct device *hwdev,
c03ab37c 2024 struct scatterlist *sglist, int nelems, int dir)
2025{
2026 int i;
c03ab37c 2027 struct scatterlist *sg;
ba395927 2028
c03ab37c 2029 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 2030 BUG_ON(!sg_page(sg));
2031 sg->dma_address = virt_to_bus(SG_ENT_VIRT_ADDRESS(sg));
2032 sg->dma_length = sg->length;
2033 }
2034 return nelems;
2035}
2036
2037static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist,
2038 int nelems, int dir)
2039{
2040 void *addr;
2041 int i;
2042 struct pci_dev *pdev = to_pci_dev(hwdev);
2043 struct dmar_domain *domain;
2044 size_t size = 0;
2045 int prot = 0;
2046 size_t offset = 0;
2047 struct iova *iova = NULL;
2048 int ret;
c03ab37c 2049 struct scatterlist *sg;
f76aec76 2050 unsigned long start_addr;
2051
2052 BUG_ON(dir == DMA_NONE);
358dd8ac 2053 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
c03ab37c 2054 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 2055
2056 domain = get_valid_domain_for_dev(pdev);
2057 if (!domain)
2058 return 0;
2059
c03ab37c 2060 for_each_sg(sglist, sg, nelems, i) {
ba395927 2061 addr = SG_ENT_VIRT_ADDRESS(sg);
2062 addr = (void *)virt_to_phys(addr);
2063 size += aligned_size((u64)addr, sg->length);
2064 }
2065
2066 iova = __intel_alloc_iova(hwdev, domain, size);
2067 if (!iova) {
c03ab37c 2068 sglist->dma_length = 0;
2069 return 0;
2070 }
2071
2072 /*
2074 * Check whether DMAR supports zero-length reads on write-only
2074 * mappings.
2075 */
2076 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
2077 !cap_zlr(domain->iommu->cap))
2078 prot |= DMA_PTE_READ;
2079 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2080 prot |= DMA_PTE_WRITE;
2081
2082 start_addr = iova->pfn_lo << PAGE_SHIFT_4K;
2083 offset = 0;
c03ab37c 2084 for_each_sg(sglist, sg, nelems, i) {
2085 addr = SG_ENT_VIRT_ADDRESS(sg);
2086 addr = (void *)virt_to_phys(addr);
2087 size = aligned_size((u64)addr, sg->length);
2088 ret = domain_page_mapping(domain, start_addr + offset,
2089 ((u64)addr) & PAGE_MASK_4K,
2090 size, prot);
2091 if (ret) {
2092 /* clear the page */
2093 dma_pte_clear_range(domain, start_addr,
2094 start_addr + offset);
2095 /* free page tables */
2096 dma_pte_free_pagetable(domain, start_addr,
2097 start_addr + offset);
2098 /* free iova */
2099 __free_iova(&domain->iovad, iova);
2100 return 0;
2101 }
2102 sg->dma_address = start_addr + offset +
2103 ((u64)addr & (~PAGE_MASK_4K));
ba395927 2104 sg->dma_length = sg->length;
f76aec76 2105 offset += size;
2106 }
2107
ba395927 2108 /* it's a non-present to present mapping */
2109 if (iommu_flush_iotlb_psi(domain->iommu, domain->id,
2110 start_addr, offset >> PAGE_SHIFT_4K, 1))
2111 iommu_flush_write_buffer(domain->iommu);
2112 return nelems;
2113}
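/*
 * Editor's sketch (not part of the original file): intel_map_sg() sizes the
 * whole scatterlist first, carves out one contiguous IOVA range for it and
 * then maps each element at an increasing offset, so the device sees a dense
 * bus-address region even for physically scattered buffers.  The caller
 * below is hypothetical and assumes the <linux/scatterlist.h> helpers are
 * visible here.
 */
static int example_map_two_buffers(struct pci_dev *pdev, void *a, void *b,
				   size_t len)
{
	struct scatterlist sglist[2];
	struct scatterlist *sg;
	int i, nents;

	sg_init_table(sglist, 2);
	sg_set_buf(&sglist[0], a, len);
	sg_set_buf(&sglist[1], b, len);

	/* Ends up in intel_map_sg(); 0 means the IOVA or PTE setup failed. */
	nents = dma_map_sg(&pdev->dev, sglist, 2, DMA_TO_DEVICE);
	if (!nents)
		return -EIO;

	for_each_sg(sglist, sg, nents, i)
		pr_debug("seg %d: %llx+%u\n", i,
			 (unsigned long long)sg_dma_address(sg),
			 sg_dma_len(sg));

	/* Ends up in intel_unmap_sg(). */
	dma_unmap_sg(&pdev->dev, sglist, 2, DMA_TO_DEVICE);
	return 0;
}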
2114
2115static struct dma_mapping_ops intel_dma_ops = {
2116 .alloc_coherent = intel_alloc_coherent,
2117 .free_coherent = intel_free_coherent,
2118 .map_single = intel_map_single,
2119 .unmap_single = intel_unmap_single,
2120 .map_sg = intel_map_sg,
2121 .unmap_sg = intel_unmap_sg,
2122};
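/*
 * Editor's note (illustrative, not part of the original file): once
 * intel_iommu_init() below points the global dma_ops at this table, the
 * arch-level DMA API simply dispatches through it, roughly along the lines
 * of
 *
 *	static inline dma_addr_t
 *	dma_map_single(struct device *hwdev, void *ptr, size_t size, int dir)
 *	{
 *		return dma_ops->map_single(hwdev, ptr, size, dir);
 *	}
 *
 * so ordinary dma_map_single()/dma_map_sg()/dma_alloc_coherent() calls from
 * drivers land in the intel_* handlers above.  The exact arch wrapper is
 * simplified here.
 */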
2123
2124static inline int iommu_domain_cache_init(void)
2125{
2126 int ret = 0;
2127
2128 iommu_domain_cache = kmem_cache_create("iommu_domain",
2129 sizeof(struct dmar_domain),
2130 0,
2131 SLAB_HWCACHE_ALIGN,
2132
2133 NULL);
2134 if (!iommu_domain_cache) {
2135 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2136 ret = -ENOMEM;
2137 }
2138
2139 return ret;
2140}
2141
2142static inline int iommu_devinfo_cache_init(void)
2143{
2144 int ret = 0;
2145
2146 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2147 sizeof(struct device_domain_info),
2148 0,
2149 SLAB_HWCACHE_ALIGN,
2150
2151 NULL);
2152 if (!iommu_devinfo_cache) {
2153 printk(KERN_ERR "Couldn't create devinfo cache\n");
2154 ret = -ENOMEM;
2155 }
2156
2157 return ret;
2158}
2159
2160static inline int iommu_iova_cache_init(void)
2161{
2162 int ret = 0;
2163
2164 iommu_iova_cache = kmem_cache_create("iommu_iova",
2165 sizeof(struct iova),
2166 0,
2167 SLAB_HWCACHE_ALIGN,
2168
2169 NULL);
2170 if (!iommu_iova_cache) {
2171 printk(KERN_ERR "Couldn't create iova cache\n");
2172 ret = -ENOMEM;
2173 }
2174
2175 return ret;
2176}
2177
2178static int __init iommu_init_mempool(void)
2179{
2180 int ret;
2181 ret = iommu_iova_cache_init();
2182 if (ret)
2183 return ret;
2184
2185 ret = iommu_domain_cache_init();
2186 if (ret)
2187 goto domain_error;
2188
2189 ret = iommu_devinfo_cache_init();
2190 if (!ret)
2191 return ret;
2192
2193 kmem_cache_destroy(iommu_domain_cache);
2194domain_error:
2195 kmem_cache_destroy(iommu_iova_cache);
2196
2197 return -ENOMEM;
2198}
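/*
 * Editor's sketch (not part of the original file): the three slab caches set
 * up above exist so that struct iova, struct dmar_domain and struct
 * device_domain_info objects can be allocated cheaply and cache-aligned on
 * hot paths.  Typical use is plain kmem_cache_alloc()/kmem_cache_free();
 * the GFP flag and zeroing below are assumptions.
 */
static struct iova *example_alloc_iova_entry(void)
{
	struct iova *entry;

	entry = kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
	if (entry)
		memset(entry, 0, sizeof(*entry));
	return entry;
}

static void example_free_iova_entry(struct iova *entry)
{
	kmem_cache_free(iommu_iova_cache, entry);
}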
2199
2200static void __init iommu_exit_mempool(void)
2201{
2202 kmem_cache_destroy(iommu_devinfo_cache);
2203 kmem_cache_destroy(iommu_domain_cache);
2204 kmem_cache_destroy(iommu_iova_cache);
2205
2206}
2207
2208void __init detect_intel_iommu(void)
2209{
2210 if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
2211 return;
2212 if (early_dmar_detect()) {
2213 iommu_detected = 1;
2214 }
2215}
2216
2217static void __init init_no_remapping_devices(void)
2218{
2219 struct dmar_drhd_unit *drhd;
2220
2221 for_each_drhd_unit(drhd) {
2222 if (!drhd->include_all) {
2223 int i;
2224 for (i = 0; i < drhd->devices_cnt; i++)
2225 if (drhd->devices[i] != NULL)
2226 break;
2227 /* ignore DMAR unit if no pci devices exist */
2228 if (i == drhd->devices_cnt)
2229 drhd->ignored = 1;
2230 }
2231 }
2232
2233 if (dmar_map_gfx)
2234 return;
2235
2236 for_each_drhd_unit(drhd) {
2237 int i;
2238 if (drhd->ignored || drhd->include_all)
2239 continue;
2240
2241 for (i = 0; i < drhd->devices_cnt; i++)
2242 if (drhd->devices[i] &&
2243 !IS_GFX_DEVICE(drhd->devices[i]))
2244 break;
2245
2246 if (i < drhd->devices_cnt)
2247 continue;
2248
2249 /* bypass IOMMU if it is just for gfx devices */
2250 drhd->ignored = 1;
2251 for (i = 0; i < drhd->devices_cnt; i++) {
2252 if (!drhd->devices[i])
2253 continue;
358dd8ac 2254 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
2255 }
2256 }
2257}
2258
2259int __init intel_iommu_init(void)
2260{
2261 int ret = 0;
2262
2263 if (no_iommu || swiotlb || dmar_disabled)
2264 return -ENODEV;
2265
2266 if (dmar_table_init())
2267 return -ENODEV;
2268
2269 iommu_init_mempool();
2270 dmar_init_reserved_ranges();
2271
2272 init_no_remapping_devices();
2273
2274 ret = init_dmars();
2275 if (ret) {
2276 printk(KERN_ERR "IOMMU: dmar init failed\n");
2277 put_iova_domain(&reserved_iova_list);
2278 iommu_exit_mempool();
2279 return ret;
2280 }
2281 printk(KERN_INFO
2282 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
2283
2284 force_iommu = 1;
2285 dma_ops = &intel_dma_ops;
2286 return 0;
2287}
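/*
 * Editor's sketch (not part of the original file): intel_iommu_init() either
 * takes over DMA mapping -- after which dma_ops points at intel_dma_ops and
 * force_iommu is set -- or returns an error (e.g. -ENODEV when disabled, or
 * when swiotlb/no_iommu is already active) so the caller keeps whatever DMA
 * implementation was in place.  The arch-side caller below is hypothetical.
 */
static int __init example_arch_dma_setup(void)
{
	if (intel_iommu_init() == 0)
		return 0;	/* translated DMA via intel_dma_ops */

	/* Init refused or failed: leave the existing dma_ops untouched. */
	return 0;
}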
e820482c 2288