iommu: Allocate dma-remapping structures using numa locality info
drivers/pci/intel-iommu.c
ba395927
KA
1/*
2 * Copyright (c) 2006, Intel Corporation.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15 * Place - Suite 330, Boston, MA 02111-1307 USA.
16 *
98bcef56 17 * Copyright (C) 2006-2008 Intel Corporation
18 * Author: Ashok Raj <ashok.raj@intel.com>
19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
5b6985ce 21 * Author: Fenghua Yu <fenghua.yu@intel.com>
ba395927
KA
22 */
23
24#include <linux/init.h>
25#include <linux/bitmap.h>
5e0d2a6f 26#include <linux/debugfs.h>
ba395927
KA
27#include <linux/slab.h>
28#include <linux/irq.h>
29#include <linux/interrupt.h>
ba395927
KA
30#include <linux/spinlock.h>
31#include <linux/pci.h>
32#include <linux/dmar.h>
33#include <linux/dma-mapping.h>
34#include <linux/mempool.h>
5e0d2a6f 35#include <linux/timer.h>
38717946 36#include <linux/iova.h>
5d450806 37#include <linux/iommu.h>
38717946 38#include <linux/intel-iommu.h>
f59c7b69 39#include <linux/sysdev.h>
69575d38 40#include <linux/tboot.h>
adb2fe02 41#include <linux/dmi.h>
ba395927 42#include <asm/cacheflush.h>
46a7fa27 43#include <asm/iommu.h>
ba395927
KA
44#include "pci.h"
45
5b6985ce
FY
46#define ROOT_SIZE VTD_PAGE_SIZE
47#define CONTEXT_SIZE VTD_PAGE_SIZE
48
ba395927
KA
49#define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY)
50#define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA)
e0fc7e0b 51#define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e)
ba395927
KA
52
53#define IOAPIC_RANGE_START (0xfee00000)
54#define IOAPIC_RANGE_END (0xfeefffff)
55#define IOVA_START_ADDR (0x1000)
56
57#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
58
4ed0d3e6
FY
59#define MAX_AGAW_WIDTH 64
60
2ebe3151
DW
61#define __DOMAIN_MAX_PFN(gaw) ((((uint64_t)1) << (gaw-VTD_PAGE_SHIFT)) - 1)
62#define __DOMAIN_MAX_ADDR(gaw) ((((uint64_t)1) << gaw) - 1)
63
64/* We limit DOMAIN_MAX_PFN to fit in an unsigned long, and DOMAIN_MAX_ADDR
65 to match. That way, we can use 'unsigned long' for PFNs with impunity. */
66#define DOMAIN_MAX_PFN(gaw) ((unsigned long) min_t(uint64_t, \
67 __DOMAIN_MAX_PFN(gaw), (unsigned long)-1))
68#define DOMAIN_MAX_ADDR(gaw) (((uint64_t)__DOMAIN_MAX_PFN(gaw)) << VTD_PAGE_SHIFT)
ba395927 69
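/*
 * Illustrative sketch, not part of the driver: for the default 48-bit
 * guest address width, __DOMAIN_MAX_PFN(48) is 2^36 - 1 (a 36-bit pfn
 * of 4KiB VT-d pages).  On 64-bit this fits in an unsigned long as-is;
 * on 32-bit the min_t() above clamps DOMAIN_MAX_PFN to ULONG_MAX.
 */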
f27be03b 70#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT)
284901a9 71#define DMA_32BIT_PFN IOVA_PFN(DMA_BIT_MASK(32))
6a35528a 72#define DMA_64BIT_PFN IOVA_PFN(DMA_BIT_MASK(64))
5e0d2a6f 73
fd18de50 74
dd4e8319
DW
75/* VT-d pages must always be _smaller_ than MM pages. Otherwise things
76 are never going to work. */
77static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn)
78{
79 return dma_pfn >> (PAGE_SHIFT - VTD_PAGE_SHIFT);
80}
81
82static inline unsigned long mm_to_dma_pfn(unsigned long mm_pfn)
83{
84 return mm_pfn << (PAGE_SHIFT - VTD_PAGE_SHIFT);
85}
86static inline unsigned long page_to_dma_pfn(struct page *pg)
87{
88 return mm_to_dma_pfn(page_to_pfn(pg));
89}
90static inline unsigned long virt_to_dma_pfn(void *p)
91{
92 return page_to_dma_pfn(virt_to_page(p));
93}
94
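/*
 * Illustrative sketch, not part of the driver: a quick sanity check of
 * the pfn conversions above.  With 4KiB MM pages the shift is zero and
 * both helpers are the identity; with 64KiB MM pages one MM pfn spans
 * 16 VT-d pfns.  Converting to a VT-d pfn and back always returns the
 * original MM pfn.
 */
static inline void example_pfn_round_trip(unsigned long mm_pfn)
{
        BUG_ON(dma_to_mm_pfn(mm_to_dma_pfn(mm_pfn)) != mm_pfn);
}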
d9630fe9
WH
95/* global iommu list, set NULL for ignored DMAR units */
96static struct intel_iommu **g_iommus;
97
e0fc7e0b 98static void __init check_tylersburg_isoch(void);
9af88143
DW
99static int rwbf_quirk;
100
46b08e1a
MM
101/*
102 * 0: Present
103 * 1-11: Reserved
104 * 12-63: Context Ptr (12 - (haw-1))
105 * 64-127: Reserved
106 */
107struct root_entry {
108 u64 val;
109 u64 rsvd1;
110};
111#define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry))
112static inline bool root_present(struct root_entry *root)
113{
114 return (root->val & 1);
115}
116static inline void set_root_present(struct root_entry *root)
117{
118 root->val |= 1;
119}
120static inline void set_root_value(struct root_entry *root, unsigned long value)
121{
122 root->val |= value & VTD_PAGE_MASK;
123}
124
125static inline struct context_entry *
126get_context_addr_from_root(struct root_entry *root)
127{
128 return (struct context_entry *)
129 (root_present(root)?phys_to_virt(
130 root->val & VTD_PAGE_MASK) :
131 NULL);
132}
133
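/*
 * Illustrative sketch, not part of the driver: how a root entry is
 * composed with the helpers above.  Bit 0 is the present flag and
 * bits 12-63 hold the page-aligned physical address of the bus's
 * context-entry table, so the entry reads back as (phys | 1).
 */
static inline void example_fill_root_entry(struct root_entry *root,
                                           void *context_table)
{
        set_root_value(root, virt_to_phys(context_table));
        set_root_present(root);
}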
7a8fc25e
MM
134/*
135 * low 64 bits:
136 * 0: present
137 * 1: fault processing disable
138 * 2-3: translation type
139 * 12-63: address space root
140 * high 64 bits:
141 * 0-2: address width
142 * 3-6: aval
143 * 8-23: domain id
144 */
145struct context_entry {
146 u64 lo;
147 u64 hi;
148};
c07e7d21
MM
149
150static inline bool context_present(struct context_entry *context)
151{
152 return (context->lo & 1);
153}
154static inline void context_set_present(struct context_entry *context)
155{
156 context->lo |= 1;
157}
158
159static inline void context_set_fault_enable(struct context_entry *context)
160{
161 context->lo &= (((u64)-1) << 2) | 1;
162}
163
c07e7d21
MM
164static inline void context_set_translation_type(struct context_entry *context,
165 unsigned long value)
166{
167 context->lo &= (((u64)-1) << 4) | 3;
168 context->lo |= (value & 3) << 2;
169}
170
171static inline void context_set_address_root(struct context_entry *context,
172 unsigned long value)
173{
174 context->lo |= value & VTD_PAGE_MASK;
175}
176
177static inline void context_set_address_width(struct context_entry *context,
178 unsigned long value)
179{
180 context->hi |= value & 7;
181}
182
183static inline void context_set_domain_id(struct context_entry *context,
184 unsigned long value)
185{
186 context->hi |= (value & ((1 << 16) - 1)) << 8;
187}
188
189static inline void context_clear_entry(struct context_entry *context)
190{
191 context->lo = 0;
192 context->hi = 0;
193}
7a8fc25e 194
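/*
 * Illustrative sketch, not part of the driver: the order in which the
 * helpers above are used when a context entry is programmed (compare
 * domain_context_mapping_one() below).  The present bit is set last so
 * the entry only becomes valid once it is fully formed.  'pgd' is the
 * domain's top-level page table.
 */
static inline void example_fill_context(struct context_entry *ctx,
                                        void *pgd, int domain_id, int agaw)
{
        context_set_domain_id(ctx, domain_id);
        context_set_address_root(ctx, virt_to_phys(pgd));
        context_set_address_width(ctx, agaw);
        context_set_translation_type(ctx, CONTEXT_TT_MULTI_LEVEL);
        context_set_fault_enable(ctx);
        context_set_present(ctx);
}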
622ba12a
MM
195/*
196 * 0: readable
197 * 1: writable
198 * 2-6: reserved
199 * 7: super page
9cf06697
SY
200 * 8-10: available
201 * 11: snoop behavior
622ba12a
MM
202 * 12-63: Host physical address
203 */
204struct dma_pte {
205 u64 val;
206};
622ba12a 207
19c239ce
MM
208static inline void dma_clear_pte(struct dma_pte *pte)
209{
210 pte->val = 0;
211}
212
213static inline void dma_set_pte_readable(struct dma_pte *pte)
214{
215 pte->val |= DMA_PTE_READ;
216}
217
218static inline void dma_set_pte_writable(struct dma_pte *pte)
219{
220 pte->val |= DMA_PTE_WRITE;
221}
222
9cf06697
SY
223static inline void dma_set_pte_snp(struct dma_pte *pte)
224{
225 pte->val |= DMA_PTE_SNP;
226}
227
19c239ce
MM
228static inline void dma_set_pte_prot(struct dma_pte *pte, unsigned long prot)
229{
230 pte->val = (pte->val & ~3) | (prot & 3);
231}
232
233static inline u64 dma_pte_addr(struct dma_pte *pte)
234{
c85994e4
DW
235#ifdef CONFIG_64BIT
236 return pte->val & VTD_PAGE_MASK;
237#else
238 /* Must have a full atomic 64-bit read */
239 return __cmpxchg64(&pte->val, 0ULL, 0ULL) & VTD_PAGE_MASK;
240#endif
19c239ce
MM
241}
242
dd4e8319 243static inline void dma_set_pte_pfn(struct dma_pte *pte, unsigned long pfn)
19c239ce 244{
dd4e8319 245 pte->val |= (uint64_t)pfn << VTD_PAGE_SHIFT;
19c239ce
MM
246}
247
248static inline bool dma_pte_present(struct dma_pte *pte)
249{
250 return (pte->val & 3) != 0;
251}
622ba12a 252
75e6bf96
DW
253static inline int first_pte_in_page(struct dma_pte *pte)
254{
255 return !((unsigned long)pte & ~VTD_PAGE_MASK);
256}
257
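/*
 * Illustrative sketch, not part of the driver: a leaf PTE is simply the
 * page-aligned host physical address ORed with the permission bits set
 * by the helpers above.
 */
static inline u64 example_make_rw_pte(u64 host_phys_addr)
{
        return (host_phys_addr & VTD_PAGE_MASK) |
                DMA_PTE_READ | DMA_PTE_WRITE;
}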
2c2e2c38
FY
258/*
259 * This domain is a static identity mapping domain.
260 * 1. This domain creates a static 1:1 mapping to all usable memory.
261 * 2. It maps to each iommu if successful.
262 * 3. Each iommu maps to this domain if successful.
263 */
19943b0e
DW
264static struct dmar_domain *si_domain;
265static int hw_pass_through = 1;
2c2e2c38 266
3b5410e7 267/* devices under the same p2p bridge are owned in one domain */
cdc7b837 268#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 << 0)
3b5410e7 269
1ce28feb
WH
270/* domain represents a virtual machine; more than one device
271 * across iommus may be owned by one domain, e.g. a kvm guest.
272 */
273#define DOMAIN_FLAG_VIRTUAL_MACHINE (1 << 1)
274
2c2e2c38
FY
275/* si_domain contains multiple devices */
276#define DOMAIN_FLAG_STATIC_IDENTITY (1 << 2)
277
99126f7c
MM
278struct dmar_domain {
279 int id; /* domain id */
4c923d47 280 int nid; /* node id */
8c11e798 281 unsigned long iommu_bmp; /* bitmap of iommus this domain uses*/
99126f7c
MM
282
283 struct list_head devices; /* all devices' list */
284 struct iova_domain iovad; /* iova's that belong to this domain */
285
286 struct dma_pte *pgd; /* virtual address */
99126f7c
MM
287 int gaw; /* max guest address width */
288
289 /* adjusted guest address width, 0 is level 2 30-bit */
290 int agaw;
291
3b5410e7 292 int flags; /* flags to find out type of domain */
8e604097
WH
293
294 int iommu_coherency;/* indicate coherency of iommu access */
58c610bd 295 int iommu_snooping; /* indicate snooping control feature*/
c7151a8d
WH
296 int iommu_count; /* reference count of iommu */
297 spinlock_t iommu_lock; /* protect iommu set in domain */
fe40f1e0 298 u64 max_addr; /* maximum mapped address */
99126f7c
MM
299};
300
a647dacb
MM
301/* PCI domain-device relationship */
302struct device_domain_info {
303 struct list_head link; /* link to domain siblings */
304 struct list_head global; /* link to global list */
276dbf99
DW
305 int segment; /* PCI domain */
306 u8 bus; /* PCI bus number */
a647dacb
MM
307 u8 devfn; /* PCI devfn number */
308 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
93a23a72 309 struct intel_iommu *iommu; /* IOMMU used by this device */
a647dacb
MM
310 struct dmar_domain *domain; /* pointer to domain */
311};
312
5e0d2a6f 313static void flush_unmaps_timeout(unsigned long data);
314
315DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
316
80b20dd8 317#define HIGH_WATER_MARK 250
318struct deferred_flush_tables {
319 int next;
320 struct iova *iova[HIGH_WATER_MARK];
321 struct dmar_domain *domain[HIGH_WATER_MARK];
322};
323
324static struct deferred_flush_tables *deferred_flush;
325
5e0d2a6f 326/* bitmap for indexing intel_iommus */
5e0d2a6f 327static int g_num_of_iommus;
328
329static DEFINE_SPINLOCK(async_umap_flush_lock);
330static LIST_HEAD(unmaps_to_do);
331
332static int timer_on;
333static long list_size;
5e0d2a6f 334
ba395927
KA
335static void domain_remove_dev_info(struct dmar_domain *domain);
336
0cd5c3c8
KM
337#ifdef CONFIG_DMAR_DEFAULT_ON
338int dmar_disabled = 0;
339#else
340int dmar_disabled = 1;
341#endif /*CONFIG_DMAR_DEFAULT_ON*/
342
ba395927 343static int __initdata dmar_map_gfx = 1;
7d3b03ce 344static int dmar_forcedac;
5e0d2a6f 345static int intel_iommu_strict;
ba395927
KA
346
347#define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1))
348static DEFINE_SPINLOCK(device_domain_lock);
349static LIST_HEAD(device_domain_list);
350
a8bcbb0d
JR
351static struct iommu_ops intel_iommu_ops;
352
ba395927
KA
353static int __init intel_iommu_setup(char *str)
354{
355 if (!str)
356 return -EINVAL;
357 while (*str) {
0cd5c3c8
KM
358 if (!strncmp(str, "on", 2)) {
359 dmar_disabled = 0;
360 printk(KERN_INFO "Intel-IOMMU: enabled\n");
361 } else if (!strncmp(str, "off", 3)) {
ba395927 362 dmar_disabled = 1;
0cd5c3c8 363 printk(KERN_INFO "Intel-IOMMU: disabled\n");
ba395927
KA
364 } else if (!strncmp(str, "igfx_off", 8)) {
365 dmar_map_gfx = 0;
366 printk(KERN_INFO
367 "Intel-IOMMU: disable GFX device mapping\n");
7d3b03ce 368 } else if (!strncmp(str, "forcedac", 8)) {
5e0d2a6f 369 printk(KERN_INFO
7d3b03ce
KA
370 "Intel-IOMMU: Forcing DAC for PCI devices\n");
371 dmar_forcedac = 1;
5e0d2a6f 372 } else if (!strncmp(str, "strict", 6)) {
373 printk(KERN_INFO
374 "Intel-IOMMU: disable batched IOTLB flush\n");
375 intel_iommu_strict = 1;
ba395927
KA
376 }
377
378 str += strcspn(str, ",");
379 while (*str == ',')
380 str++;
381 }
382 return 0;
383}
384__setup("intel_iommu=", intel_iommu_setup);
385
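/*
 * Illustrative sketch, not part of the driver: the parser above accepts
 * a comma-separated list on the kernel command line, e.g.
 * "intel_iommu=on,strict" enables translation and disables batched
 * IOTLB flushing, while "intel_iommu=off" disables the driver entirely.
 */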
386static struct kmem_cache *iommu_domain_cache;
387static struct kmem_cache *iommu_devinfo_cache;
388static struct kmem_cache *iommu_iova_cache;
389
eb3fa7cb
KA
390static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
391{
392 unsigned int flags;
393 void *vaddr;
394
395 /* trying to avoid low memory issues */
396 flags = current->flags & PF_MEMALLOC;
397 current->flags |= PF_MEMALLOC;
398 vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
399 current->flags &= (~PF_MEMALLOC | flags);
400 return vaddr;
401}
402
403
4c923d47 404static inline void *alloc_pgtable_page(int node)
ba395927 405{
eb3fa7cb 406 unsigned int flags;
4c923d47
SS
407 struct page *page;
408 void *vaddr = NULL;
eb3fa7cb
KA
409
410 /* trying to avoid low memory issues */
411 flags = current->flags & PF_MEMALLOC;
412 current->flags |= PF_MEMALLOC;
4c923d47
SS
413 page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
414 if (page)
415 vaddr = page_address(page);
eb3fa7cb
KA
416 current->flags &= (~PF_MEMALLOC | flags);
417 return vaddr;
ba395927
KA
418}
419
420static inline void free_pgtable_page(void *vaddr)
421{
422 free_page((unsigned long)vaddr);
423}
424
425static inline void *alloc_domain_mem(void)
426{
eb3fa7cb 427 return iommu_kmem_cache_alloc(iommu_domain_cache);
ba395927
KA
428}
429
38717946 430static void free_domain_mem(void *vaddr)
ba395927
KA
431{
432 kmem_cache_free(iommu_domain_cache, vaddr);
433}
434
435static inline void * alloc_devinfo_mem(void)
436{
eb3fa7cb 437 return iommu_kmem_cache_alloc(iommu_devinfo_cache);
ba395927
KA
438}
439
440static inline void free_devinfo_mem(void *vaddr)
441{
442 kmem_cache_free(iommu_devinfo_cache, vaddr);
443}
444
445struct iova *alloc_iova_mem(void)
446{
eb3fa7cb 447 return iommu_kmem_cache_alloc(iommu_iova_cache);
ba395927
KA
448}
449
450void free_iova_mem(struct iova *iova)
451{
452 kmem_cache_free(iommu_iova_cache, iova);
453}
454
1b573683
WH
455
456static inline int width_to_agaw(int width);
457
4ed0d3e6 458static int __iommu_calculate_agaw(struct intel_iommu *iommu, int max_gaw)
1b573683
WH
459{
460 unsigned long sagaw;
461 int agaw = -1;
462
463 sagaw = cap_sagaw(iommu->cap);
4ed0d3e6 464 for (agaw = width_to_agaw(max_gaw);
1b573683
WH
465 agaw >= 0; agaw--) {
466 if (test_bit(agaw, &sagaw))
467 break;
468 }
469
470 return agaw;
471}
472
4ed0d3e6
FY
473/*
474 * Calculate max SAGAW for each iommu.
475 */
476int iommu_calculate_max_sagaw(struct intel_iommu *iommu)
477{
478 return __iommu_calculate_agaw(iommu, MAX_AGAW_WIDTH);
479}
480
481/*
482 * calculate agaw for each iommu.
483 * "SAGAW" may be different across iommus, use a default agaw, and
484 * get a supported less agaw for iommus that don't support the default agaw.
485 */
486int iommu_calculate_agaw(struct intel_iommu *iommu)
487{
488 return __iommu_calculate_agaw(iommu, DEFAULT_DOMAIN_ADDRESS_WIDTH);
489}
490
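/*
 * Illustrative sketch, not part of the driver: with the default domain
 * address width of 48 bits, width_to_agaw(48) == (48 - 30) / 9 == 2,
 * i.e. a 4-level page table.  If the SAGAW field of the capability
 * register does not advertise bit 2, __iommu_calculate_agaw() walks
 * downwards and settles on agaw 1 (39-bit, 3-level) instead.
 */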
2c2e2c38 491/* This function only returns a single iommu in a domain */
8c11e798
WH
492static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain)
493{
494 int iommu_id;
495
2c2e2c38 496 /* si_domain and vm domain should not get here. */
1ce28feb 497 BUG_ON(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE);
2c2e2c38 498 BUG_ON(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY);
1ce28feb 499
8c11e798
WH
500 iommu_id = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
501 if (iommu_id < 0 || iommu_id >= g_num_of_iommus)
502 return NULL;
503
504 return g_iommus[iommu_id];
505}
506
8e604097
WH
507static void domain_update_iommu_coherency(struct dmar_domain *domain)
508{
509 int i;
510
511 domain->iommu_coherency = 1;
512
513 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
514 for (; i < g_num_of_iommus; ) {
515 if (!ecap_coherent(g_iommus[i]->ecap)) {
516 domain->iommu_coherency = 0;
517 break;
518 }
519 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
520 }
521}
522
58c610bd
SY
523static void domain_update_iommu_snooping(struct dmar_domain *domain)
524{
525 int i;
526
527 domain->iommu_snooping = 1;
528
529 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
530 for (; i < g_num_of_iommus; ) {
531 if (!ecap_sc_support(g_iommus[i]->ecap)) {
532 domain->iommu_snooping = 0;
533 break;
534 }
535 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
536 }
537}
538
539/* Some capabilities may be different across iommus */
540static void domain_update_iommu_cap(struct dmar_domain *domain)
541{
542 domain_update_iommu_coherency(domain);
543 domain_update_iommu_snooping(domain);
544}
545
276dbf99 546static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn)
c7151a8d
WH
547{
548 struct dmar_drhd_unit *drhd = NULL;
549 int i;
550
551 for_each_drhd_unit(drhd) {
552 if (drhd->ignored)
553 continue;
276dbf99
DW
554 if (segment != drhd->segment)
555 continue;
c7151a8d 556
924b6231 557 for (i = 0; i < drhd->devices_cnt; i++) {
288e4877
DH
558 if (drhd->devices[i] &&
559 drhd->devices[i]->bus->number == bus &&
c7151a8d
WH
560 drhd->devices[i]->devfn == devfn)
561 return drhd->iommu;
4958c5dc
DW
562 if (drhd->devices[i] &&
563 drhd->devices[i]->subordinate &&
924b6231
DW
564 drhd->devices[i]->subordinate->number <= bus &&
565 drhd->devices[i]->subordinate->subordinate >= bus)
566 return drhd->iommu;
567 }
c7151a8d
WH
568
569 if (drhd->include_all)
570 return drhd->iommu;
571 }
572
573 return NULL;
574}
575
5331fe6f
WH
576static void domain_flush_cache(struct dmar_domain *domain,
577 void *addr, int size)
578{
579 if (!domain->iommu_coherency)
580 clflush_cache_range(addr, size);
581}
582
ba395927
KA
583/* Gets context entry for a given bus and devfn */
584static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
585 u8 bus, u8 devfn)
586{
587 struct root_entry *root;
588 struct context_entry *context;
589 unsigned long phy_addr;
590 unsigned long flags;
591
592 spin_lock_irqsave(&iommu->lock, flags);
593 root = &iommu->root_entry[bus];
594 context = get_context_addr_from_root(root);
595 if (!context) {
4c923d47
SS
596 context = (struct context_entry *)
597 alloc_pgtable_page(iommu->node);
ba395927
KA
598 if (!context) {
599 spin_unlock_irqrestore(&iommu->lock, flags);
600 return NULL;
601 }
5b6985ce 602 __iommu_flush_cache(iommu, (void *)context, CONTEXT_SIZE);
ba395927
KA
603 phy_addr = virt_to_phys((void *)context);
604 set_root_value(root, phy_addr);
605 set_root_present(root);
606 __iommu_flush_cache(iommu, root, sizeof(*root));
607 }
608 spin_unlock_irqrestore(&iommu->lock, flags);
609 return &context[devfn];
610}
611
612static int device_context_mapped(struct intel_iommu *iommu, u8 bus, u8 devfn)
613{
614 struct root_entry *root;
615 struct context_entry *context;
616 int ret;
617 unsigned long flags;
618
619 spin_lock_irqsave(&iommu->lock, flags);
620 root = &iommu->root_entry[bus];
621 context = get_context_addr_from_root(root);
622 if (!context) {
623 ret = 0;
624 goto out;
625 }
c07e7d21 626 ret = context_present(&context[devfn]);
ba395927
KA
627out:
628 spin_unlock_irqrestore(&iommu->lock, flags);
629 return ret;
630}
631
632static void clear_context_table(struct intel_iommu *iommu, u8 bus, u8 devfn)
633{
634 struct root_entry *root;
635 struct context_entry *context;
636 unsigned long flags;
637
638 spin_lock_irqsave(&iommu->lock, flags);
639 root = &iommu->root_entry[bus];
640 context = get_context_addr_from_root(root);
641 if (context) {
c07e7d21 642 context_clear_entry(&context[devfn]);
ba395927
KA
643 __iommu_flush_cache(iommu, &context[devfn], \
644 sizeof(*context));
645 }
646 spin_unlock_irqrestore(&iommu->lock, flags);
647}
648
649static void free_context_table(struct intel_iommu *iommu)
650{
651 struct root_entry *root;
652 int i;
653 unsigned long flags;
654 struct context_entry *context;
655
656 spin_lock_irqsave(&iommu->lock, flags);
657 if (!iommu->root_entry) {
658 goto out;
659 }
660 for (i = 0; i < ROOT_ENTRY_NR; i++) {
661 root = &iommu->root_entry[i];
662 context = get_context_addr_from_root(root);
663 if (context)
664 free_pgtable_page(context);
665 }
666 free_pgtable_page(iommu->root_entry);
667 iommu->root_entry = NULL;
668out:
669 spin_unlock_irqrestore(&iommu->lock, flags);
670}
671
672/* page table handling */
673#define LEVEL_STRIDE (9)
674#define LEVEL_MASK (((u64)1 << LEVEL_STRIDE) - 1)
675
676static inline int agaw_to_level(int agaw)
677{
678 return agaw + 2;
679}
680
681static inline int agaw_to_width(int agaw)
682{
683 return 30 + agaw * LEVEL_STRIDE;
684
685}
686
687static inline int width_to_agaw(int width)
688{
689 return (width - 30) / LEVEL_STRIDE;
690}
691
692static inline unsigned int level_to_offset_bits(int level)
693{
6660c63a 694 return (level - 1) * LEVEL_STRIDE;
ba395927
KA
695}
696
77dfa56c 697static inline int pfn_level_offset(unsigned long pfn, int level)
ba395927 698{
6660c63a 699 return (pfn >> level_to_offset_bits(level)) & LEVEL_MASK;
ba395927
KA
700}
701
6660c63a 702static inline unsigned long level_mask(int level)
ba395927 703{
6660c63a 704 return -1UL << level_to_offset_bits(level);
ba395927
KA
705}
706
6660c63a 707static inline unsigned long level_size(int level)
ba395927 708{
6660c63a 709 return 1UL << level_to_offset_bits(level);
ba395927
KA
710}
711
6660c63a 712static inline unsigned long align_to_level(unsigned long pfn, int level)
ba395927 713{
6660c63a 714 return (pfn + level_size(level) - 1) & level_mask(level);
ba395927
KA
715}
716
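/*
 * Illustrative sketch, not part of the driver: a worked example of the
 * index arithmetic above.  Each level consumes LEVEL_STRIDE == 9 bits
 * of the dma pfn, so for pfn 0x12345 the level-1 index is
 * 0x12345 & 0x1ff == 0x145 and the level-2 index is
 * (0x12345 >> 9) & 0x1ff == 0x91.
 */
static inline void example_pfn_level_offsets(void)
{
        BUG_ON(pfn_level_offset(0x12345UL, 1) != 0x145);
        BUG_ON(pfn_level_offset(0x12345UL, 2) != 0x091);
}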
b026fd28
DW
717static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
718 unsigned long pfn)
ba395927 719{
b026fd28 720 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
ba395927
KA
721 struct dma_pte *parent, *pte = NULL;
722 int level = agaw_to_level(domain->agaw);
723 int offset;
ba395927
KA
724
725 BUG_ON(!domain->pgd);
b026fd28 726 BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width);
ba395927
KA
727 parent = domain->pgd;
728
ba395927
KA
729 while (level > 0) {
730 void *tmp_page;
731
b026fd28 732 offset = pfn_level_offset(pfn, level);
ba395927
KA
733 pte = &parent[offset];
734 if (level == 1)
735 break;
736
19c239ce 737 if (!dma_pte_present(pte)) {
c85994e4
DW
738 uint64_t pteval;
739
4c923d47 740 tmp_page = alloc_pgtable_page(domain->nid);
ba395927 741
206a73c1 742 if (!tmp_page)
ba395927 743 return NULL;
206a73c1 744
c85994e4 745 domain_flush_cache(domain, tmp_page, VTD_PAGE_SIZE);
64de5af0 746 pteval = ((uint64_t)virt_to_dma_pfn(tmp_page) << VTD_PAGE_SHIFT) | DMA_PTE_READ | DMA_PTE_WRITE;
c85994e4
DW
747 if (cmpxchg64(&pte->val, 0ULL, pteval)) {
748 /* Someone else set it while we were thinking; use theirs. */
749 free_pgtable_page(tmp_page);
750 } else {
751 dma_pte_addr(pte);
752 domain_flush_cache(domain, pte, sizeof(*pte));
753 }
ba395927 754 }
19c239ce 755 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
756 level--;
757 }
758
ba395927
KA
759 return pte;
760}
761
762/* return address's pte at specific level */
90dcfb5e
DW
763static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain,
764 unsigned long pfn,
765 int level)
ba395927
KA
766{
767 struct dma_pte *parent, *pte = NULL;
768 int total = agaw_to_level(domain->agaw);
769 int offset;
770
771 parent = domain->pgd;
772 while (level <= total) {
90dcfb5e 773 offset = pfn_level_offset(pfn, total);
ba395927
KA
774 pte = &parent[offset];
775 if (level == total)
776 return pte;
777
19c239ce 778 if (!dma_pte_present(pte))
ba395927 779 break;
19c239ce 780 parent = phys_to_virt(dma_pte_addr(pte));
ba395927
KA
781 total--;
782 }
783 return NULL;
784}
785
ba395927 786/* clear last level pte, a tlb flush should be followed */
595badf5
DW
787static void dma_pte_clear_range(struct dmar_domain *domain,
788 unsigned long start_pfn,
789 unsigned long last_pfn)
ba395927 790{
04b18e65 791 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
310a5ab9 792 struct dma_pte *first_pte, *pte;
66eae846 793
04b18e65 794 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
595badf5 795 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 796 BUG_ON(start_pfn > last_pfn);
ba395927 797
04b18e65 798 /* we don't need lock here; nobody else touches the iova range */
59c36286 799 do {
310a5ab9
DW
800 first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1);
801 if (!pte) {
802 start_pfn = align_to_level(start_pfn + 1, 2);
803 continue;
804 }
75e6bf96 805 do {
310a5ab9
DW
806 dma_clear_pte(pte);
807 start_pfn++;
808 pte++;
75e6bf96
DW
809 } while (start_pfn <= last_pfn && !first_pte_in_page(pte));
810
310a5ab9
DW
811 domain_flush_cache(domain, first_pte,
812 (void *)pte - (void *)first_pte);
59c36286
DW
813
814 } while (start_pfn && start_pfn <= last_pfn);
ba395927
KA
815}
816
817/* free page table pages. last level pte should already be cleared */
818static void dma_pte_free_pagetable(struct dmar_domain *domain,
d794dc9b
DW
819 unsigned long start_pfn,
820 unsigned long last_pfn)
ba395927 821{
6660c63a 822 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
f3a0a52f 823 struct dma_pte *first_pte, *pte;
ba395927
KA
824 int total = agaw_to_level(domain->agaw);
825 int level;
6660c63a 826 unsigned long tmp;
ba395927 827
6660c63a
DW
828 BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width);
829 BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width);
59c36286 830 BUG_ON(start_pfn > last_pfn);
ba395927 831
f3a0a52f 832 /* We don't need lock here; nobody else touches the iova range */
ba395927
KA
833 level = 2;
834 while (level <= total) {
6660c63a
DW
835 tmp = align_to_level(start_pfn, level);
836
f3a0a52f 837 /* If we can't even clear one PTE at this level, we're done */
6660c63a 838 if (tmp + level_size(level) - 1 > last_pfn)
ba395927
KA
839 return;
840
59c36286 841 do {
f3a0a52f
DW
842 first_pte = pte = dma_pfn_level_pte(domain, tmp, level);
843 if (!pte) {
844 tmp = align_to_level(tmp + 1, level + 1);
845 continue;
846 }
75e6bf96 847 do {
6a43e574
DW
848 if (dma_pte_present(pte)) {
849 free_pgtable_page(phys_to_virt(dma_pte_addr(pte)));
850 dma_clear_pte(pte);
851 }
f3a0a52f
DW
852 pte++;
853 tmp += level_size(level);
75e6bf96
DW
854 } while (!first_pte_in_page(pte) &&
855 tmp + level_size(level) - 1 <= last_pfn);
856
f3a0a52f
DW
857 domain_flush_cache(domain, first_pte,
858 (void *)pte - (void *)first_pte);
859
59c36286 860 } while (tmp && tmp + level_size(level) - 1 <= last_pfn);
ba395927
KA
861 level++;
862 }
863 /* free pgd */
d794dc9b 864 if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
ba395927
KA
865 free_pgtable_page(domain->pgd);
866 domain->pgd = NULL;
867 }
868}
869
870/* iommu handling */
871static int iommu_alloc_root_entry(struct intel_iommu *iommu)
872{
873 struct root_entry *root;
874 unsigned long flags;
875
4c923d47 876 root = (struct root_entry *)alloc_pgtable_page(iommu->node);
ba395927
KA
877 if (!root)
878 return -ENOMEM;
879
5b6985ce 880 __iommu_flush_cache(iommu, root, ROOT_SIZE);
ba395927
KA
881
882 spin_lock_irqsave(&iommu->lock, flags);
883 iommu->root_entry = root;
884 spin_unlock_irqrestore(&iommu->lock, flags);
885
886 return 0;
887}
888
ba395927
KA
889static void iommu_set_root_entry(struct intel_iommu *iommu)
890{
891 void *addr;
c416daa9 892 u32 sts;
ba395927
KA
893 unsigned long flag;
894
895 addr = iommu->root_entry;
896
897 spin_lock_irqsave(&iommu->register_lock, flag);
898 dmar_writeq(iommu->reg + DMAR_RTADDR_REG, virt_to_phys(addr));
899
c416daa9 900 writel(iommu->gcmd | DMA_GCMD_SRTP, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
901
902 /* Make sure hardware complete it */
903 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 904 readl, (sts & DMA_GSTS_RTPS), sts);
ba395927
KA
905
906 spin_unlock_irqrestore(&iommu->register_lock, flag);
907}
908
909static void iommu_flush_write_buffer(struct intel_iommu *iommu)
910{
911 u32 val;
912 unsigned long flag;
913
9af88143 914 if (!rwbf_quirk && !cap_rwbf(iommu->cap))
ba395927 915 return;
ba395927
KA
916
917 spin_lock_irqsave(&iommu->register_lock, flag);
462b60f6 918 writel(iommu->gcmd | DMA_GCMD_WBF, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
919
920 /* Make sure hardware complete it */
921 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 922 readl, (!(val & DMA_GSTS_WBFS)), val);
ba395927
KA
923
924 spin_unlock_irqrestore(&iommu->register_lock, flag);
925}
926
927/* return value determines if we need a write buffer flush */
4c25a2c1
DW
928static void __iommu_flush_context(struct intel_iommu *iommu,
929 u16 did, u16 source_id, u8 function_mask,
930 u64 type)
ba395927
KA
931{
932 u64 val = 0;
933 unsigned long flag;
934
ba395927
KA
935 switch (type) {
936 case DMA_CCMD_GLOBAL_INVL:
937 val = DMA_CCMD_GLOBAL_INVL;
938 break;
939 case DMA_CCMD_DOMAIN_INVL:
940 val = DMA_CCMD_DOMAIN_INVL|DMA_CCMD_DID(did);
941 break;
942 case DMA_CCMD_DEVICE_INVL:
943 val = DMA_CCMD_DEVICE_INVL|DMA_CCMD_DID(did)
944 | DMA_CCMD_SID(source_id) | DMA_CCMD_FM(function_mask);
945 break;
946 default:
947 BUG();
948 }
949 val |= DMA_CCMD_ICC;
950
951 spin_lock_irqsave(&iommu->register_lock, flag);
952 dmar_writeq(iommu->reg + DMAR_CCMD_REG, val);
953
954 /* Make sure hardware complete it */
955 IOMMU_WAIT_OP(iommu, DMAR_CCMD_REG,
956 dmar_readq, (!(val & DMA_CCMD_ICC)), val);
957
958 spin_unlock_irqrestore(&iommu->register_lock, flag);
ba395927
KA
959}
960
ba395927 961/* return value determines if we need a write buffer flush */
1f0ef2aa
DW
962static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did,
963 u64 addr, unsigned int size_order, u64 type)
ba395927
KA
964{
965 int tlb_offset = ecap_iotlb_offset(iommu->ecap);
966 u64 val = 0, val_iva = 0;
967 unsigned long flag;
968
ba395927
KA
969 switch (type) {
970 case DMA_TLB_GLOBAL_FLUSH:
971 /* global flush doesn't need to set IVA_REG */
972 val = DMA_TLB_GLOBAL_FLUSH|DMA_TLB_IVT;
973 break;
974 case DMA_TLB_DSI_FLUSH:
975 val = DMA_TLB_DSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
976 break;
977 case DMA_TLB_PSI_FLUSH:
978 val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did);
979 /* Note: always flush non-leaf currently */
980 val_iva = size_order | addr;
981 break;
982 default:
983 BUG();
984 }
985 /* Note: set drain read/write */
986#if 0
987 /*
988 * This is probably to be super secure.. Looks like we can
989 * ignore it without any impact.
990 */
991 if (cap_read_drain(iommu->cap))
992 val |= DMA_TLB_READ_DRAIN;
993#endif
994 if (cap_write_drain(iommu->cap))
995 val |= DMA_TLB_WRITE_DRAIN;
996
997 spin_lock_irqsave(&iommu->register_lock, flag);
998 /* Note: Only uses first TLB reg currently */
999 if (val_iva)
1000 dmar_writeq(iommu->reg + tlb_offset, val_iva);
1001 dmar_writeq(iommu->reg + tlb_offset + 8, val);
1002
1003 /* Make sure hardware complete it */
1004 IOMMU_WAIT_OP(iommu, tlb_offset + 8,
1005 dmar_readq, (!(val & DMA_TLB_IVT)), val);
1006
1007 spin_unlock_irqrestore(&iommu->register_lock, flag);
1008
1009 /* check IOTLB invalidation granularity */
1010 if (DMA_TLB_IAIG(val) == 0)
1011 printk(KERN_ERR"IOMMU: flush IOTLB failed\n");
1012 if (DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type))
1013 pr_debug("IOMMU: tlb flush request %Lx, actual %Lx\n",
5b6985ce
FY
1014 (unsigned long long)DMA_TLB_IIRG(type),
1015 (unsigned long long)DMA_TLB_IAIG(val));
ba395927
KA
1016}
1017
93a23a72
YZ
1018static struct device_domain_info *iommu_support_dev_iotlb(
1019 struct dmar_domain *domain, int segment, u8 bus, u8 devfn)
1020{
1021 int found = 0;
1022 unsigned long flags;
1023 struct device_domain_info *info;
1024 struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn);
1025
1026 if (!ecap_dev_iotlb_support(iommu->ecap))
1027 return NULL;
1028
1029 if (!iommu->qi)
1030 return NULL;
1031
1032 spin_lock_irqsave(&device_domain_lock, flags);
1033 list_for_each_entry(info, &domain->devices, link)
1034 if (info->bus == bus && info->devfn == devfn) {
1035 found = 1;
1036 break;
1037 }
1038 spin_unlock_irqrestore(&device_domain_lock, flags);
1039
1040 if (!found || !info->dev)
1041 return NULL;
1042
1043 if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS))
1044 return NULL;
1045
1046 if (!dmar_find_matched_atsr_unit(info->dev))
1047 return NULL;
1048
1049 info->iommu = iommu;
1050
1051 return info;
1052}
1053
1054static void iommu_enable_dev_iotlb(struct device_domain_info *info)
ba395927 1055{
93a23a72
YZ
1056 if (!info)
1057 return;
1058
1059 pci_enable_ats(info->dev, VTD_PAGE_SHIFT);
1060}
1061
1062static void iommu_disable_dev_iotlb(struct device_domain_info *info)
1063{
1064 if (!info->dev || !pci_ats_enabled(info->dev))
1065 return;
1066
1067 pci_disable_ats(info->dev);
1068}
1069
1070static void iommu_flush_dev_iotlb(struct dmar_domain *domain,
1071 u64 addr, unsigned mask)
1072{
1073 u16 sid, qdep;
1074 unsigned long flags;
1075 struct device_domain_info *info;
1076
1077 spin_lock_irqsave(&device_domain_lock, flags);
1078 list_for_each_entry(info, &domain->devices, link) {
1079 if (!info->dev || !pci_ats_enabled(info->dev))
1080 continue;
1081
1082 sid = info->bus << 8 | info->devfn;
1083 qdep = pci_ats_queue_depth(info->dev);
1084 qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask);
1085 }
1086 spin_unlock_irqrestore(&device_domain_lock, flags);
1087}
1088
1f0ef2aa 1089static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did,
03d6a246 1090 unsigned long pfn, unsigned int pages)
ba395927 1091{
9dd2fe89 1092 unsigned int mask = ilog2(__roundup_pow_of_two(pages));
03d6a246 1093 uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT;
ba395927 1094
ba395927
KA
1095 BUG_ON(pages == 0);
1096
ba395927 1097 /*
9dd2fe89
YZ
1098 * Fall back to domain selective flush if there is no PSI support or the size is
1099 * too big.
ba395927
KA
1100 * PSI requires page size to be 2 ^ x, and the base address is naturally
1101 * aligned to the size
1102 */
9dd2fe89
YZ
1103 if (!cap_pgsel_inv(iommu->cap) || mask > cap_max_amask_val(iommu->cap))
1104 iommu->flush.flush_iotlb(iommu, did, 0, 0,
1f0ef2aa 1105 DMA_TLB_DSI_FLUSH);
9dd2fe89
YZ
1106 else
1107 iommu->flush.flush_iotlb(iommu, did, addr, mask,
1108 DMA_TLB_PSI_FLUSH);
bf92df30
YZ
1109
1110 /*
1111 * In caching mode, domain ID 0 is reserved for non-present to present
1112 * mapping flush. Device IOTLB doesn't need to be flushed in this case.
1113 */
1114 if (!cap_caching_mode(iommu->cap) || did)
93a23a72 1115 iommu_flush_dev_iotlb(iommu->domains[did], addr, mask);
ba395927
KA
1116}
1117
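/*
 * Illustrative sketch, not part of the driver: the "mask" computed in
 * iommu_flush_iotlb_psi() above is the order of the invalidation.  For
 * pages == 9, __roundup_pow_of_two(9) == 16 and ilog2(16) == 4, so one
 * PSI covers 16 VT-d pages (64KiB) aligned to a 64KiB boundary.
 */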
f8bab735 1118static void iommu_disable_protect_mem_regions(struct intel_iommu *iommu)
1119{
1120 u32 pmen;
1121 unsigned long flags;
1122
1123 spin_lock_irqsave(&iommu->register_lock, flags);
1124 pmen = readl(iommu->reg + DMAR_PMEN_REG);
1125 pmen &= ~DMA_PMEN_EPM;
1126 writel(pmen, iommu->reg + DMAR_PMEN_REG);
1127
1128 /* wait for the protected region status bit to clear */
1129 IOMMU_WAIT_OP(iommu, DMAR_PMEN_REG,
1130 readl, !(pmen & DMA_PMEN_PRS), pmen);
1131
1132 spin_unlock_irqrestore(&iommu->register_lock, flags);
1133}
1134
ba395927
KA
1135static int iommu_enable_translation(struct intel_iommu *iommu)
1136{
1137 u32 sts;
1138 unsigned long flags;
1139
1140 spin_lock_irqsave(&iommu->register_lock, flags);
c416daa9
DW
1141 iommu->gcmd |= DMA_GCMD_TE;
1142 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
ba395927
KA
1143
1144 /* Make sure hardware complete it */
1145 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1146 readl, (sts & DMA_GSTS_TES), sts);
ba395927 1147
ba395927
KA
1148 spin_unlock_irqrestore(&iommu->register_lock, flags);
1149 return 0;
1150}
1151
1152static int iommu_disable_translation(struct intel_iommu *iommu)
1153{
1154 u32 sts;
1155 unsigned long flag;
1156
1157 spin_lock_irqsave(&iommu->register_lock, flag);
1158 iommu->gcmd &= ~DMA_GCMD_TE;
1159 writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1160
1161 /* Make sure hardware complete it */
1162 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
c416daa9 1163 readl, (!(sts & DMA_GSTS_TES)), sts);
ba395927
KA
1164
1165 spin_unlock_irqrestore(&iommu->register_lock, flag);
1166 return 0;
1167}
1168
3460a6d9 1169
ba395927
KA
1170static int iommu_init_domains(struct intel_iommu *iommu)
1171{
1172 unsigned long ndomains;
1173 unsigned long nlongs;
1174
1175 ndomains = cap_ndoms(iommu->cap);
1176 pr_debug("Number of Domains supported <%ld>\n", ndomains);
1177 nlongs = BITS_TO_LONGS(ndomains);
1178
94a91b50
DD
1179 spin_lock_init(&iommu->lock);
1180
ba395927
KA
1181 /* TBD: there might be 64K domains,
1182 * consider other allocation for future chip
1183 */
1184 iommu->domain_ids = kcalloc(nlongs, sizeof(unsigned long), GFP_KERNEL);
1185 if (!iommu->domain_ids) {
1186 printk(KERN_ERR "Allocating domain id array failed\n");
1187 return -ENOMEM;
1188 }
1189 iommu->domains = kcalloc(ndomains, sizeof(struct dmar_domain *),
1190 GFP_KERNEL);
1191 if (!iommu->domains) {
1192 printk(KERN_ERR "Allocating domain array failed\n");
ba395927
KA
1193 return -ENOMEM;
1194 }
1195
1196 /*
1197 * if Caching mode is set, then invalid translations are tagged
1198 * with domainid 0. Hence we need to pre-allocate it.
1199 */
1200 if (cap_caching_mode(iommu->cap))
1201 set_bit(0, iommu->domain_ids);
1202 return 0;
1203}
ba395927 1204
ba395927
KA
1205
1206static void domain_exit(struct dmar_domain *domain);
5e98c4b1 1207static void vm_domain_exit(struct dmar_domain *domain);
e61d98d8
SS
1208
1209void free_dmar_iommu(struct intel_iommu *iommu)
ba395927
KA
1210{
1211 struct dmar_domain *domain;
1212 int i;
c7151a8d 1213 unsigned long flags;
ba395927 1214
94a91b50
DD
1215 if ((iommu->domains) && (iommu->domain_ids)) {
1216 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1217 for (; i < cap_ndoms(iommu->cap); ) {
1218 domain = iommu->domains[i];
1219 clear_bit(i, iommu->domain_ids);
1220
1221 spin_lock_irqsave(&domain->iommu_lock, flags);
1222 if (--domain->iommu_count == 0) {
1223 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE)
1224 vm_domain_exit(domain);
1225 else
1226 domain_exit(domain);
1227 }
1228 spin_unlock_irqrestore(&domain->iommu_lock, flags);
c7151a8d 1229
94a91b50
DD
1230 i = find_next_bit(iommu->domain_ids,
1231 cap_ndoms(iommu->cap), i+1);
5e98c4b1 1232 }
ba395927
KA
1233 }
1234
1235 if (iommu->gcmd & DMA_GCMD_TE)
1236 iommu_disable_translation(iommu);
1237
1238 if (iommu->irq) {
1239 set_irq_data(iommu->irq, NULL);
1240 /* This will mask the irq */
1241 free_irq(iommu->irq, iommu);
1242 destroy_irq(iommu->irq);
1243 }
1244
1245 kfree(iommu->domains);
1246 kfree(iommu->domain_ids);
1247
d9630fe9
WH
1248 g_iommus[iommu->seq_id] = NULL;
1249
1250 /* if all iommus are freed, free g_iommus */
1251 for (i = 0; i < g_num_of_iommus; i++) {
1252 if (g_iommus[i])
1253 break;
1254 }
1255
1256 if (i == g_num_of_iommus)
1257 kfree(g_iommus);
1258
ba395927
KA
1259 /* free context mapping */
1260 free_context_table(iommu);
ba395927
KA
1261}
1262
2c2e2c38 1263static struct dmar_domain *alloc_domain(void)
ba395927 1264{
ba395927 1265 struct dmar_domain *domain;
ba395927
KA
1266
1267 domain = alloc_domain_mem();
1268 if (!domain)
1269 return NULL;
1270
4c923d47 1271 domain->nid = -1;
2c2e2c38
FY
1272 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
1273 domain->flags = 0;
1274
1275 return domain;
1276}
1277
1278static int iommu_attach_domain(struct dmar_domain *domain,
1279 struct intel_iommu *iommu)
1280{
1281 int num;
1282 unsigned long ndomains;
1283 unsigned long flags;
1284
ba395927
KA
1285 ndomains = cap_ndoms(iommu->cap);
1286
1287 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38 1288
ba395927
KA
1289 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1290 if (num >= ndomains) {
1291 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927 1292 printk(KERN_ERR "IOMMU: no free domain ids\n");
2c2e2c38 1293 return -ENOMEM;
ba395927
KA
1294 }
1295
ba395927 1296 domain->id = num;
2c2e2c38 1297 set_bit(num, iommu->domain_ids);
8c11e798 1298 set_bit(iommu->seq_id, &domain->iommu_bmp);
ba395927
KA
1299 iommu->domains[num] = domain;
1300 spin_unlock_irqrestore(&iommu->lock, flags);
1301
2c2e2c38 1302 return 0;
ba395927
KA
1303}
1304
2c2e2c38
FY
1305static void iommu_detach_domain(struct dmar_domain *domain,
1306 struct intel_iommu *iommu)
ba395927
KA
1307{
1308 unsigned long flags;
2c2e2c38
FY
1309 int num, ndomains;
1310 int found = 0;
ba395927 1311
8c11e798 1312 spin_lock_irqsave(&iommu->lock, flags);
2c2e2c38
FY
1313 ndomains = cap_ndoms(iommu->cap);
1314 num = find_first_bit(iommu->domain_ids, ndomains);
1315 for (; num < ndomains; ) {
1316 if (iommu->domains[num] == domain) {
1317 found = 1;
1318 break;
1319 }
1320 num = find_next_bit(iommu->domain_ids,
1321 cap_ndoms(iommu->cap), num+1);
1322 }
1323
1324 if (found) {
1325 clear_bit(num, iommu->domain_ids);
1326 clear_bit(iommu->seq_id, &domain->iommu_bmp);
1327 iommu->domains[num] = NULL;
1328 }
8c11e798 1329 spin_unlock_irqrestore(&iommu->lock, flags);
ba395927
KA
1330}
1331
1332static struct iova_domain reserved_iova_list;
8a443df4 1333static struct lock_class_key reserved_rbtree_key;
ba395927
KA
1334
1335static void dmar_init_reserved_ranges(void)
1336{
1337 struct pci_dev *pdev = NULL;
1338 struct iova *iova;
1339 int i;
ba395927 1340
f661197e 1341 init_iova_domain(&reserved_iova_list, DMA_32BIT_PFN);
ba395927 1342
8a443df4
MG
1343 lockdep_set_class(&reserved_iova_list.iova_rbtree_lock,
1344 &reserved_rbtree_key);
1345
ba395927
KA
1346 /* IOAPIC ranges shouldn't be accessed by DMA */
1347 iova = reserve_iova(&reserved_iova_list, IOVA_PFN(IOAPIC_RANGE_START),
1348 IOVA_PFN(IOAPIC_RANGE_END));
1349 if (!iova)
1350 printk(KERN_ERR "Reserve IOAPIC range failed\n");
1351
1352 /* Reserve all PCI MMIO to avoid peer-to-peer access */
1353 for_each_pci_dev(pdev) {
1354 struct resource *r;
1355
1356 for (i = 0; i < PCI_NUM_RESOURCES; i++) {
1357 r = &pdev->resource[i];
1358 if (!r->flags || !(r->flags & IORESOURCE_MEM))
1359 continue;
1a4a4551
DW
1360 iova = reserve_iova(&reserved_iova_list,
1361 IOVA_PFN(r->start),
1362 IOVA_PFN(r->end));
ba395927
KA
1363 if (!iova)
1364 printk(KERN_ERR "Reserve iova failed\n");
1365 }
1366 }
1367
1368}
1369
1370static void domain_reserve_special_ranges(struct dmar_domain *domain)
1371{
1372 copy_reserved_iova(&reserved_iova_list, &domain->iovad);
1373}
1374
1375static inline int guestwidth_to_adjustwidth(int gaw)
1376{
1377 int agaw;
1378 int r = (gaw - 12) % 9;
1379
1380 if (r == 0)
1381 agaw = gaw;
1382 else
1383 agaw = gaw + 9 - r;
1384 if (agaw > 64)
1385 agaw = 64;
1386 return agaw;
1387}
1388
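/*
 * Illustrative sketch, not part of the driver: the adjusted width is
 * the guest width rounded up so that (width - 12) is a multiple of 9,
 * i.e. whole page-table levels, and capped at 64 bits.
 */
static inline void example_adjust_width(void)
{
        BUG_ON(guestwidth_to_adjustwidth(48) != 48);
        BUG_ON(guestwidth_to_adjustwidth(36) != 39);
}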
1389static int domain_init(struct dmar_domain *domain, int guest_width)
1390{
1391 struct intel_iommu *iommu;
1392 int adjust_width, agaw;
1393 unsigned long sagaw;
1394
f661197e 1395 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
c7151a8d 1396 spin_lock_init(&domain->iommu_lock);
ba395927
KA
1397
1398 domain_reserve_special_ranges(domain);
1399
1400 /* calculate AGAW */
8c11e798 1401 iommu = domain_get_iommu(domain);
ba395927
KA
1402 if (guest_width > cap_mgaw(iommu->cap))
1403 guest_width = cap_mgaw(iommu->cap);
1404 domain->gaw = guest_width;
1405 adjust_width = guestwidth_to_adjustwidth(guest_width);
1406 agaw = width_to_agaw(adjust_width);
1407 sagaw = cap_sagaw(iommu->cap);
1408 if (!test_bit(agaw, &sagaw)) {
1409 /* hardware doesn't support it, choose a bigger one */
1410 pr_debug("IOMMU: hardware doesn't support agaw %d\n", agaw);
1411 agaw = find_next_bit(&sagaw, 5, agaw);
1412 if (agaw >= 5)
1413 return -ENODEV;
1414 }
1415 domain->agaw = agaw;
1416 INIT_LIST_HEAD(&domain->devices);
1417
8e604097
WH
1418 if (ecap_coherent(iommu->ecap))
1419 domain->iommu_coherency = 1;
1420 else
1421 domain->iommu_coherency = 0;
1422
58c610bd
SY
1423 if (ecap_sc_support(iommu->ecap))
1424 domain->iommu_snooping = 1;
1425 else
1426 domain->iommu_snooping = 0;
1427
c7151a8d 1428 domain->iommu_count = 1;
4c923d47 1429 domain->nid = iommu->node;
c7151a8d 1430
ba395927 1431 /* always allocate the top pgd */
4c923d47 1432 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
ba395927
KA
1433 if (!domain->pgd)
1434 return -ENOMEM;
5b6985ce 1435 __iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
ba395927
KA
1436 return 0;
1437}
1438
1439static void domain_exit(struct dmar_domain *domain)
1440{
2c2e2c38
FY
1441 struct dmar_drhd_unit *drhd;
1442 struct intel_iommu *iommu;
ba395927
KA
1443
1444 /* Domain 0 is reserved, so don't process it */
1445 if (!domain)
1446 return;
1447
1448 domain_remove_dev_info(domain);
1449 /* destroy iovas */
1450 put_iova_domain(&domain->iovad);
ba395927
KA
1451
1452 /* clear ptes */
595badf5 1453 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927
KA
1454
1455 /* free page tables */
d794dc9b 1456 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
ba395927 1457
2c2e2c38
FY
1458 for_each_active_iommu(iommu, drhd)
1459 if (test_bit(iommu->seq_id, &domain->iommu_bmp))
1460 iommu_detach_domain(domain, iommu);
1461
ba395927
KA
1462 free_domain_mem(domain);
1463}
1464
4ed0d3e6
FY
1465static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
1466 u8 bus, u8 devfn, int translation)
ba395927
KA
1467{
1468 struct context_entry *context;
ba395927 1469 unsigned long flags;
5331fe6f 1470 struct intel_iommu *iommu;
ea6606b0
WH
1471 struct dma_pte *pgd;
1472 unsigned long num;
1473 unsigned long ndomains;
1474 int id;
1475 int agaw;
93a23a72 1476 struct device_domain_info *info = NULL;
ba395927
KA
1477
1478 pr_debug("Set context mapping for %02x:%02x.%d\n",
1479 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
4ed0d3e6 1480
ba395927 1481 BUG_ON(!domain->pgd);
4ed0d3e6
FY
1482 BUG_ON(translation != CONTEXT_TT_PASS_THROUGH &&
1483 translation != CONTEXT_TT_MULTI_LEVEL);
5331fe6f 1484
276dbf99 1485 iommu = device_to_iommu(segment, bus, devfn);
5331fe6f
WH
1486 if (!iommu)
1487 return -ENODEV;
1488
ba395927
KA
1489 context = device_to_context_entry(iommu, bus, devfn);
1490 if (!context)
1491 return -ENOMEM;
1492 spin_lock_irqsave(&iommu->lock, flags);
c07e7d21 1493 if (context_present(context)) {
ba395927
KA
1494 spin_unlock_irqrestore(&iommu->lock, flags);
1495 return 0;
1496 }
1497
ea6606b0
WH
1498 id = domain->id;
1499 pgd = domain->pgd;
1500
2c2e2c38
FY
1501 if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
1502 domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) {
ea6606b0
WH
1503 int found = 0;
1504
1505 /* find an available domain id for this device in iommu */
1506 ndomains = cap_ndoms(iommu->cap);
1507 num = find_first_bit(iommu->domain_ids, ndomains);
1508 for (; num < ndomains; ) {
1509 if (iommu->domains[num] == domain) {
1510 id = num;
1511 found = 1;
1512 break;
1513 }
1514 num = find_next_bit(iommu->domain_ids,
1515 cap_ndoms(iommu->cap), num+1);
1516 }
1517
1518 if (found == 0) {
1519 num = find_first_zero_bit(iommu->domain_ids, ndomains);
1520 if (num >= ndomains) {
1521 spin_unlock_irqrestore(&iommu->lock, flags);
1522 printk(KERN_ERR "IOMMU: no free domain ids\n");
1523 return -EFAULT;
1524 }
1525
1526 set_bit(num, iommu->domain_ids);
1527 iommu->domains[num] = domain;
1528 id = num;
1529 }
1530
1531 /* Skip top levels of page tables for
1532 * an iommu whose agaw is smaller than the domain's.
1533 */
1534 for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
1535 pgd = phys_to_virt(dma_pte_addr(pgd));
1536 if (!dma_pte_present(pgd)) {
1537 spin_unlock_irqrestore(&iommu->lock, flags);
1538 return -ENOMEM;
1539 }
1540 }
1541 }
1542
1543 context_set_domain_id(context, id);
4ed0d3e6 1544
93a23a72
YZ
1545 if (translation != CONTEXT_TT_PASS_THROUGH) {
1546 info = iommu_support_dev_iotlb(domain, segment, bus, devfn);
1547 translation = info ? CONTEXT_TT_DEV_IOTLB :
1548 CONTEXT_TT_MULTI_LEVEL;
1549 }
4ed0d3e6
FY
1550 /*
1551 * In pass through mode, AW must be programmed to indicate the largest
1552 * AGAW value supported by hardware. And ASR is ignored by hardware.
1553 */
93a23a72 1554 if (unlikely(translation == CONTEXT_TT_PASS_THROUGH))
4ed0d3e6 1555 context_set_address_width(context, iommu->msagaw);
93a23a72
YZ
1556 else {
1557 context_set_address_root(context, virt_to_phys(pgd));
1558 context_set_address_width(context, iommu->agaw);
1559 }
4ed0d3e6
FY
1560
1561 context_set_translation_type(context, translation);
c07e7d21
MM
1562 context_set_fault_enable(context);
1563 context_set_present(context);
5331fe6f 1564 domain_flush_cache(domain, context, sizeof(*context));
ba395927 1565
4c25a2c1
DW
1566 /*
1567 * It's a non-present to present mapping. If hardware doesn't cache
1568 * non-present entries we only need to flush the write-buffer. If it
1569 * _does_ cache non-present entries, then it does so in the special
1570 * domain #0, which we have to flush:
1571 */
1572 if (cap_caching_mode(iommu->cap)) {
1573 iommu->flush.flush_context(iommu, 0,
1574 (((u16)bus) << 8) | devfn,
1575 DMA_CCMD_MASK_NOBIT,
1576 DMA_CCMD_DEVICE_INVL);
1f0ef2aa 1577 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_DSI_FLUSH);
4c25a2c1 1578 } else {
ba395927 1579 iommu_flush_write_buffer(iommu);
4c25a2c1 1580 }
93a23a72 1581 iommu_enable_dev_iotlb(info);
ba395927 1582 spin_unlock_irqrestore(&iommu->lock, flags);
c7151a8d
WH
1583
1584 spin_lock_irqsave(&domain->iommu_lock, flags);
1585 if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
1586 domain->iommu_count++;
4c923d47
SS
1587 if (domain->iommu_count == 1)
1588 domain->nid = iommu->node;
58c610bd 1589 domain_update_iommu_cap(domain);
c7151a8d
WH
1590 }
1591 spin_unlock_irqrestore(&domain->iommu_lock, flags);
ba395927
KA
1592 return 0;
1593}
1594
1595static int
4ed0d3e6
FY
1596domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev,
1597 int translation)
ba395927
KA
1598{
1599 int ret;
1600 struct pci_dev *tmp, *parent;
1601
276dbf99 1602 ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus),
4ed0d3e6
FY
1603 pdev->bus->number, pdev->devfn,
1604 translation);
ba395927
KA
1605 if (ret)
1606 return ret;
1607
1608 /* dependent device mapping */
1609 tmp = pci_find_upstream_pcie_bridge(pdev);
1610 if (!tmp)
1611 return 0;
1612 /* Secondary interface's bus number and devfn 0 */
1613 parent = pdev->bus->self;
1614 while (parent != tmp) {
276dbf99
DW
1615 ret = domain_context_mapping_one(domain,
1616 pci_domain_nr(parent->bus),
1617 parent->bus->number,
4ed0d3e6 1618 parent->devfn, translation);
ba395927
KA
1619 if (ret)
1620 return ret;
1621 parent = parent->bus->self;
1622 }
1623 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
1624 return domain_context_mapping_one(domain,
276dbf99 1625 pci_domain_nr(tmp->subordinate),
4ed0d3e6
FY
1626 tmp->subordinate->number, 0,
1627 translation);
ba395927
KA
1628 else /* this is a legacy PCI bridge */
1629 return domain_context_mapping_one(domain,
276dbf99
DW
1630 pci_domain_nr(tmp->bus),
1631 tmp->bus->number,
4ed0d3e6
FY
1632 tmp->devfn,
1633 translation);
ba395927
KA
1634}
1635
5331fe6f 1636static int domain_context_mapped(struct pci_dev *pdev)
ba395927
KA
1637{
1638 int ret;
1639 struct pci_dev *tmp, *parent;
5331fe6f
WH
1640 struct intel_iommu *iommu;
1641
276dbf99
DW
1642 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
1643 pdev->devfn);
5331fe6f
WH
1644 if (!iommu)
1645 return -ENODEV;
ba395927 1646
276dbf99 1647 ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn);
ba395927
KA
1648 if (!ret)
1649 return ret;
1650 /* dependent device mapping */
1651 tmp = pci_find_upstream_pcie_bridge(pdev);
1652 if (!tmp)
1653 return ret;
1654 /* Secondary interface's bus number and devfn 0 */
1655 parent = pdev->bus->self;
1656 while (parent != tmp) {
8c11e798 1657 ret = device_context_mapped(iommu, parent->bus->number,
276dbf99 1658 parent->devfn);
ba395927
KA
1659 if (!ret)
1660 return ret;
1661 parent = parent->bus->self;
1662 }
1663 if (tmp->is_pcie)
276dbf99
DW
1664 return device_context_mapped(iommu, tmp->subordinate->number,
1665 0);
ba395927 1666 else
276dbf99
DW
1667 return device_context_mapped(iommu, tmp->bus->number,
1668 tmp->devfn);
ba395927
KA
1669}
1670
f532959b
FY
1671/* Returns a number of VTD pages, but aligned to MM page size */
1672static inline unsigned long aligned_nrpages(unsigned long host_addr,
1673 size_t size)
1674{
1675 host_addr &= ~PAGE_MASK;
1676 return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT;
1677}
1678
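/*
 * Illustrative sketch, not part of the driver: with 4KiB MM pages,
 * aligned_nrpages(0xf00, 0x200) covers the byte range [0xf00, 0x1100),
 * which straddles an MM page boundary and therefore rounds up to two
 * 4KiB VT-d pages, not one.
 */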
9051aa02
DW
1679static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1680 struct scatterlist *sg, unsigned long phys_pfn,
1681 unsigned long nr_pages, int prot)
e1605495
DW
1682{
1683 struct dma_pte *first_pte = NULL, *pte = NULL;
9051aa02 1684 phys_addr_t uninitialized_var(pteval);
e1605495 1685 int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT;
9051aa02 1686 unsigned long sg_res;
e1605495
DW
1687
1688 BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width);
1689
1690 if ((prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0)
1691 return -EINVAL;
1692
1693 prot &= DMA_PTE_READ | DMA_PTE_WRITE | DMA_PTE_SNP;
1694
9051aa02
DW
1695 if (sg)
1696 sg_res = 0;
1697 else {
1698 sg_res = nr_pages + 1;
1699 pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot;
1700 }
1701
e1605495 1702 while (nr_pages--) {
c85994e4
DW
1703 uint64_t tmp;
1704
e1605495 1705 if (!sg_res) {
f532959b 1706 sg_res = aligned_nrpages(sg->offset, sg->length);
e1605495
DW
1707 sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset;
1708 sg->dma_length = sg->length;
1709 pteval = page_to_phys(sg_page(sg)) | prot;
1710 }
1711 if (!pte) {
1712 first_pte = pte = pfn_to_dma_pte(domain, iov_pfn);
1713 if (!pte)
1714 return -ENOMEM;
1715 }
1716 /* We don't need lock here, nobody else
1717 * touches the iova range
1718 */
7766a3fb 1719 tmp = cmpxchg64_local(&pte->val, 0ULL, pteval);
c85994e4 1720 if (tmp) {
1bf20f0d 1721 static int dumps = 5;
c85994e4
DW
1722 printk(KERN_CRIT "ERROR: DMA PTE for vPFN 0x%lx already set (to %llx not %llx)\n",
1723 iov_pfn, tmp, (unsigned long long)pteval);
1bf20f0d
DW
1724 if (dumps) {
1725 dumps--;
1726 debug_dma_dump_mappings(NULL);
1727 }
1728 WARN_ON(1);
1729 }
e1605495 1730 pte++;
75e6bf96 1731 if (!nr_pages || first_pte_in_page(pte)) {
e1605495
DW
1732 domain_flush_cache(domain, first_pte,
1733 (void *)pte - (void *)first_pte);
1734 pte = NULL;
1735 }
1736 iov_pfn++;
1737 pteval += VTD_PAGE_SIZE;
1738 sg_res--;
1739 if (!sg_res)
1740 sg = sg_next(sg);
1741 }
1742 return 0;
1743}
1744
9051aa02
DW
1745static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1746 struct scatterlist *sg, unsigned long nr_pages,
1747 int prot)
ba395927 1748{
9051aa02
DW
1749 return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot);
1750}
6f6a00e4 1751
9051aa02
DW
1752static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
1753 unsigned long phys_pfn, unsigned long nr_pages,
1754 int prot)
1755{
1756 return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot);
ba395927
KA
1757}
1758
c7151a8d 1759static void iommu_detach_dev(struct intel_iommu *iommu, u8 bus, u8 devfn)
ba395927 1760{
c7151a8d
WH
1761 if (!iommu)
1762 return;
8c11e798
WH
1763
1764 clear_context_table(iommu, bus, devfn);
1765 iommu->flush.flush_context(iommu, 0, 0, 0,
4c25a2c1 1766 DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 1767 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
ba395927
KA
1768}
1769
1770static void domain_remove_dev_info(struct dmar_domain *domain)
1771{
1772 struct device_domain_info *info;
1773 unsigned long flags;
c7151a8d 1774 struct intel_iommu *iommu;
ba395927
KA
1775
1776 spin_lock_irqsave(&device_domain_lock, flags);
1777 while (!list_empty(&domain->devices)) {
1778 info = list_entry(domain->devices.next,
1779 struct device_domain_info, link);
1780 list_del(&info->link);
1781 list_del(&info->global);
1782 if (info->dev)
358dd8ac 1783 info->dev->dev.archdata.iommu = NULL;
ba395927
KA
1784 spin_unlock_irqrestore(&device_domain_lock, flags);
1785
93a23a72 1786 iommu_disable_dev_iotlb(info);
276dbf99 1787 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 1788 iommu_detach_dev(iommu, info->bus, info->devfn);
ba395927
KA
1789 free_devinfo_mem(info);
1790
1791 spin_lock_irqsave(&device_domain_lock, flags);
1792 }
1793 spin_unlock_irqrestore(&device_domain_lock, flags);
1794}
1795
1796/*
1797 * find_domain
358dd8ac 1798 * Note: struct pci_dev->dev.archdata.iommu stores the info
ba395927 1799 */
38717946 1800static struct dmar_domain *
ba395927
KA
1801find_domain(struct pci_dev *pdev)
1802{
1803 struct device_domain_info *info;
1804
1805 /* No lock here, assumes no domain exit in normal case */
358dd8ac 1806 info = pdev->dev.archdata.iommu;
ba395927
KA
1807 if (info)
1808 return info->domain;
1809 return NULL;
1810}
1811
ba395927
KA
1812/* domain is initialized */
1813static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1814{
1815 struct dmar_domain *domain, *found = NULL;
1816 struct intel_iommu *iommu;
1817 struct dmar_drhd_unit *drhd;
1818 struct device_domain_info *info, *tmp;
1819 struct pci_dev *dev_tmp;
1820 unsigned long flags;
1821 int bus = 0, devfn = 0;
276dbf99 1822 int segment;
2c2e2c38 1823 int ret;
ba395927
KA
1824
1825 domain = find_domain(pdev);
1826 if (domain)
1827 return domain;
1828
276dbf99
DW
1829 segment = pci_domain_nr(pdev->bus);
1830
ba395927
KA
1831 dev_tmp = pci_find_upstream_pcie_bridge(pdev);
1832 if (dev_tmp) {
1833 if (dev_tmp->is_pcie) {
1834 bus = dev_tmp->subordinate->number;
1835 devfn = 0;
1836 } else {
1837 bus = dev_tmp->bus->number;
1838 devfn = dev_tmp->devfn;
1839 }
1840 spin_lock_irqsave(&device_domain_lock, flags);
1841 list_for_each_entry(info, &device_domain_list, global) {
276dbf99
DW
1842 if (info->segment == segment &&
1843 info->bus == bus && info->devfn == devfn) {
ba395927
KA
1844 found = info->domain;
1845 break;
1846 }
1847 }
1848 spin_unlock_irqrestore(&device_domain_lock, flags);
 1849		/* the PCIe-to-PCI bridge already has a domain; use it */
1850 if (found) {
1851 domain = found;
1852 goto found_domain;
1853 }
1854 }
1855
2c2e2c38
FY
1856 domain = alloc_domain();
1857 if (!domain)
1858 goto error;
1859
ba395927
KA
1860 /* Allocate new domain for the device */
1861 drhd = dmar_find_matched_drhd_unit(pdev);
1862 if (!drhd) {
1863 printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n",
1864 pci_name(pdev));
1865 return NULL;
1866 }
1867 iommu = drhd->iommu;
1868
2c2e2c38
FY
1869 ret = iommu_attach_domain(domain, iommu);
1870 if (ret) {
1871 domain_exit(domain);
ba395927 1872 goto error;
2c2e2c38 1873 }
ba395927
KA
1874
1875 if (domain_init(domain, gaw)) {
1876 domain_exit(domain);
1877 goto error;
1878 }
1879
1880 /* register pcie-to-pci device */
1881 if (dev_tmp) {
1882 info = alloc_devinfo_mem();
1883 if (!info) {
1884 domain_exit(domain);
1885 goto error;
1886 }
276dbf99 1887 info->segment = segment;
ba395927
KA
1888 info->bus = bus;
1889 info->devfn = devfn;
1890 info->dev = NULL;
1891 info->domain = domain;
 1892		/* This domain is shared by all devices under the p2p bridge */
3b5410e7 1893 domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES;
ba395927
KA
1894
 1895		/* the PCIe-to-PCI bridge already has a domain; use it */
1896 found = NULL;
1897 spin_lock_irqsave(&device_domain_lock, flags);
1898 list_for_each_entry(tmp, &device_domain_list, global) {
276dbf99
DW
1899 if (tmp->segment == segment &&
1900 tmp->bus == bus && tmp->devfn == devfn) {
ba395927
KA
1901 found = tmp->domain;
1902 break;
1903 }
1904 }
1905 if (found) {
1906 free_devinfo_mem(info);
1907 domain_exit(domain);
1908 domain = found;
1909 } else {
1910 list_add(&info->link, &domain->devices);
1911 list_add(&info->global, &device_domain_list);
1912 }
1913 spin_unlock_irqrestore(&device_domain_lock, flags);
1914 }
1915
1916found_domain:
1917 info = alloc_devinfo_mem();
1918 if (!info)
1919 goto error;
276dbf99 1920 info->segment = segment;
ba395927
KA
1921 info->bus = pdev->bus->number;
1922 info->devfn = pdev->devfn;
1923 info->dev = pdev;
1924 info->domain = domain;
1925 spin_lock_irqsave(&device_domain_lock, flags);
 1926	/* another thread may already have attached a domain to this device */
1927 found = find_domain(pdev);
1928 if (found != NULL) {
1929 spin_unlock_irqrestore(&device_domain_lock, flags);
1930 if (found != domain) {
1931 domain_exit(domain);
1932 domain = found;
1933 }
1934 free_devinfo_mem(info);
1935 return domain;
1936 }
1937 list_add(&info->link, &domain->devices);
1938 list_add(&info->global, &device_domain_list);
358dd8ac 1939 pdev->dev.archdata.iommu = info;
ba395927
KA
1940 spin_unlock_irqrestore(&device_domain_lock, flags);
1941 return domain;
1942error:
 1943	/* recheck here; another thread may have set it in the meantime */
1944 return find_domain(pdev);
1945}
1946
2c2e2c38 1947static int iommu_identity_mapping;
e0fc7e0b
DW
1948#define IDENTMAP_ALL 1
1949#define IDENTMAP_GFX 2
1950#define IDENTMAP_AZALIA 4
2c2e2c38 1951
b213203e
DW
1952static int iommu_domain_identity_map(struct dmar_domain *domain,
1953 unsigned long long start,
1954 unsigned long long end)
ba395927 1955{
c5395d5c
DW
1956 unsigned long first_vpfn = start >> VTD_PAGE_SHIFT;
1957 unsigned long last_vpfn = end >> VTD_PAGE_SHIFT;
1958
1959 if (!reserve_iova(&domain->iovad, dma_to_mm_pfn(first_vpfn),
1960 dma_to_mm_pfn(last_vpfn))) {
ba395927 1961 printk(KERN_ERR "IOMMU: reserve iova failed\n");
b213203e 1962 return -ENOMEM;
ba395927
KA
1963 }
1964
c5395d5c
DW
1965 pr_debug("Mapping reserved region %llx-%llx for domain %d\n",
1966 start, end, domain->id);
ba395927
KA
1967 /*
 1968	 * The RMRR range might overlap with the physical memory range;
1969 * clear it first
1970 */
c5395d5c 1971 dma_pte_clear_range(domain, first_vpfn, last_vpfn);
ba395927 1972
c5395d5c
DW
1973 return domain_pfn_mapping(domain, first_vpfn, first_vpfn,
1974 last_vpfn - first_vpfn + 1,
61df7443 1975 DMA_PTE_READ|DMA_PTE_WRITE);
b213203e
DW
1976}
1977
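/*
 * Illustrative sketch (not part of the driver): an "identity map" means
 * IOVA == physical address over the given range.  Making a hypothetical
 * region [0x100000, 0x200000) usable 1:1 boils down to:
 *
 *	ret = iommu_domain_identity_map(domain, 0x100000, 0x200000);
 *
 * i.e. reserve the matching IOVAs, clear any stale PTEs, then call
 * domain_pfn_mapping() with identical source and destination pfns.
 * Real callers pass RMRR ranges or the 0-16MiB ISA window.
 */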
1978static int iommu_prepare_identity_map(struct pci_dev *pdev,
1979 unsigned long long start,
1980 unsigned long long end)
1981{
1982 struct dmar_domain *domain;
1983 int ret;
1984
c7ab48d2 1985 domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH);
b213203e
DW
1986 if (!domain)
1987 return -ENOMEM;
1988
19943b0e
DW
1989 /* For _hardware_ passthrough, don't bother. But for software
1990 passthrough, we do it anyway -- it may indicate a memory
 1991	   range which is reserved in E820, and so didn't get set
1992 up to start with in si_domain */
1993 if (domain == si_domain && hw_pass_through) {
 1994		printk(KERN_INFO "Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n",
1995 pci_name(pdev), start, end);
1996 return 0;
1997 }
1998
1999 printk(KERN_INFO
2000 "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
2001 pci_name(pdev), start, end);
2ff729f5
DW
2002
2003 if (end >> agaw_to_width(domain->agaw)) {
2004 WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
2005 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
2006 agaw_to_width(domain->agaw),
2007 dmi_get_system_info(DMI_BIOS_VENDOR),
2008 dmi_get_system_info(DMI_BIOS_VERSION),
2009 dmi_get_system_info(DMI_PRODUCT_VERSION));
2010 ret = -EIO;
2011 goto error;
2012 }
19943b0e 2013
b213203e 2014 ret = iommu_domain_identity_map(domain, start, end);
ba395927
KA
2015 if (ret)
2016 goto error;
2017
2018 /* context entry init */
4ed0d3e6 2019 ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL);
b213203e
DW
2020 if (ret)
2021 goto error;
2022
2023 return 0;
2024
2025 error:
ba395927
KA
2026 domain_exit(domain);
2027 return ret;
ba395927
KA
2028}
2029
2030static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr,
2031 struct pci_dev *pdev)
2032{
358dd8ac 2033 if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO)
ba395927
KA
2034 return 0;
2035 return iommu_prepare_identity_map(pdev, rmrr->base_address,
2036 rmrr->end_address + 1);
2037}
2038
49a0429e
KA
2039#ifdef CONFIG_DMAR_FLOPPY_WA
2040static inline void iommu_prepare_isa(void)
2041{
2042 struct pci_dev *pdev;
2043 int ret;
2044
2045 pdev = pci_get_class(PCI_CLASS_BRIDGE_ISA << 8, NULL);
2046 if (!pdev)
2047 return;
2048
c7ab48d2 2049 printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n");
49a0429e
KA
2050 ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024);
2051
2052 if (ret)
c7ab48d2
DW
2053 printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; "
2054 "floppy might not work\n");
49a0429e
KA
2055
2056}
2057#else
2058static inline void iommu_prepare_isa(void)
2059{
2060 return;
2061}
 2062#endif /* CONFIG_DMAR_FLOPPY_WA */
2063
2c2e2c38 2064static int md_domain_init(struct dmar_domain *domain, int guest_width);
c7ab48d2
DW
2065
2066static int __init si_domain_work_fn(unsigned long start_pfn,
2067 unsigned long end_pfn, void *datax)
2068{
2069 int *ret = datax;
2070
2071 *ret = iommu_domain_identity_map(si_domain,
2072 (uint64_t)start_pfn << PAGE_SHIFT,
2073 (uint64_t)end_pfn << PAGE_SHIFT);
2074 return *ret;
2075
2076}
2077
071e1374 2078static int __init si_domain_init(int hw)
2c2e2c38
FY
2079{
2080 struct dmar_drhd_unit *drhd;
2081 struct intel_iommu *iommu;
c7ab48d2 2082 int nid, ret = 0;
2c2e2c38
FY
2083
2084 si_domain = alloc_domain();
2085 if (!si_domain)
2086 return -EFAULT;
2087
c7ab48d2 2088 pr_debug("Identity mapping domain is domain %d\n", si_domain->id);
2c2e2c38
FY
2089
2090 for_each_active_iommu(iommu, drhd) {
2091 ret = iommu_attach_domain(si_domain, iommu);
2092 if (ret) {
2093 domain_exit(si_domain);
2094 return -EFAULT;
2095 }
2096 }
2097
2098 if (md_domain_init(si_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
2099 domain_exit(si_domain);
2100 return -EFAULT;
2101 }
2102
2103 si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY;
2104
19943b0e
DW
2105 if (hw)
2106 return 0;
2107
c7ab48d2
DW
2108 for_each_online_node(nid) {
2109 work_with_active_regions(nid, si_domain_work_fn, &ret);
2110 if (ret)
2111 return ret;
2112 }
2113
2c2e2c38
FY
2114 return 0;
2115}
2116
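/*
 * Illustrative sketch (not part of the driver): si_domain is the one domain
 * shared by every identity-mapped device.  After si_domain_init(0) its page
 * tables contain IOVA == physical for all usable RAM, built by the callback
 * above, conceptually:
 *
 *	for each active memory region [start_pfn, end_pfn):
 *		iommu_domain_identity_map(si_domain,
 *					  (u64)start_pfn << PAGE_SHIFT,
 *					  (u64)end_pfn << PAGE_SHIFT);
 *
 * With hw == 1 (hardware pass-through) the tables stay empty, since
 * translation is bypassed via CONTEXT_TT_PASS_THROUGH context entries.
 */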
2117static void domain_remove_one_dev_info(struct dmar_domain *domain,
2118 struct pci_dev *pdev);
2119static int identity_mapping(struct pci_dev *pdev)
2120{
2121 struct device_domain_info *info;
2122
2123 if (likely(!iommu_identity_mapping))
2124 return 0;
2125
2126
2127 list_for_each_entry(info, &si_domain->devices, link)
2128 if (info->dev == pdev)
2129 return 1;
2130 return 0;
2131}
2132
2133static int domain_add_dev_info(struct dmar_domain *domain,
5fe60f4e
DW
2134 struct pci_dev *pdev,
2135 int translation)
2c2e2c38
FY
2136{
2137 struct device_domain_info *info;
2138 unsigned long flags;
5fe60f4e 2139 int ret;
2c2e2c38
FY
2140
2141 info = alloc_devinfo_mem();
2142 if (!info)
2143 return -ENOMEM;
2144
5fe60f4e
DW
2145 ret = domain_context_mapping(domain, pdev, translation);
2146 if (ret) {
2147 free_devinfo_mem(info);
2148 return ret;
2149 }
2150
2c2e2c38
FY
2151 info->segment = pci_domain_nr(pdev->bus);
2152 info->bus = pdev->bus->number;
2153 info->devfn = pdev->devfn;
2154 info->dev = pdev;
2155 info->domain = domain;
2156
2157 spin_lock_irqsave(&device_domain_lock, flags);
2158 list_add(&info->link, &domain->devices);
2159 list_add(&info->global, &device_domain_list);
2160 pdev->dev.archdata.iommu = info;
2161 spin_unlock_irqrestore(&device_domain_lock, flags);
2162
2163 return 0;
2164}
2165
6941af28
DW
2166static int iommu_should_identity_map(struct pci_dev *pdev, int startup)
2167{
e0fc7e0b
DW
2168 if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev))
2169 return 1;
2170
2171 if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev))
2172 return 1;
2173
2174 if (!(iommu_identity_mapping & IDENTMAP_ALL))
2175 return 0;
6941af28 2176
3dfc813d
DW
2177 /*
2178 * We want to start off with all devices in the 1:1 domain, and
2179 * take them out later if we find they can't access all of memory.
2180 *
2181 * However, we can't do this for PCI devices behind bridges,
2182 * because all PCI devices behind the same bridge will end up
2183 * with the same source-id on their transactions.
2184 *
2185 * Practically speaking, we can't change things around for these
2186 * devices at run-time, because we can't be sure there'll be no
2187 * DMA transactions in flight for any of their siblings.
2188 *
2189 * So PCI devices (unless they're on the root bus) as well as
2190 * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of
2191 * the 1:1 domain, just in _case_ one of their siblings turns out
2192 * not to be able to map all of memory.
2193 */
2194 if (!pdev->is_pcie) {
2195 if (!pci_is_root_bus(pdev->bus))
2196 return 0;
2197 if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI)
2198 return 0;
2199 } else if (pdev->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
2200 return 0;
2201
2202 /*
2203 * At boot time, we don't yet know if devices will be 64-bit capable.
2204 * Assume that they will -- if they turn out not to be, then we can
2205 * take them out of the 1:1 domain later.
2206 */
6941af28
DW
2207 if (!startup)
2208 return pdev->dma_mask > DMA_BIT_MASK(32);
2209
2210 return 1;
2211}
2212
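/*
 * Illustrative sketch (not part of the driver): assuming IDENTMAP_ALL is
 * set, the policy above works out roughly as follows for some hypothetical
 * devices:
 *
 *	iommu_should_identity_map(pcie_nic_on_root_complex, 1)    == 1
 *	iommu_should_identity_map(pci_dev_behind_pcie_bridge, 1)  == 0
 *	iommu_should_identity_map(dev_with_32bit_dma_mask, 0)     == 0
 *
 * The first is a PCIe endpoint, so it gets the 1:1 domain at boot; the
 * second would share a source-id with its siblings behind the bridge; the
 * third is re-evaluated at run time (startup == 0), fails the dma_mask
 * test, and is later moved out of si_domain by iommu_no_mapping().
 */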
071e1374 2213static int __init iommu_prepare_static_identity_mapping(int hw)
2c2e2c38 2214{
2c2e2c38
FY
2215 struct pci_dev *pdev = NULL;
2216 int ret;
2217
19943b0e 2218 ret = si_domain_init(hw);
2c2e2c38
FY
2219 if (ret)
2220 return -EFAULT;
2221
2c2e2c38 2222 for_each_pci_dev(pdev) {
6941af28 2223 if (iommu_should_identity_map(pdev, 1)) {
19943b0e
DW
2224 printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n",
2225 hw ? "hardware" : "software", pci_name(pdev));
62edf5dc 2226
5fe60f4e 2227 ret = domain_add_dev_info(si_domain, pdev,
19943b0e 2228 hw ? CONTEXT_TT_PASS_THROUGH :
62edf5dc
DW
2229 CONTEXT_TT_MULTI_LEVEL);
2230 if (ret)
2231 return ret;
62edf5dc 2232 }
2c2e2c38
FY
2233 }
2234
2235 return 0;
2236}
2237
2238int __init init_dmars(void)
ba395927
KA
2239{
2240 struct dmar_drhd_unit *drhd;
2241 struct dmar_rmrr_unit *rmrr;
2242 struct pci_dev *pdev;
2243 struct intel_iommu *iommu;
9d783ba0 2244 int i, ret;
2c2e2c38 2245
ba395927
KA
2246 /*
2247 * for each drhd
2248 * allocate root
2249 * initialize and program root entry to not present
2250 * endfor
2251 */
2252 for_each_drhd_unit(drhd) {
5e0d2a6f 2253 g_num_of_iommus++;
 2254		 * lock not needed as this is only incremented in the single-
 2255		 * threaded kernel __init code path; all other accesses are
 2256		 * read only
2257 * only
2258 */
2259 }
2260
d9630fe9
WH
2261 g_iommus = kcalloc(g_num_of_iommus, sizeof(struct intel_iommu *),
2262 GFP_KERNEL);
2263 if (!g_iommus) {
2264 printk(KERN_ERR "Allocating global iommu array failed\n");
2265 ret = -ENOMEM;
2266 goto error;
2267 }
2268
80b20dd8 2269 deferred_flush = kzalloc(g_num_of_iommus *
2270 sizeof(struct deferred_flush_tables), GFP_KERNEL);
2271 if (!deferred_flush) {
5e0d2a6f 2272 ret = -ENOMEM;
2273 goto error;
2274 }
2275
5e0d2a6f 2276 for_each_drhd_unit(drhd) {
2277 if (drhd->ignored)
2278 continue;
1886e8a9
SS
2279
2280 iommu = drhd->iommu;
d9630fe9 2281 g_iommus[iommu->seq_id] = iommu;
ba395927 2282
e61d98d8
SS
2283 ret = iommu_init_domains(iommu);
2284 if (ret)
2285 goto error;
2286
ba395927
KA
2287 /*
2288 * TBD:
2289 * we could share the same root & context tables
 2290	 * among all IOMMUs. Need to split it later.
2291 */
2292 ret = iommu_alloc_root_entry(iommu);
2293 if (ret) {
2294 printk(KERN_ERR "IOMMU: allocate root entry failed\n");
2295 goto error;
2296 }
4ed0d3e6 2297 if (!ecap_pass_through(iommu->ecap))
19943b0e 2298 hw_pass_through = 0;
ba395927
KA
2299 }
2300
1531a6a6
SS
2301 /*
 2302	 * Start from a sane IOMMU hardware state.
2303 */
a77b67d4
YS
2304 for_each_drhd_unit(drhd) {
2305 if (drhd->ignored)
2306 continue;
2307
2308 iommu = drhd->iommu;
1531a6a6
SS
2309
2310 /*
2311 * If the queued invalidation is already initialized by us
2312 * (for example, while enabling interrupt-remapping) then
 2313		 * things are already rolling from a sane state.
2314 */
2315 if (iommu->qi)
2316 continue;
2317
2318 /*
2319 * Clear any previous faults.
2320 */
2321 dmar_fault(-1, iommu);
2322 /*
2323 * Disable queued invalidation if supported and already enabled
2324 * before OS handover.
2325 */
2326 dmar_disable_qi(iommu);
2327 }
2328
2329 for_each_drhd_unit(drhd) {
2330 if (drhd->ignored)
2331 continue;
2332
2333 iommu = drhd->iommu;
2334
a77b67d4
YS
2335 if (dmar_enable_qi(iommu)) {
2336 /*
2337 * Queued Invalidate not enabled, use Register Based
2338 * Invalidate
2339 */
2340 iommu->flush.flush_context = __iommu_flush_context;
2341 iommu->flush.flush_iotlb = __iommu_flush_iotlb;
2342 printk(KERN_INFO "IOMMU 0x%Lx: using Register based "
b4e0f9eb
FT
2343 "invalidation\n",
2344 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2345 } else {
2346 iommu->flush.flush_context = qi_flush_context;
2347 iommu->flush.flush_iotlb = qi_flush_iotlb;
2348 printk(KERN_INFO "IOMMU 0x%Lx: using Queued "
b4e0f9eb
FT
2349 "invalidation\n",
2350 (unsigned long long)drhd->reg_base_addr);
a77b67d4
YS
2351 }
2352 }
2353
19943b0e 2354 if (iommu_pass_through)
e0fc7e0b
DW
2355 iommu_identity_mapping |= IDENTMAP_ALL;
2356
19943b0e 2357#ifdef CONFIG_DMAR_BROKEN_GFX_WA
e0fc7e0b 2358 iommu_identity_mapping |= IDENTMAP_GFX;
19943b0e 2359#endif
e0fc7e0b
DW
2360
2361 check_tylersburg_isoch();
2362
ba395927 2363 /*
19943b0e
DW
 2364	 * If pass-through is not set or not enabled, set up context entries
 2365	 * for identity mappings for RMRR, GFX and ISA, and possibly fall back
 2366	 * to the static identity mapping if iommu_identity_mapping is set.
ba395927 2367 */
19943b0e
DW
2368 if (iommu_identity_mapping) {
2369 ret = iommu_prepare_static_identity_mapping(hw_pass_through);
4ed0d3e6 2370 if (ret) {
19943b0e
DW
2371 printk(KERN_CRIT "Failed to setup IOMMU pass-through\n");
2372 goto error;
ba395927
KA
2373 }
2374 }
ba395927 2375 /*
19943b0e
DW
2376 * For each rmrr
2377 * for each dev attached to rmrr
2378 * do
2379 * locate drhd for dev, alloc domain for dev
2380 * allocate free domain
2381 * allocate page table entries for rmrr
2382 * if context not allocated for bus
2383 * allocate and init context
2384 * set present in root table for this bus
2385 * init context with domain, translation etc
2386 * endfor
2387 * endfor
ba395927 2388 */
19943b0e
DW
2389 printk(KERN_INFO "IOMMU: Setting RMRR:\n");
2390 for_each_rmrr_units(rmrr) {
2391 for (i = 0; i < rmrr->devices_cnt; i++) {
2392 pdev = rmrr->devices[i];
2393 /*
 2394			 * some BIOSes list nonexistent devices in the
 2395			 * DMAR table.
2396 */
2397 if (!pdev)
2398 continue;
2399 ret = iommu_prepare_rmrr_dev(rmrr, pdev);
2400 if (ret)
2401 printk(KERN_ERR
2402 "IOMMU: mapping reserved region failed\n");
ba395927 2403 }
4ed0d3e6 2404 }
49a0429e 2405
19943b0e
DW
2406 iommu_prepare_isa();
2407
ba395927
KA
2408 /*
2409 * for each drhd
2410 * enable fault log
2411 * global invalidate context cache
2412 * global invalidate iotlb
2413 * enable translation
2414 */
2415 for_each_drhd_unit(drhd) {
2416 if (drhd->ignored)
2417 continue;
2418 iommu = drhd->iommu;
ba395927
KA
2419
2420 iommu_flush_write_buffer(iommu);
2421
3460a6d9
KA
2422 ret = dmar_set_interrupt(iommu);
2423 if (ret)
2424 goto error;
2425
ba395927
KA
2426 iommu_set_root_entry(iommu);
2427
4c25a2c1 2428 iommu->flush.flush_context(iommu, 0, 0, 0, DMA_CCMD_GLOBAL_INVL);
1f0ef2aa 2429 iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH);
f8bab735 2430
ba395927
KA
2431 ret = iommu_enable_translation(iommu);
2432 if (ret)
2433 goto error;
b94996c9
DW
2434
2435 iommu_disable_protect_mem_regions(iommu);
ba395927
KA
2436 }
2437
2438 return 0;
2439error:
2440 for_each_drhd_unit(drhd) {
2441 if (drhd->ignored)
2442 continue;
2443 iommu = drhd->iommu;
2444 free_iommu(iommu);
2445 }
d9630fe9 2446 kfree(g_iommus);
ba395927
KA
2447 return ret;
2448}
2449
5a5e02a6 2450/* This takes a number of _MM_ pages, not VTD pages */
875764de
DW
2451static struct iova *intel_alloc_iova(struct device *dev,
2452 struct dmar_domain *domain,
2453 unsigned long nrpages, uint64_t dma_mask)
ba395927 2454{
ba395927 2455 struct pci_dev *pdev = to_pci_dev(dev);
ba395927 2456 struct iova *iova = NULL;
ba395927 2457
875764de
DW
2458 /* Restrict dma_mask to the width that the iommu can handle */
2459 dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask);
2460
2461 if (!dmar_forcedac && dma_mask > DMA_BIT_MASK(32)) {
ba395927
KA
2462 /*
2463 * First try to allocate an io virtual address in
284901a9 2464 * DMA_BIT_MASK(32) and if that fails then try allocating
3609801e 2465		 * from a higher range
ba395927 2466 */
875764de
DW
2467 iova = alloc_iova(&domain->iovad, nrpages,
2468 IOVA_PFN(DMA_BIT_MASK(32)), 1);
2469 if (iova)
2470 return iova;
2471 }
2472 iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1);
2473 if (unlikely(!iova)) {
 2474		printk(KERN_ERR "Allocating %ld-page iova for %s failed\n",
2475 nrpages, pci_name(pdev));
f76aec76
KA
2476 return NULL;
2477 }
2478
2479 return iova;
2480}
2481
147202aa 2482static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev)
f76aec76
KA
2483{
2484 struct dmar_domain *domain;
2485 int ret;
2486
2487 domain = get_domain_for_dev(pdev,
2488 DEFAULT_DOMAIN_ADDRESS_WIDTH);
2489 if (!domain) {
2490 printk(KERN_ERR
 2491			"Allocating domain for %s failed\n", pci_name(pdev));
4fe05bbc 2492 return NULL;
ba395927
KA
2493 }
2494
2495 /* make sure context mapping is ok */
5331fe6f 2496 if (unlikely(!domain_context_mapped(pdev))) {
4ed0d3e6
FY
2497 ret = domain_context_mapping(domain, pdev,
2498 CONTEXT_TT_MULTI_LEVEL);
f76aec76
KA
2499 if (ret) {
2500 printk(KERN_ERR
 2501				"Domain context map for %s failed\n",
2502 pci_name(pdev));
4fe05bbc 2503 return NULL;
f76aec76 2504 }
ba395927
KA
2505 }
2506
f76aec76
KA
2507 return domain;
2508}
2509
147202aa
DW
2510static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev)
2511{
2512 struct device_domain_info *info;
2513
2514 /* No lock here, assumes no domain exit in normal case */
2515 info = dev->dev.archdata.iommu;
2516 if (likely(info))
2517 return info->domain;
2518
2519 return __get_valid_domain_for_dev(dev);
2520}
2521
2c2e2c38
FY
2522static int iommu_dummy(struct pci_dev *pdev)
2523{
2524 return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO;
2525}
2526
 2527/* Check whether the pdev needs to go through the non-identity map/unmap process. */
73676832 2528static int iommu_no_mapping(struct device *dev)
2c2e2c38 2529{
73676832 2530 struct pci_dev *pdev;
2c2e2c38
FY
2531 int found;
2532
73676832
DW
2533 if (unlikely(dev->bus != &pci_bus_type))
2534 return 1;
2535
2536 pdev = to_pci_dev(dev);
1e4c64c4
DW
2537 if (iommu_dummy(pdev))
2538 return 1;
2539
2c2e2c38 2540 if (!iommu_identity_mapping)
1e4c64c4 2541 return 0;
2c2e2c38
FY
2542
2543 found = identity_mapping(pdev);
2544 if (found) {
6941af28 2545 if (iommu_should_identity_map(pdev, 0))
2c2e2c38
FY
2546 return 1;
2547 else {
2548 /*
 2549			 * The 32-bit DMA device is removed from si_domain and
 2550			 * falls back to non-identity mapping.
2551 */
2552 domain_remove_one_dev_info(si_domain, pdev);
2553 printk(KERN_INFO "32bit %s uses non-identity mapping\n",
2554 pci_name(pdev));
2555 return 0;
2556 }
2557 } else {
2558 /*
 2559		 * When a 64-bit DMA device is detached from a VM, the device
 2560		 * is put back into si_domain for identity mapping.
2561 */
6941af28 2562 if (iommu_should_identity_map(pdev, 0)) {
2c2e2c38 2563 int ret;
5fe60f4e
DW
2564 ret = domain_add_dev_info(si_domain, pdev,
2565 hw_pass_through ?
2566 CONTEXT_TT_PASS_THROUGH :
2567 CONTEXT_TT_MULTI_LEVEL);
2c2e2c38
FY
2568 if (!ret) {
2569 printk(KERN_INFO "64bit %s uses identity mapping\n",
2570 pci_name(pdev));
2571 return 1;
2572 }
2573 }
2574 }
2575
1e4c64c4 2576 return 0;
2c2e2c38
FY
2577}
2578
bb9e6d65
FT
2579static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr,
2580 size_t size, int dir, u64 dma_mask)
f76aec76
KA
2581{
2582 struct pci_dev *pdev = to_pci_dev(hwdev);
f76aec76 2583 struct dmar_domain *domain;
5b6985ce 2584 phys_addr_t start_paddr;
f76aec76
KA
2585 struct iova *iova;
2586 int prot = 0;
6865f0d1 2587 int ret;
8c11e798 2588 struct intel_iommu *iommu;
33041ec0 2589 unsigned long paddr_pfn = paddr >> PAGE_SHIFT;
f76aec76
KA
2590
2591 BUG_ON(dir == DMA_NONE);
2c2e2c38 2592
73676832 2593 if (iommu_no_mapping(hwdev))
6865f0d1 2594 return paddr;
f76aec76
KA
2595
2596 domain = get_valid_domain_for_dev(pdev);
2597 if (!domain)
2598 return 0;
2599
8c11e798 2600 iommu = domain_get_iommu(domain);
88cb6a74 2601 size = aligned_nrpages(paddr, size);
f76aec76 2602
5a5e02a6
DW
2603 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
2604 pdev->dma_mask);
f76aec76
KA
2605 if (!iova)
2606 goto error;
2607
ba395927
KA
2608 /*
2609 * Check if DMAR supports zero-length reads on write only
2610 * mappings..
2611 */
2612 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2613 !cap_zlr(iommu->cap))
ba395927
KA
2614 prot |= DMA_PTE_READ;
2615 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2616 prot |= DMA_PTE_WRITE;
2617 /*
6865f0d1 2618	 * paddr .. (paddr + size) might span a partial page; we should map the
ba395927 2619	 * whole page. Note: if two parts of one page are separately mapped, we
6865f0d1 2620	 * might have two guest addresses mapping to the same host paddr, but this
ba395927
KA
2621 * is not a big problem
2622 */
0ab36de2 2623 ret = domain_pfn_mapping(domain, mm_to_dma_pfn(iova->pfn_lo),
33041ec0 2624 mm_to_dma_pfn(paddr_pfn), size, prot);
ba395927
KA
2625 if (ret)
2626 goto error;
2627
1f0ef2aa
DW
2628 /* it's a non-present to present mapping. Only flush if caching mode */
2629 if (cap_caching_mode(iommu->cap))
03d6a246 2630 iommu_flush_iotlb_psi(iommu, 0, mm_to_dma_pfn(iova->pfn_lo), size);
1f0ef2aa 2631 else
8c11e798 2632 iommu_flush_write_buffer(iommu);
f76aec76 2633
03d6a246
DW
2634 start_paddr = (phys_addr_t)iova->pfn_lo << PAGE_SHIFT;
2635 start_paddr += paddr & ~PAGE_MASK;
2636 return start_paddr;
ba395927 2637
ba395927 2638error:
f76aec76
KA
2639 if (iova)
2640 __free_iova(&domain->iovad, iova);
4cf2e75d 2641	printk(KERN_ERR "Device %s request: %zx@%llx dir %d --- failed\n",
5b6985ce 2642 pci_name(pdev), size, (unsigned long long)paddr, dir);
ba395927
KA
2643 return 0;
2644}
2645
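/*
 * Illustrative sketch (not part of the driver): for a hypothetical 6KiB
 * buffer at physical address 0x12345800, __intel_map_single() above maps
 * two full VT-d pages and adds the in-page offset back in:
 *
 *	size        = aligned_nrpages(0x12345800, 6144)            = 2 pages
 *	iova->pfn_lo                                                = 0xffffe (say)
 *	start_paddr = (0xffffe << PAGE_SHIFT) + (0x12345800 & ~PAGE_MASK)
 *	            = 0xffffe800
 *
 * so the bus address handed to the device keeps the CPU address's low bits.
 */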
ffbbef5c
FT
2646static dma_addr_t intel_map_page(struct device *dev, struct page *page,
2647 unsigned long offset, size_t size,
2648 enum dma_data_direction dir,
2649 struct dma_attrs *attrs)
bb9e6d65 2650{
ffbbef5c
FT
2651 return __intel_map_single(dev, page_to_phys(page) + offset, size,
2652 dir, to_pci_dev(dev)->dma_mask);
bb9e6d65
FT
2653}
2654
5e0d2a6f 2655static void flush_unmaps(void)
2656{
80b20dd8 2657 int i, j;
5e0d2a6f 2658
5e0d2a6f 2659 timer_on = 0;
2660
2661 /* just flush them all */
2662 for (i = 0; i < g_num_of_iommus; i++) {
a2bb8459
WH
2663 struct intel_iommu *iommu = g_iommus[i];
2664 if (!iommu)
2665 continue;
c42d9f32 2666
9dd2fe89
YZ
2667 if (!deferred_flush[i].next)
2668 continue;
2669
2670 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
93a23a72 2671 DMA_TLB_GLOBAL_FLUSH);
9dd2fe89 2672 for (j = 0; j < deferred_flush[i].next; j++) {
93a23a72
YZ
2673 unsigned long mask;
2674 struct iova *iova = deferred_flush[i].iova[j];
2675
64de5af0 2676 mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1));
93a23a72 2677 iommu_flush_dev_iotlb(deferred_flush[i].domain[j],
64de5af0 2678 (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask);
93a23a72 2679 __free_iova(&deferred_flush[i].domain[j]->iovad, iova);
80b20dd8 2680 }
9dd2fe89 2681 deferred_flush[i].next = 0;
5e0d2a6f 2682 }
2683
5e0d2a6f 2684 list_size = 0;
5e0d2a6f 2685}
2686
2687static void flush_unmaps_timeout(unsigned long data)
2688{
80b20dd8 2689 unsigned long flags;
2690
2691 spin_lock_irqsave(&async_umap_flush_lock, flags);
5e0d2a6f 2692 flush_unmaps();
80b20dd8 2693 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
5e0d2a6f 2694}
2695
2696static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2697{
2698 unsigned long flags;
80b20dd8 2699 int next, iommu_id;
8c11e798 2700 struct intel_iommu *iommu;
5e0d2a6f 2701
2702 spin_lock_irqsave(&async_umap_flush_lock, flags);
80b20dd8 2703 if (list_size == HIGH_WATER_MARK)
2704 flush_unmaps();
2705
8c11e798
WH
2706 iommu = domain_get_iommu(dom);
2707 iommu_id = iommu->seq_id;
c42d9f32 2708
80b20dd8 2709 next = deferred_flush[iommu_id].next;
2710 deferred_flush[iommu_id].domain[next] = dom;
2711 deferred_flush[iommu_id].iova[next] = iova;
2712 deferred_flush[iommu_id].next++;
5e0d2a6f 2713
2714 if (!timer_on) {
2715 mod_timer(&unmap_timer, jiffies + msecs_to_jiffies(10));
2716 timer_on = 1;
2717 }
2718 list_size++;
2719 spin_unlock_irqrestore(&async_umap_flush_lock, flags);
2720}
2721
ffbbef5c
FT
2722static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr,
2723 size_t size, enum dma_data_direction dir,
2724 struct dma_attrs *attrs)
ba395927 2725{
ba395927 2726 struct pci_dev *pdev = to_pci_dev(dev);
f76aec76 2727 struct dmar_domain *domain;
d794dc9b 2728 unsigned long start_pfn, last_pfn;
ba395927 2729 struct iova *iova;
8c11e798 2730 struct intel_iommu *iommu;
ba395927 2731
73676832 2732 if (iommu_no_mapping(dev))
f76aec76 2733 return;
2c2e2c38 2734
ba395927
KA
2735 domain = find_domain(pdev);
2736 BUG_ON(!domain);
2737
8c11e798
WH
2738 iommu = domain_get_iommu(domain);
2739
ba395927 2740 iova = find_iova(&domain->iovad, IOVA_PFN(dev_addr));
85b98276
DW
2741 if (WARN_ONCE(!iova, "Driver unmaps unmatched page at PFN %llx\n",
2742 (unsigned long long)dev_addr))
ba395927 2743 return;
ba395927 2744
d794dc9b
DW
2745 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2746 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
ba395927 2747
d794dc9b
DW
2748 pr_debug("Device %s unmapping: pfn %lx-%lx\n",
2749 pci_name(pdev), start_pfn, last_pfn);
ba395927 2750
f76aec76 2751 /* clear the whole page */
d794dc9b
DW
2752 dma_pte_clear_range(domain, start_pfn, last_pfn);
2753
f76aec76 2754 /* free page tables */
d794dc9b
DW
2755 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
2756
5e0d2a6f 2757 if (intel_iommu_strict) {
03d6a246 2758 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
d794dc9b 2759 last_pfn - start_pfn + 1);
5e0d2a6f 2760 /* free iova */
2761 __free_iova(&domain->iovad, iova);
2762 } else {
2763 add_unmap(domain, iova);
2764 /*
 2765		 * queue up the release of the mapping to save the roughly 1/6th
 2766		 * of the cpu time otherwise spent on the iotlb flush operation...
2767 */
5e0d2a6f 2768 }
ba395927
KA
2769}
2770
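/*
 * Illustrative sketch (not part of the driver): the non-strict unmap path
 * above batches IOTLB flushes roughly like this:
 *
 *	intel_unmap_page()
 *	    -> add_unmap(domain, iova)      queued, timer armed for ~10ms
 *	    ...                             up to HIGH_WATER_MARK entries
 *	    -> flush_unmaps_timeout()
 *	        -> flush_unmaps()           one global IOTLB flush per IOMMU,
 *	                                    then the queued IOVAs are freed
 *
 * Booting with intel_iommu=strict skips the queue and flushes per unmap.
 */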
d7ab5c46
FT
2771static void *intel_alloc_coherent(struct device *hwdev, size_t size,
2772 dma_addr_t *dma_handle, gfp_t flags)
ba395927
KA
2773{
2774 void *vaddr;
2775 int order;
2776
5b6985ce 2777 size = PAGE_ALIGN(size);
ba395927
KA
2778 order = get_order(size);
2779 flags &= ~(GFP_DMA | GFP_DMA32);
2780
2781 vaddr = (void *)__get_free_pages(flags, order);
2782 if (!vaddr)
2783 return NULL;
2784 memset(vaddr, 0, size);
2785
bb9e6d65
FT
2786 *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size,
2787 DMA_BIDIRECTIONAL,
2788 hwdev->coherent_dma_mask);
ba395927
KA
2789 if (*dma_handle)
2790 return vaddr;
2791 free_pages((unsigned long)vaddr, order);
2792 return NULL;
2793}
2794
d7ab5c46
FT
2795static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr,
2796 dma_addr_t dma_handle)
ba395927
KA
2797{
2798 int order;
2799
5b6985ce 2800 size = PAGE_ALIGN(size);
ba395927
KA
2801 order = get_order(size);
2802
0db9b7ae 2803 intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL);
ba395927
KA
2804 free_pages((unsigned long)vaddr, order);
2805}
2806
d7ab5c46
FT
2807static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist,
2808 int nelems, enum dma_data_direction dir,
2809 struct dma_attrs *attrs)
ba395927 2810{
ba395927
KA
2811 struct pci_dev *pdev = to_pci_dev(hwdev);
2812 struct dmar_domain *domain;
d794dc9b 2813 unsigned long start_pfn, last_pfn;
f76aec76 2814 struct iova *iova;
8c11e798 2815 struct intel_iommu *iommu;
ba395927 2816
73676832 2817 if (iommu_no_mapping(hwdev))
ba395927
KA
2818 return;
2819
2820 domain = find_domain(pdev);
8c11e798
WH
2821 BUG_ON(!domain);
2822
2823 iommu = domain_get_iommu(domain);
ba395927 2824
c03ab37c 2825 iova = find_iova(&domain->iovad, IOVA_PFN(sglist[0].dma_address));
85b98276
DW
2826 if (WARN_ONCE(!iova, "Driver unmaps unmatched sglist at PFN %llx\n",
2827 (unsigned long long)sglist[0].dma_address))
f76aec76 2828 return;
f76aec76 2829
d794dc9b
DW
2830 start_pfn = mm_to_dma_pfn(iova->pfn_lo);
2831 last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1;
f76aec76
KA
2832
2833 /* clear the whole page */
d794dc9b
DW
2834 dma_pte_clear_range(domain, start_pfn, last_pfn);
2835
f76aec76 2836 /* free page tables */
d794dc9b 2837 dma_pte_free_pagetable(domain, start_pfn, last_pfn);
f76aec76 2838
acea0018
DW
2839 if (intel_iommu_strict) {
2840 iommu_flush_iotlb_psi(iommu, domain->id, start_pfn,
2841 last_pfn - start_pfn + 1);
2842 /* free iova */
2843 __free_iova(&domain->iovad, iova);
2844 } else {
2845 add_unmap(domain, iova);
2846 /*
 2847		 * queue up the release of the mapping to save the roughly 1/6th
 2848		 * of the cpu time otherwise spent on the iotlb flush operation...
2849 */
2850 }
ba395927
KA
2851}
2852
ba395927 2853static int intel_nontranslate_map_sg(struct device *hddev,
c03ab37c 2854 struct scatterlist *sglist, int nelems, int dir)
ba395927
KA
2855{
2856 int i;
c03ab37c 2857 struct scatterlist *sg;
ba395927 2858
c03ab37c 2859 for_each_sg(sglist, sg, nelems, i) {
12d4d40e 2860 BUG_ON(!sg_page(sg));
4cf2e75d 2861 sg->dma_address = page_to_phys(sg_page(sg)) + sg->offset;
c03ab37c 2862 sg->dma_length = sg->length;
ba395927
KA
2863 }
2864 return nelems;
2865}
2866
d7ab5c46
FT
2867static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems,
2868 enum dma_data_direction dir, struct dma_attrs *attrs)
ba395927 2869{
ba395927 2870 int i;
ba395927
KA
2871 struct pci_dev *pdev = to_pci_dev(hwdev);
2872 struct dmar_domain *domain;
f76aec76
KA
2873 size_t size = 0;
2874 int prot = 0;
b536d24d 2875 size_t offset_pfn = 0;
f76aec76
KA
2876 struct iova *iova = NULL;
2877 int ret;
c03ab37c 2878 struct scatterlist *sg;
b536d24d 2879 unsigned long start_vpfn;
8c11e798 2880 struct intel_iommu *iommu;
ba395927
KA
2881
2882 BUG_ON(dir == DMA_NONE);
73676832 2883 if (iommu_no_mapping(hwdev))
c03ab37c 2884 return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir);
ba395927 2885
f76aec76
KA
2886 domain = get_valid_domain_for_dev(pdev);
2887 if (!domain)
2888 return 0;
2889
8c11e798
WH
2890 iommu = domain_get_iommu(domain);
2891
b536d24d 2892 for_each_sg(sglist, sg, nelems, i)
88cb6a74 2893 size += aligned_nrpages(sg->offset, sg->length);
f76aec76 2894
5a5e02a6
DW
2895 iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size),
2896 pdev->dma_mask);
f76aec76 2897 if (!iova) {
c03ab37c 2898 sglist->dma_length = 0;
f76aec76
KA
2899 return 0;
2900 }
2901
2902 /*
2903 * Check if DMAR supports zero-length reads on write only
2904 * mappings..
2905 */
2906 if (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL || \
8c11e798 2907 !cap_zlr(iommu->cap))
f76aec76
KA
2908 prot |= DMA_PTE_READ;
2909 if (dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)
2910 prot |= DMA_PTE_WRITE;
2911
b536d24d 2912 start_vpfn = mm_to_dma_pfn(iova->pfn_lo);
e1605495 2913
f532959b 2914 ret = domain_sg_mapping(domain, start_vpfn, sglist, size, prot);
e1605495
DW
2915 if (unlikely(ret)) {
2916 /* clear the page */
2917 dma_pte_clear_range(domain, start_vpfn,
2918 start_vpfn + size - 1);
2919 /* free page tables */
2920 dma_pte_free_pagetable(domain, start_vpfn,
2921 start_vpfn + size - 1);
2922 /* free iova */
2923 __free_iova(&domain->iovad, iova);
2924 return 0;
ba395927
KA
2925 }
2926
1f0ef2aa
DW
2927 /* it's a non-present to present mapping. Only flush if caching mode */
2928 if (cap_caching_mode(iommu->cap))
03d6a246 2929 iommu_flush_iotlb_psi(iommu, 0, start_vpfn, offset_pfn);
1f0ef2aa 2930 else
8c11e798 2931 iommu_flush_write_buffer(iommu);
1f0ef2aa 2932
ba395927
KA
2933 return nelems;
2934}
2935
dfb805e8
FT
2936static int intel_mapping_error(struct device *dev, dma_addr_t dma_addr)
2937{
2938 return !dma_addr;
2939}
2940
160c1d8e 2941struct dma_map_ops intel_dma_ops = {
ba395927
KA
2942 .alloc_coherent = intel_alloc_coherent,
2943 .free_coherent = intel_free_coherent,
ba395927
KA
2944 .map_sg = intel_map_sg,
2945 .unmap_sg = intel_unmap_sg,
ffbbef5c
FT
2946 .map_page = intel_map_page,
2947 .unmap_page = intel_unmap_page,
dfb805e8 2948 .mapping_error = intel_mapping_error,
ba395927
KA
2949};
2950
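/*
 * Illustrative sketch (not part of the driver): ordinary drivers never call
 * the ops above directly; they use the generic DMA API, which dispatches to
 * intel_map_page()/intel_unmap_page() once dma_ops points at intel_dma_ops
 * (see intel_iommu_init()).  A hypothetical driver would do:
 *
 *	dma_addr_t handle = dma_map_single(&pdev->dev, buf, len,
 *					   DMA_TO_DEVICE);
 *	if (dma_mapping_error(&pdev->dev, handle))
 *		goto fail;
 *	...
 *	dma_unmap_single(&pdev->dev, handle, len, DMA_TO_DEVICE);
 */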
2951static inline int iommu_domain_cache_init(void)
2952{
2953 int ret = 0;
2954
2955 iommu_domain_cache = kmem_cache_create("iommu_domain",
2956 sizeof(struct dmar_domain),
2957 0,
2958 SLAB_HWCACHE_ALIGN,
2959
2960 NULL);
2961 if (!iommu_domain_cache) {
2962 printk(KERN_ERR "Couldn't create iommu_domain cache\n");
2963 ret = -ENOMEM;
2964 }
2965
2966 return ret;
2967}
2968
2969static inline int iommu_devinfo_cache_init(void)
2970{
2971 int ret = 0;
2972
2973 iommu_devinfo_cache = kmem_cache_create("iommu_devinfo",
2974 sizeof(struct device_domain_info),
2975 0,
2976 SLAB_HWCACHE_ALIGN,
ba395927
KA
2977 NULL);
2978 if (!iommu_devinfo_cache) {
2979 printk(KERN_ERR "Couldn't create devinfo cache\n");
2980 ret = -ENOMEM;
2981 }
2982
2983 return ret;
2984}
2985
2986static inline int iommu_iova_cache_init(void)
2987{
2988 int ret = 0;
2989
2990 iommu_iova_cache = kmem_cache_create("iommu_iova",
2991 sizeof(struct iova),
2992 0,
2993 SLAB_HWCACHE_ALIGN,
ba395927
KA
2994 NULL);
2995 if (!iommu_iova_cache) {
2996 printk(KERN_ERR "Couldn't create iova cache\n");
2997 ret = -ENOMEM;
2998 }
2999
3000 return ret;
3001}
3002
3003static int __init iommu_init_mempool(void)
3004{
3005 int ret;
3006 ret = iommu_iova_cache_init();
3007 if (ret)
3008 return ret;
3009
3010 ret = iommu_domain_cache_init();
3011 if (ret)
3012 goto domain_error;
3013
3014 ret = iommu_devinfo_cache_init();
3015 if (!ret)
3016 return ret;
3017
3018 kmem_cache_destroy(iommu_domain_cache);
3019domain_error:
3020 kmem_cache_destroy(iommu_iova_cache);
3021
3022 return -ENOMEM;
3023}
3024
3025static void __init iommu_exit_mempool(void)
3026{
3027 kmem_cache_destroy(iommu_devinfo_cache);
3028 kmem_cache_destroy(iommu_domain_cache);
3029 kmem_cache_destroy(iommu_iova_cache);
3030
3031}
3032
ba395927
KA
3033static void __init init_no_remapping_devices(void)
3034{
3035 struct dmar_drhd_unit *drhd;
3036
3037 for_each_drhd_unit(drhd) {
3038 if (!drhd->include_all) {
3039 int i;
3040 for (i = 0; i < drhd->devices_cnt; i++)
3041 if (drhd->devices[i] != NULL)
3042 break;
3043 /* ignore DMAR unit if no pci devices exist */
3044 if (i == drhd->devices_cnt)
3045 drhd->ignored = 1;
3046 }
3047 }
3048
3049 if (dmar_map_gfx)
3050 return;
3051
3052 for_each_drhd_unit(drhd) {
3053 int i;
3054 if (drhd->ignored || drhd->include_all)
3055 continue;
3056
3057 for (i = 0; i < drhd->devices_cnt; i++)
3058 if (drhd->devices[i] &&
3059 !IS_GFX_DEVICE(drhd->devices[i]))
3060 break;
3061
3062 if (i < drhd->devices_cnt)
3063 continue;
3064
3065 /* bypass IOMMU if it is just for gfx devices */
3066 drhd->ignored = 1;
3067 for (i = 0; i < drhd->devices_cnt; i++) {
3068 if (!drhd->devices[i])
3069 continue;
358dd8ac 3070 drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO;
ba395927
KA
3071 }
3072 }
3073}
3074
f59c7b69
FY
3075#ifdef CONFIG_SUSPEND
3076static int init_iommu_hw(void)
3077{
3078 struct dmar_drhd_unit *drhd;
3079 struct intel_iommu *iommu = NULL;
3080
3081 for_each_active_iommu(iommu, drhd)
3082 if (iommu->qi)
3083 dmar_reenable_qi(iommu);
3084
3085 for_each_active_iommu(iommu, drhd) {
3086 iommu_flush_write_buffer(iommu);
3087
3088 iommu_set_root_entry(iommu);
3089
3090 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3091 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3092 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3093 DMA_TLB_GLOBAL_FLUSH);
f59c7b69 3094 iommu_enable_translation(iommu);
b94996c9 3095 iommu_disable_protect_mem_regions(iommu);
f59c7b69
FY
3096 }
3097
3098 return 0;
3099}
3100
3101static void iommu_flush_all(void)
3102{
3103 struct dmar_drhd_unit *drhd;
3104 struct intel_iommu *iommu;
3105
3106 for_each_active_iommu(iommu, drhd) {
3107 iommu->flush.flush_context(iommu, 0, 0, 0,
1f0ef2aa 3108 DMA_CCMD_GLOBAL_INVL);
f59c7b69 3109 iommu->flush.flush_iotlb(iommu, 0, 0, 0,
1f0ef2aa 3110 DMA_TLB_GLOBAL_FLUSH);
f59c7b69
FY
3111 }
3112}
3113
3114static int iommu_suspend(struct sys_device *dev, pm_message_t state)
3115{
3116 struct dmar_drhd_unit *drhd;
3117 struct intel_iommu *iommu = NULL;
3118 unsigned long flag;
3119
3120 for_each_active_iommu(iommu, drhd) {
3121 iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS,
3122 GFP_ATOMIC);
3123 if (!iommu->iommu_state)
3124 goto nomem;
3125 }
3126
3127 iommu_flush_all();
3128
3129 for_each_active_iommu(iommu, drhd) {
3130 iommu_disable_translation(iommu);
3131
3132 spin_lock_irqsave(&iommu->register_lock, flag);
3133
3134 iommu->iommu_state[SR_DMAR_FECTL_REG] =
3135 readl(iommu->reg + DMAR_FECTL_REG);
3136 iommu->iommu_state[SR_DMAR_FEDATA_REG] =
3137 readl(iommu->reg + DMAR_FEDATA_REG);
3138 iommu->iommu_state[SR_DMAR_FEADDR_REG] =
3139 readl(iommu->reg + DMAR_FEADDR_REG);
3140 iommu->iommu_state[SR_DMAR_FEUADDR_REG] =
3141 readl(iommu->reg + DMAR_FEUADDR_REG);
3142
3143 spin_unlock_irqrestore(&iommu->register_lock, flag);
3144 }
3145 return 0;
3146
3147nomem:
3148 for_each_active_iommu(iommu, drhd)
3149 kfree(iommu->iommu_state);
3150
3151 return -ENOMEM;
3152}
3153
3154static int iommu_resume(struct sys_device *dev)
3155{
3156 struct dmar_drhd_unit *drhd;
3157 struct intel_iommu *iommu = NULL;
3158 unsigned long flag;
3159
3160 if (init_iommu_hw()) {
3161 WARN(1, "IOMMU setup failed, DMAR can not resume!\n");
3162 return -EIO;
3163 }
3164
3165 for_each_active_iommu(iommu, drhd) {
3166
3167 spin_lock_irqsave(&iommu->register_lock, flag);
3168
3169 writel(iommu->iommu_state[SR_DMAR_FECTL_REG],
3170 iommu->reg + DMAR_FECTL_REG);
3171 writel(iommu->iommu_state[SR_DMAR_FEDATA_REG],
3172 iommu->reg + DMAR_FEDATA_REG);
3173 writel(iommu->iommu_state[SR_DMAR_FEADDR_REG],
3174 iommu->reg + DMAR_FEADDR_REG);
3175 writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG],
3176 iommu->reg + DMAR_FEUADDR_REG);
3177
3178 spin_unlock_irqrestore(&iommu->register_lock, flag);
3179 }
3180
3181 for_each_active_iommu(iommu, drhd)
3182 kfree(iommu->iommu_state);
3183
3184 return 0;
3185}
3186
3187static struct sysdev_class iommu_sysclass = {
3188 .name = "iommu",
3189 .resume = iommu_resume,
3190 .suspend = iommu_suspend,
3191};
3192
3193static struct sys_device device_iommu = {
3194 .cls = &iommu_sysclass,
3195};
3196
3197static int __init init_iommu_sysfs(void)
3198{
3199 int error;
3200
3201 error = sysdev_class_register(&iommu_sysclass);
3202 if (error)
3203 return error;
3204
3205 error = sysdev_register(&device_iommu);
3206 if (error)
3207 sysdev_class_unregister(&iommu_sysclass);
3208
3209 return error;
3210}
3211
3212#else
3213static int __init init_iommu_sysfs(void)
3214{
3215 return 0;
3216}
 3217#endif /* CONFIG_SUSPEND */
3218
ba395927
KA
3219int __init intel_iommu_init(void)
3220{
3221 int ret = 0;
a59b50e9 3222 int force_on = 0;
ba395927 3223
a59b50e9
JC
3224 /* VT-d is required for a TXT/tboot launch, so enforce that */
3225 force_on = tboot_force_iommu();
3226
3227 if (dmar_table_init()) {
3228 if (force_on)
3229 panic("tboot: Failed to initialize DMAR table\n");
ba395927 3230 return -ENODEV;
a59b50e9 3231 }
ba395927 3232
a59b50e9
JC
3233 if (dmar_dev_scope_init()) {
3234 if (force_on)
3235 panic("tboot: Failed to initialize DMAR device scope\n");
1886e8a9 3236 return -ENODEV;
a59b50e9 3237 }
1886e8a9 3238
2ae21010
SS
3239 /*
3240 * Check the need for DMA-remapping initialization now.
3241 * Above initialization will also be used by Interrupt-remapping.
3242 */
19943b0e 3243 if (no_iommu || swiotlb || dmar_disabled)
2ae21010
SS
3244 return -ENODEV;
3245
ba395927
KA
3246 iommu_init_mempool();
3247 dmar_init_reserved_ranges();
3248
3249 init_no_remapping_devices();
3250
3251 ret = init_dmars();
3252 if (ret) {
a59b50e9
JC
3253 if (force_on)
3254 panic("tboot: Failed to initialize DMARs\n");
ba395927
KA
3255 printk(KERN_ERR "IOMMU: dmar init failed\n");
3256 put_iova_domain(&reserved_iova_list);
3257 iommu_exit_mempool();
3258 return ret;
3259 }
3260 printk(KERN_INFO
3261 "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n");
3262
5e0d2a6f 3263 init_timer(&unmap_timer);
ba395927 3264 force_iommu = 1;
19943b0e 3265 dma_ops = &intel_dma_ops;
4ed0d3e6 3266
f59c7b69 3267 init_iommu_sysfs();
a8bcbb0d
JR
3268
3269 register_iommu(&intel_iommu_ops);
3270
ba395927
KA
3271 return 0;
3272}
e820482c 3273
3199aa6b
HW
3274static void iommu_detach_dependent_devices(struct intel_iommu *iommu,
3275 struct pci_dev *pdev)
3276{
3277 struct pci_dev *tmp, *parent;
3278
3279 if (!iommu || !pdev)
3280 return;
3281
3282 /* dependent device detach */
3283 tmp = pci_find_upstream_pcie_bridge(pdev);
3284 /* Secondary interface's bus number and devfn 0 */
3285 if (tmp) {
3286 parent = pdev->bus->self;
3287 while (parent != tmp) {
3288 iommu_detach_dev(iommu, parent->bus->number,
276dbf99 3289 parent->devfn);
3199aa6b
HW
3290 parent = parent->bus->self;
3291 }
3292 if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */
3293 iommu_detach_dev(iommu,
3294 tmp->subordinate->number, 0);
3295 else /* this is a legacy PCI bridge */
276dbf99
DW
3296 iommu_detach_dev(iommu, tmp->bus->number,
3297 tmp->devfn);
3199aa6b
HW
3298 }
3299}
3300
2c2e2c38 3301static void domain_remove_one_dev_info(struct dmar_domain *domain,
c7151a8d
WH
3302 struct pci_dev *pdev)
3303{
3304 struct device_domain_info *info;
3305 struct intel_iommu *iommu;
3306 unsigned long flags;
3307 int found = 0;
3308 struct list_head *entry, *tmp;
3309
276dbf99
DW
3310 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3311 pdev->devfn);
c7151a8d
WH
3312 if (!iommu)
3313 return;
3314
3315 spin_lock_irqsave(&device_domain_lock, flags);
3316 list_for_each_safe(entry, tmp, &domain->devices) {
3317 info = list_entry(entry, struct device_domain_info, link);
276dbf99 3318 /* No need to compare PCI domain; it has to be the same */
c7151a8d
WH
3319 if (info->bus == pdev->bus->number &&
3320 info->devfn == pdev->devfn) {
3321 list_del(&info->link);
3322 list_del(&info->global);
3323 if (info->dev)
3324 info->dev->dev.archdata.iommu = NULL;
3325 spin_unlock_irqrestore(&device_domain_lock, flags);
3326
93a23a72 3327 iommu_disable_dev_iotlb(info);
c7151a8d 3328 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3329 iommu_detach_dependent_devices(iommu, pdev);
c7151a8d
WH
3330 free_devinfo_mem(info);
3331
3332 spin_lock_irqsave(&device_domain_lock, flags);
3333
3334 if (found)
3335 break;
3336 else
3337 continue;
3338 }
3339
 3340		/* if there are no other devices under the same iommu
 3341		 * owned by this domain, clear this iommu in iommu_bmp,
 3342		 * update the iommu count and coherency
3343 */
276dbf99
DW
3344 if (iommu == device_to_iommu(info->segment, info->bus,
3345 info->devfn))
c7151a8d
WH
3346 found = 1;
3347 }
3348
3349 if (found == 0) {
3350 unsigned long tmp_flags;
3351 spin_lock_irqsave(&domain->iommu_lock, tmp_flags);
3352 clear_bit(iommu->seq_id, &domain->iommu_bmp);
3353 domain->iommu_count--;
58c610bd 3354 domain_update_iommu_cap(domain);
c7151a8d
WH
3355 spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags);
3356 }
3357
3358 spin_unlock_irqrestore(&device_domain_lock, flags);
3359}
3360
3361static void vm_domain_remove_all_dev_info(struct dmar_domain *domain)
3362{
3363 struct device_domain_info *info;
3364 struct intel_iommu *iommu;
3365 unsigned long flags1, flags2;
3366
3367 spin_lock_irqsave(&device_domain_lock, flags1);
3368 while (!list_empty(&domain->devices)) {
3369 info = list_entry(domain->devices.next,
3370 struct device_domain_info, link);
3371 list_del(&info->link);
3372 list_del(&info->global);
3373 if (info->dev)
3374 info->dev->dev.archdata.iommu = NULL;
3375
3376 spin_unlock_irqrestore(&device_domain_lock, flags1);
3377
93a23a72 3378 iommu_disable_dev_iotlb(info);
276dbf99 3379 iommu = device_to_iommu(info->segment, info->bus, info->devfn);
c7151a8d 3380 iommu_detach_dev(iommu, info->bus, info->devfn);
3199aa6b 3381 iommu_detach_dependent_devices(iommu, info->dev);
c7151a8d
WH
3382
3383 /* clear this iommu in iommu_bmp, update iommu count
58c610bd 3384 * and capabilities
c7151a8d
WH
3385 */
3386 spin_lock_irqsave(&domain->iommu_lock, flags2);
3387 if (test_and_clear_bit(iommu->seq_id,
3388 &domain->iommu_bmp)) {
3389 domain->iommu_count--;
58c610bd 3390 domain_update_iommu_cap(domain);
c7151a8d
WH
3391 }
3392 spin_unlock_irqrestore(&domain->iommu_lock, flags2);
3393
3394 free_devinfo_mem(info);
3395 spin_lock_irqsave(&device_domain_lock, flags1);
3396 }
3397 spin_unlock_irqrestore(&device_domain_lock, flags1);
3398}
3399
5e98c4b1
WH
 3400/* domain id for a virtual machine; it won't be set in a context entry */
3401static unsigned long vm_domid;
3402
fe40f1e0
WH
3403static int vm_domain_min_agaw(struct dmar_domain *domain)
3404{
3405 int i;
3406 int min_agaw = domain->agaw;
3407
3408 i = find_first_bit(&domain->iommu_bmp, g_num_of_iommus);
3409 for (; i < g_num_of_iommus; ) {
3410 if (min_agaw > g_iommus[i]->agaw)
3411 min_agaw = g_iommus[i]->agaw;
3412
3413 i = find_next_bit(&domain->iommu_bmp, g_num_of_iommus, i+1);
3414 }
3415
3416 return min_agaw;
3417}
3418
5e98c4b1
WH
3419static struct dmar_domain *iommu_alloc_vm_domain(void)
3420{
3421 struct dmar_domain *domain;
3422
3423 domain = alloc_domain_mem();
3424 if (!domain)
3425 return NULL;
3426
3427 domain->id = vm_domid++;
4c923d47 3428 domain->nid = -1;
5e98c4b1
WH
3429 memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
3430 domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
3431
3432 return domain;
3433}
3434
2c2e2c38 3435static int md_domain_init(struct dmar_domain *domain, int guest_width)
5e98c4b1
WH
3436{
3437 int adjust_width;
3438
3439 init_iova_domain(&domain->iovad, DMA_32BIT_PFN);
5e98c4b1
WH
3440 spin_lock_init(&domain->iommu_lock);
3441
3442 domain_reserve_special_ranges(domain);
3443
3444 /* calculate AGAW */
3445 domain->gaw = guest_width;
3446 adjust_width = guestwidth_to_adjustwidth(guest_width);
3447 domain->agaw = width_to_agaw(adjust_width);
3448
3449 INIT_LIST_HEAD(&domain->devices);
3450
3451 domain->iommu_count = 0;
3452 domain->iommu_coherency = 0;
c5b15255 3453 domain->iommu_snooping = 0;
fe40f1e0 3454 domain->max_addr = 0;
4c923d47 3455 domain->nid = -1;
5e98c4b1
WH
3456
3457 /* always allocate the top pgd */
4c923d47 3458 domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
5e98c4b1
WH
3459 if (!domain->pgd)
3460 return -ENOMEM;
3461 domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
3462 return 0;
3463}
3464
3465static void iommu_free_vm_domain(struct dmar_domain *domain)
3466{
3467 unsigned long flags;
3468 struct dmar_drhd_unit *drhd;
3469 struct intel_iommu *iommu;
3470 unsigned long i;
3471 unsigned long ndomains;
3472
3473 for_each_drhd_unit(drhd) {
3474 if (drhd->ignored)
3475 continue;
3476 iommu = drhd->iommu;
3477
3478 ndomains = cap_ndoms(iommu->cap);
3479 i = find_first_bit(iommu->domain_ids, ndomains);
3480 for (; i < ndomains; ) {
3481 if (iommu->domains[i] == domain) {
3482 spin_lock_irqsave(&iommu->lock, flags);
3483 clear_bit(i, iommu->domain_ids);
3484 iommu->domains[i] = NULL;
3485 spin_unlock_irqrestore(&iommu->lock, flags);
3486 break;
3487 }
3488 i = find_next_bit(iommu->domain_ids, ndomains, i+1);
3489 }
3490 }
3491}
3492
3493static void vm_domain_exit(struct dmar_domain *domain)
3494{
5e98c4b1
WH
 3495	/* Domain 0 is reserved, so don't process it */
3496 if (!domain)
3497 return;
3498
3499 vm_domain_remove_all_dev_info(domain);
3500 /* destroy iovas */
3501 put_iova_domain(&domain->iovad);
5e98c4b1
WH
3502
3503 /* clear ptes */
595badf5 3504 dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
5e98c4b1
WH
3505
3506 /* free page tables */
d794dc9b 3507 dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw));
5e98c4b1
WH
3508
3509 iommu_free_vm_domain(domain);
3510 free_domain_mem(domain);
3511}
3512
5d450806 3513static int intel_iommu_domain_init(struct iommu_domain *domain)
38717946 3514{
5d450806 3515 struct dmar_domain *dmar_domain;
38717946 3516
5d450806
JR
3517 dmar_domain = iommu_alloc_vm_domain();
3518 if (!dmar_domain) {
38717946 3519 printk(KERN_ERR
5d450806
JR
3520 "intel_iommu_domain_init: dmar_domain == NULL\n");
3521 return -ENOMEM;
38717946 3522 }
2c2e2c38 3523 if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
38717946 3524 printk(KERN_ERR
5d450806
JR
3525 "intel_iommu_domain_init() failed\n");
3526 vm_domain_exit(dmar_domain);
3527 return -ENOMEM;
38717946 3528 }
5d450806 3529 domain->priv = dmar_domain;
faa3d6f5 3530
5d450806 3531 return 0;
38717946 3532}
38717946 3533
5d450806 3534static void intel_iommu_domain_destroy(struct iommu_domain *domain)
38717946 3535{
5d450806
JR
3536 struct dmar_domain *dmar_domain = domain->priv;
3537
3538 domain->priv = NULL;
3539 vm_domain_exit(dmar_domain);
38717946 3540}
38717946 3541
4c5478c9
JR
3542static int intel_iommu_attach_device(struct iommu_domain *domain,
3543 struct device *dev)
38717946 3544{
4c5478c9
JR
3545 struct dmar_domain *dmar_domain = domain->priv;
3546 struct pci_dev *pdev = to_pci_dev(dev);
fe40f1e0
WH
3547 struct intel_iommu *iommu;
3548 int addr_width;
3549 u64 end;
faa3d6f5
WH
3550
3551 /* normally pdev is not mapped */
3552 if (unlikely(domain_context_mapped(pdev))) {
3553 struct dmar_domain *old_domain;
3554
3555 old_domain = find_domain(pdev);
3556 if (old_domain) {
2c2e2c38
FY
3557 if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE ||
3558 dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)
3559 domain_remove_one_dev_info(old_domain, pdev);
faa3d6f5
WH
3560 else
3561 domain_remove_dev_info(old_domain);
3562 }
3563 }
3564
276dbf99
DW
3565 iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number,
3566 pdev->devfn);
fe40f1e0
WH
3567 if (!iommu)
3568 return -ENODEV;
3569
3570 /* check if this iommu agaw is sufficient for max mapped address */
3571 addr_width = agaw_to_width(iommu->agaw);
3572 end = DOMAIN_MAX_ADDR(addr_width);
3573 end = end & VTD_PAGE_MASK;
4c5478c9 3574 if (end < dmar_domain->max_addr) {
fe40f1e0
WH
3575 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3576 "sufficient for the mapped address (%llx)\n",
4c5478c9 3577 __func__, iommu->agaw, dmar_domain->max_addr);
fe40f1e0
WH
3578 return -EFAULT;
3579 }
3580
5fe60f4e 3581 return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL);
38717946 3582}
38717946 3583
4c5478c9
JR
3584static void intel_iommu_detach_device(struct iommu_domain *domain,
3585 struct device *dev)
38717946 3586{
4c5478c9
JR
3587 struct dmar_domain *dmar_domain = domain->priv;
3588 struct pci_dev *pdev = to_pci_dev(dev);
3589
2c2e2c38 3590 domain_remove_one_dev_info(dmar_domain, pdev);
faa3d6f5 3591}
c7151a8d 3592
dde57a21
JR
3593static int intel_iommu_map_range(struct iommu_domain *domain,
3594 unsigned long iova, phys_addr_t hpa,
3595 size_t size, int iommu_prot)
faa3d6f5 3596{
dde57a21 3597 struct dmar_domain *dmar_domain = domain->priv;
fe40f1e0
WH
3598 u64 max_addr;
3599 int addr_width;
dde57a21 3600 int prot = 0;
faa3d6f5 3601 int ret;
fe40f1e0 3602
dde57a21
JR
3603 if (iommu_prot & IOMMU_READ)
3604 prot |= DMA_PTE_READ;
3605 if (iommu_prot & IOMMU_WRITE)
3606 prot |= DMA_PTE_WRITE;
9cf06697
SY
3607 if ((iommu_prot & IOMMU_CACHE) && dmar_domain->iommu_snooping)
3608 prot |= DMA_PTE_SNP;
dde57a21 3609
163cc52c 3610 max_addr = iova + size;
dde57a21 3611 if (dmar_domain->max_addr < max_addr) {
fe40f1e0
WH
3612 int min_agaw;
3613 u64 end;
3614
3615 /* check if minimum agaw is sufficient for mapped address */
dde57a21 3616 min_agaw = vm_domain_min_agaw(dmar_domain);
fe40f1e0
WH
3617 addr_width = agaw_to_width(min_agaw);
3618 end = DOMAIN_MAX_ADDR(addr_width);
3619 end = end & VTD_PAGE_MASK;
3620 if (end < max_addr) {
3621 printk(KERN_ERR "%s: iommu agaw (%d) is not "
3622 "sufficient for the mapped address (%llx)\n",
3623 __func__, min_agaw, max_addr);
3624 return -EFAULT;
3625 }
dde57a21 3626 dmar_domain->max_addr = max_addr;
fe40f1e0 3627 }
ad051221
DW
3628 /* Round up size to next multiple of PAGE_SIZE, if it and
3629 the low bits of hpa would take us onto the next page */
88cb6a74 3630 size = aligned_nrpages(hpa, size);
ad051221
DW
3631 ret = domain_pfn_mapping(dmar_domain, iova >> VTD_PAGE_SHIFT,
3632 hpa >> VTD_PAGE_SHIFT, size, prot);
faa3d6f5 3633 return ret;
38717946 3634}
38717946 3635
dde57a21
JR
3636static void intel_iommu_unmap_range(struct iommu_domain *domain,
3637 unsigned long iova, size_t size)
38717946 3638{
dde57a21 3639 struct dmar_domain *dmar_domain = domain->priv;
faa3d6f5 3640
4b99d352
SY
3641 if (!size)
3642 return;
3643
163cc52c
DW
3644 dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT,
3645 (iova + size - 1) >> VTD_PAGE_SHIFT);
fe40f1e0 3646
163cc52c
DW
3647 if (dmar_domain->max_addr == iova + size)
3648 dmar_domain->max_addr = iova;
38717946 3649}
38717946 3650
d14d6577
JR
3651static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
3652 unsigned long iova)
38717946 3653{
d14d6577 3654 struct dmar_domain *dmar_domain = domain->priv;
38717946 3655 struct dma_pte *pte;
faa3d6f5 3656 u64 phys = 0;
38717946 3657
b026fd28 3658 pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT);
38717946 3659 if (pte)
faa3d6f5 3660 phys = dma_pte_addr(pte);
38717946 3661
faa3d6f5 3662 return phys;
38717946 3663}
a8bcbb0d 3664
dbb9fd86
SY
3665static int intel_iommu_domain_has_cap(struct iommu_domain *domain,
3666 unsigned long cap)
3667{
3668 struct dmar_domain *dmar_domain = domain->priv;
3669
3670 if (cap == IOMMU_CAP_CACHE_COHERENCY)
3671 return dmar_domain->iommu_snooping;
3672
3673 return 0;
3674}
3675
a8bcbb0d
JR
3676static struct iommu_ops intel_iommu_ops = {
3677 .domain_init = intel_iommu_domain_init,
3678 .domain_destroy = intel_iommu_domain_destroy,
3679 .attach_dev = intel_iommu_attach_device,
3680 .detach_dev = intel_iommu_detach_device,
3681 .map = intel_iommu_map_range,
3682 .unmap = intel_iommu_unmap_range,
3683 .iova_to_phys = intel_iommu_iova_to_phys,
dbb9fd86 3684 .domain_has_cap = intel_iommu_domain_has_cap,
a8bcbb0d 3685};
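/*
 * Illustrative sketch (not part of the driver): these ops back the generic
 * IOMMU API registered via register_iommu() in intel_iommu_init().  Assuming
 * this kernel's iommu_domain_alloc()/iommu_map_range() wrappers, a
 * hypothetical consumer such as KVM device assignment would use them as:
 *
 *	struct iommu_domain *dom = iommu_domain_alloc();
 *	iommu_attach_device(dom, &pdev->dev);
 *	iommu_map_range(dom, iova, hpa, size, IOMMU_READ | IOMMU_WRITE);
 *	...
 *	iommu_unmap_range(dom, iova, size);
 *	iommu_detach_device(dom, &pdev->dev);
 *	iommu_domain_free(dom);
 */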
9af88143
DW
3686
3687static void __devinit quirk_iommu_rwbf(struct pci_dev *dev)
3688{
3689 /*
3690 * Mobile 4 Series Chipset neglects to set RWBF capability,
3691 * but needs it:
3692 */
3693 printk(KERN_INFO "DMAR: Forcing write-buffer flush capability\n");
3694 rwbf_quirk = 1;
3695}
3696
3697DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, 0x2a40, quirk_iommu_rwbf);
e0fc7e0b
DW
3698
3699/* On Tylersburg chipsets, some BIOSes have been known to enable the
3700 ISOCH DMAR unit for the Azalia sound device, but not give it any
3701 TLB entries, which causes it to deadlock. Check for that. We do
3702 this in a function called from init_dmars(), instead of in a PCI
3703 quirk, because we don't want to print the obnoxious "BIOS broken"
3704 message if VT-d is actually disabled.
3705*/
3706static void __init check_tylersburg_isoch(void)
3707{
3708 struct pci_dev *pdev;
3709 uint32_t vtisochctrl;
3710
3711 /* If there's no Azalia in the system anyway, forget it. */
3712 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x3a3e, NULL);
3713 if (!pdev)
3714 return;
3715 pci_dev_put(pdev);
3716
3717 /* System Management Registers. Might be hidden, in which case
3718 we can't do the sanity check. But that's OK, because the
3719 known-broken BIOSes _don't_ actually hide it, so far. */
3720 pdev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x342e, NULL);
3721 if (!pdev)
3722 return;
3723
3724 if (pci_read_config_dword(pdev, 0x188, &vtisochctrl)) {
3725 pci_dev_put(pdev);
3726 return;
3727 }
3728
3729 pci_dev_put(pdev);
3730
3731 /* If Azalia DMA is routed to the non-isoch DMAR unit, fine. */
3732 if (vtisochctrl & 1)
3733 return;
3734
3735 /* Drop all bits other than the number of TLB entries */
3736 vtisochctrl &= 0x1c;
3737
3738 /* If we have the recommended number of TLB entries (16), fine. */
3739 if (vtisochctrl == 0x10)
3740 return;
3741
3742 /* Zero TLB entries? You get to ride the short bus to school. */
3743 if (!vtisochctrl) {
3744 WARN(1, "Your BIOS is broken; DMA routed to ISOCH DMAR unit but no TLB space.\n"
3745 "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
3746 dmi_get_system_info(DMI_BIOS_VENDOR),
3747 dmi_get_system_info(DMI_BIOS_VERSION),
3748 dmi_get_system_info(DMI_PRODUCT_VERSION));
3749 iommu_identity_mapping |= IDENTMAP_AZALIA;
3750 return;
3751 }
3752
3753 printk(KERN_WARNING "DMAR: Recommended TLB entries for ISOCH unit is 16; your BIOS set %d\n",
3754 vtisochctrl);
3755}