1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
28
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
34 #include <linux/irq.h>
35 #include <linux/interrupt.h>
36 #include <linux/tboot.h>
37 #include <linux/dmi.h>
38
39 #define PREFIX "DMAR: "
40
41 /* No locks are needed as DMA remapping hardware unit
42  * list is constructed at boot time and hotplug of
43  * these units is not supported by the architecture.
44  */
45 LIST_HEAD(dmar_drhd_units);
46
47 static struct acpi_table_header * __initdata dmar_tbl;
48 static acpi_size dmar_tbl_size;
49
50 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
51 {
52         /*
53          * add INCLUDE_ALL at the tail, so a scan of the list will find it
54          * at the very end.
55          */
56         if (drhd->include_all)
57                 list_add_tail(&drhd->list, &dmar_drhd_units);
58         else
59                 list_add(&drhd->list, &dmar_drhd_units);
60 }
61
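/*
 * Resolve a single ACPI device scope entry to a struct pci_dev.  The entry
 * names a starting bus plus a path of (device, function) pairs; each hop is
 * looked up with pci_get_slot() and the walk continues on that device's
 * subordinate bus until the path is exhausted.  On success *dev carries the
 * reference taken by pci_get_slot().
 */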
62 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
63                                            struct pci_dev **dev, u16 segment)
64 {
65         struct pci_bus *bus;
66         struct pci_dev *pdev = NULL;
67         struct acpi_dmar_pci_path *path;
68         int count;
69
70         bus = pci_find_bus(segment, scope->bus);
71         path = (struct acpi_dmar_pci_path *)(scope + 1);
72         count = (scope->length - sizeof(struct acpi_dmar_device_scope))
73                 / sizeof(struct acpi_dmar_pci_path);
74
75         while (count) {
76                 if (pdev)
77                         pci_dev_put(pdev);
78                 /*
79                  * Some BIOSes list non-existent devices in the DMAR table;
80                  * just ignore them
81                  */
82                 if (!bus) {
83                         printk(KERN_WARNING
84                         PREFIX "Device scope bus [%d] not found\n",
85                         scope->bus);
86                         break;
87                 }
88                 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
89                 if (!pdev) {
90                         printk(KERN_WARNING PREFIX
91                         "Device scope device [%04x:%02x:%02x.%02x] not found\n",
92                                 segment, bus->number, path->dev, path->fn);
93                         break;
94                 }
95                 path++;
96                 count--;
97                 bus = pdev->subordinate;
98         }
99         if (!pdev) {
100                 printk(KERN_WARNING PREFIX
101                 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
102                 segment, scope->bus, path->dev, path->fn);
103                 *dev = NULL;
104                 return 0;
105         }
106         if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
107                         pdev->subordinate) || (scope->entry_type ==
108                         ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
109                 printk(KERN_WARNING PREFIX
110                         "Device scope type does not match for %s\n",
111                         pci_name(pdev));
112                 pci_dev_put(pdev);
113                 return -EINVAL;
114         }
115         *dev = pdev;
116         return 0;
117 }
118
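/*
 * Parse all device scope entries between start and end in two passes: first
 * count the PCI endpoint/bridge entries so the pci_dev array can be sized,
 * then resolve each of them with dmar_parse_one_dev_scope().  Entries of any
 * other type are skipped with a warning.
 */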
119 static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
120                                        struct pci_dev ***devices, u16 segment)
121 {
122         struct acpi_dmar_device_scope *scope;
123         void * tmp = start;
124         int index;
125         int ret;
126
127         *cnt = 0;
128         while (start < end) {
129                 scope = start;
130                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
131                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
132                         (*cnt)++;
133                 else
134                         printk(KERN_WARNING PREFIX
135                                 "Unsupported device scope\n");
136                 start += scope->length;
137         }
138         if (*cnt == 0)
139                 return 0;
140
141         *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
142         if (!*devices)
143                 return -ENOMEM;
144
145         start = tmp;
146         index = 0;
147         while (start < end) {
148                 scope = start;
149                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
150                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
151                         ret = dmar_parse_one_dev_scope(scope,
152                                 &(*devices)[index], segment);
153                         if (ret) {
154                                 kfree(*devices);
155                                 return ret;
156                         }
157                         index ++;
158                 }
159                 start += scope->length;
160         }
161
162         return 0;
163 }
164
165 /**
166  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
167  * structure which uniquely represents one DMA remapping hardware unit
168  * present in the platform
169  */
170 static int __init
171 dmar_parse_one_drhd(struct acpi_dmar_header *header)
172 {
173         struct acpi_dmar_hardware_unit *drhd;
174         struct dmar_drhd_unit *dmaru;
175         int ret = 0;
176
177         drhd = (struct acpi_dmar_hardware_unit *)header;
178         if (!drhd->address) {
179                 /* Promote an attitude of violence to a BIOS engineer today */
180                 WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n"
181                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
182                      dmi_get_system_info(DMI_BIOS_VENDOR),
183                      dmi_get_system_info(DMI_BIOS_VERSION),
184                      dmi_get_system_info(DMI_PRODUCT_VERSION));
185                 return -ENODEV;
186         }
187         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
188         if (!dmaru)
189                 return -ENOMEM;
190
191         dmaru->hdr = header;
192         dmaru->reg_base_addr = drhd->address;
193         dmaru->segment = drhd->segment;
194         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
195
196         ret = alloc_iommu(dmaru);
197         if (ret) {
198                 kfree(dmaru);
199                 return ret;
200         }
201         dmar_register_drhd_unit(dmaru);
202         return 0;
203 }
204
205 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
206 {
207         struct acpi_dmar_hardware_unit *drhd;
208         int ret = 0;
209
210         drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
211
212         if (dmaru->include_all)
213                 return 0;
214
215         ret = dmar_parse_dev_scope((void *)(drhd + 1),
216                                 ((void *)drhd) + drhd->header.length,
217                                 &dmaru->devices_cnt, &dmaru->devices,
218                                 drhd->segment);
219         if (ret) {
220                 list_del(&dmaru->list);
221                 kfree(dmaru);
222         }
223         return ret;
224 }
225
226 #ifdef CONFIG_DMAR
227 LIST_HEAD(dmar_rmrr_units);
228
229 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
230 {
231         list_add(&rmrr->list, &dmar_rmrr_units);
232 }
233
234
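/*
 * RMRR (Reserved Memory Region Reporting) structures describe memory ranges
 * that the BIOS expects to stay accessible to DMA from the listed devices
 * (e.g. buffers used for legacy USB keyboard/mouse emulation).  Only the
 * header and address range are recorded here; the device scope is resolved
 * later by rmrr_parse_dev().
 */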
235 static int __init
236 dmar_parse_one_rmrr(struct acpi_dmar_header *header)
237 {
238         struct acpi_dmar_reserved_memory *rmrr;
239         struct dmar_rmrr_unit *rmrru;
240
241         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
242         if (!rmrru)
243                 return -ENOMEM;
244
245         rmrru->hdr = header;
246         rmrr = (struct acpi_dmar_reserved_memory *)header;
247         rmrru->base_address = rmrr->base_address;
248         rmrru->end_address = rmrr->end_address;
249
250         dmar_register_rmrr_unit(rmrru);
251         return 0;
252 }
253
254 static int __init
255 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
256 {
257         struct acpi_dmar_reserved_memory *rmrr;
258         int ret;
259
260         rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
261         ret = dmar_parse_dev_scope((void *)(rmrr + 1),
262                 ((void *)rmrr) + rmrr->header.length,
263                 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
264
265         if (ret || (rmrru->devices_cnt == 0)) {
266                 list_del(&rmrru->list);
267                 kfree(rmrru);
268         }
269         return ret;
270 }
271
272 static LIST_HEAD(dmar_atsr_units);
273
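/*
 * ATSR (root port ATS capability reporting) structures list the PCIe root
 * ports below which devices may use Address Translation Services.  Only the
 * header and the INCLUDE_ALL flag are recorded here; the device scope is
 * resolved later by atsr_parse_dev().
 */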
274 static int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
275 {
276         struct acpi_dmar_atsr *atsr;
277         struct dmar_atsr_unit *atsru;
278
279         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
280         atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
281         if (!atsru)
282                 return -ENOMEM;
283
284         atsru->hdr = hdr;
285         atsru->include_all = atsr->flags & 0x1;
286
287         list_add(&atsru->list, &dmar_atsr_units);
288
289         return 0;
290 }
291
292 static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
293 {
294         int rc;
295         struct acpi_dmar_atsr *atsr;
296
297         if (atsru->include_all)
298                 return 0;
299
300         atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
301         rc = dmar_parse_dev_scope((void *)(atsr + 1),
302                                 (void *)atsr + atsr->header.length,
303                                 &atsru->devices_cnt, &atsru->devices,
304                                 atsr->segment);
305         if (rc || !atsru->devices_cnt) {
306                 list_del(&atsru->list);
307                 kfree(atsru);
308         }
309
310         return rc;
311 }
312
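/*
 * Return 1 if @dev is covered by an ATSR: find an ATSR for the device's PCI
 * segment, then walk up to the PCIe root port and check whether that root
 * port is listed in the ATSR's device scope.  An INCLUDE_ALL ATSR matches
 * any device on its segment.
 */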
313 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
314 {
315         int i;
316         struct pci_bus *bus;
317         struct acpi_dmar_atsr *atsr;
318         struct dmar_atsr_unit *atsru;
319
320         list_for_each_entry(atsru, &dmar_atsr_units, list) {
321                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
322                 if (atsr->segment == pci_domain_nr(dev->bus))
323                         goto found;
324         }
325
326         return 0;
327
328 found:
329         for (bus = dev->bus; bus; bus = bus->parent) {
330                 struct pci_dev *bridge = bus->self;
331
332                 if (!bridge || !pci_is_pcie(bridge) ||
333                     bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
334                         return 0;
335
336                 if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
337                         for (i = 0; i < atsru->devices_cnt; i++)
338                                 if (atsru->devices[i] == bridge)
339                                         return 1;
340                         break;
341                 }
342         }
343
344         if (atsru->include_all)
345                 return 1;
346
347         return 0;
348 }
349 #endif
350
351 static void __init
352 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
353 {
354         struct acpi_dmar_hardware_unit *drhd;
355         struct acpi_dmar_reserved_memory *rmrr;
356         struct acpi_dmar_atsr *atsr;
357         struct acpi_dmar_rhsa *rhsa;
358
359         switch (header->type) {
360         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
361                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
362                                     header);
363                 printk(KERN_INFO PREFIX
364                         "DRHD base: %#016Lx flags: %#x\n",
365                         (unsigned long long)drhd->address, drhd->flags);
366                 break;
367         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
368                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
369                                     header);
370                 printk(KERN_INFO PREFIX
371                         "RMRR base: %#016Lx end: %#016Lx\n",
372                         (unsigned long long)rmrr->base_address,
373                         (unsigned long long)rmrr->end_address);
374                 break;
375         case ACPI_DMAR_TYPE_ATSR:
376                 atsr = container_of(header, struct acpi_dmar_atsr, header);
377                 printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
378                 break;
379         case ACPI_DMAR_HARDWARE_AFFINITY:
380                 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
381                 printk(KERN_INFO PREFIX "RHSA base: %#016Lx proximity domain: %#x\n",
382                        (unsigned long long)rhsa->base_address,
383                        rhsa->proximity_domain);
384                 break;
385         }
386 }
387
388 /**
389  * dmar_table_detect - checks to see if the platform supports DMAR devices
390  */
391 static int __init dmar_table_detect(void)
392 {
393         acpi_status status = AE_OK;
394
395         /* if we can find the DMAR table, then there are DMAR devices */
396         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
397                                 (struct acpi_table_header **)&dmar_tbl,
398                                 &dmar_tbl_size);
399
400         if (ACPI_SUCCESS(status) && !dmar_tbl) {
401                 printk(KERN_WARNING PREFIX "Unable to map DMAR\n");
402                 status = AE_NOT_FOUND;
403         }
404
405         return (ACPI_SUCCESS(status) ? 1 : 0);
406 }
407
408 /**
409  * parse_dmar_table - parses the DMA reporting table
410  */
411 static int __init
412 parse_dmar_table(void)
413 {
414         struct acpi_table_dmar *dmar;
415         struct acpi_dmar_header *entry_header;
416         int ret = 0;
417
418         /*
419          * Do it again, as the earlier dmar_tbl mapping could have been
420          * done with the early fixmap.
421          */
422         dmar_table_detect();
423
424         /*
425          * ACPI tables may not be DMA protected by tboot, so use the copy of the
426          * DMAR table that SINIT saved in SinitMleData in the TXT heap (DMA protected)
427          */
428         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
429
430         dmar = (struct acpi_table_dmar *)dmar_tbl;
431         if (!dmar)
432                 return -ENODEV;
433
434         if (dmar->width < PAGE_SHIFT - 1) {
435                 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
436                 return -EINVAL;
437         }
438
439         printk(KERN_INFO PREFIX "Host address width %d\n",
440                 dmar->width + 1);
441
442         entry_header = (struct acpi_dmar_header *)(dmar + 1);
443         while (((unsigned long)entry_header) <
444                         (((unsigned long)dmar) + dmar_tbl->length)) {
445                 /* Avoid looping forever on bad ACPI tables */
446                 if (entry_header->length == 0) {
447                         printk(KERN_WARNING PREFIX
448                                 "Invalid 0-length structure\n");
449                         ret = -EINVAL;
450                         break;
451                 }
452
453                 dmar_table_print_dmar_entry(entry_header);
454
455                 switch (entry_header->type) {
456                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
457                         ret = dmar_parse_one_drhd(entry_header);
458                         break;
459                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
460 #ifdef CONFIG_DMAR
461                         ret = dmar_parse_one_rmrr(entry_header);
462 #endif
463                         break;
464                 case ACPI_DMAR_TYPE_ATSR:
465 #ifdef CONFIG_DMAR
466                         ret = dmar_parse_one_atsr(entry_header);
467 #endif
468                         break;
469                 case ACPI_DMAR_HARDWARE_AFFINITY:
470                         /* We don't do anything with RHSA (yet?) */
471                         break;
472                 default:
473                         printk(KERN_WARNING PREFIX
474                                 "Unknown DMAR structure type %d\n",
475                                 entry_header->type);
476                         ret = 0; /* for forward compatibility */
477                         break;
478                 }
479                 if (ret)
480                         break;
481
482                 entry_header = ((void *)entry_header + entry_header->length);
483         }
484         return ret;
485 }
486
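/*
 * Return 1 if @dev, or any bridge above it, appears in the devices[] array
 * of a parsed device scope; walking up via dev->bus->self also matches
 * devices sitting behind a bridge that is itself listed in the scope.
 */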
487 int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
488                           struct pci_dev *dev)
489 {
490         int index;
491
492         while (dev) {
493                 for (index = 0; index < cnt; index++)
494                         if (dev == devices[index])
495                                 return 1;
496
497                 /* Check our parent */
498                 dev = dev->bus->self;
499         }
500
501         return 0;
502 }
503
504 struct dmar_drhd_unit *
505 dmar_find_matched_drhd_unit(struct pci_dev *dev)
506 {
507         struct dmar_drhd_unit *dmaru = NULL;
508         struct acpi_dmar_hardware_unit *drhd;
509
510         list_for_each_entry(dmaru, &dmar_drhd_units, list) {
511                 drhd = container_of(dmaru->hdr,
512                                     struct acpi_dmar_hardware_unit,
513                                     header);
514
515                 if (dmaru->include_all &&
516                     drhd->segment == pci_domain_nr(dev->bus))
517                         return dmaru;
518
519                 if (dmar_pci_device_match(dmaru->devices,
520                                           dmaru->devices_cnt, dev))
521                         return dmaru;
522         }
523
524         return NULL;
525 }
526
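/*
 * Second-stage parsing: resolve the device scopes recorded by
 * parse_dmar_table() into pci_dev pointers.  This must run after the PCI
 * subsystem has enumerated the buses, which is why it is separate from the
 * early table parsing.
 */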
527 int __init dmar_dev_scope_init(void)
528 {
529         struct dmar_drhd_unit *drhd, *drhd_n;
530         int ret = -ENODEV;
531
532         list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
533                 ret = dmar_parse_dev(drhd);
534                 if (ret)
535                         return ret;
536         }
537
538 #ifdef CONFIG_DMAR
539         {
540                 struct dmar_rmrr_unit *rmrr, *rmrr_n;
541                 struct dmar_atsr_unit *atsr, *atsr_n;
542
543                 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
544                         ret = rmrr_parse_dev(rmrr);
545                         if (ret)
546                                 return ret;
547                 }
548
549                 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
550                         ret = atsr_parse_dev(atsr);
551                         if (ret)
552                                 return ret;
553                 }
554         }
555 #endif
556
557         return ret;
558 }
559
560
561 int __init dmar_table_init(void)
562 {
563         static int dmar_table_initialized;
564         int ret;
565
566         if (dmar_table_initialized)
567                 return 0;
568
569         dmar_table_initialized = 1;
570
571         ret = parse_dmar_table();
572         if (ret) {
573                 if (ret != -ENODEV)
574                         printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
575                 return ret;
576         }
577
578         if (list_empty(&dmar_drhd_units)) {
579                 printk(KERN_INFO PREFIX "No DMAR devices found\n");
580                 return -ENODEV;
581         }
582
583 #ifdef CONFIG_DMAR
584         if (list_empty(&dmar_rmrr_units))
585                 printk(KERN_INFO PREFIX "No RMRR found\n");
586
587         if (list_empty(&dmar_atsr_units))
588                 printk(KERN_INFO PREFIX "No ATSR found\n");
589 #endif
590
591         return 0;
592 }
593
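/*
 * Early detection hook, called from architecture code before the DMA API is
 * set up.  It only checks for the presence of the DMAR ACPI table; if one is
 * found (and the IOMMU has not been disabled) it sets iommu_detected and
 * requests PCI ACS, leaving the real initialization to dmar_table_init() and
 * dmar_dev_scope_init() later in boot.  The early table mapping is dropped
 * again before returning.
 */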
594 void __init detect_intel_iommu(void)
595 {
596         int ret;
597
598         ret = dmar_table_detect();
599
600         {
601 #ifdef CONFIG_INTR_REMAP
602                 struct acpi_table_dmar *dmar;
603                 /*
604                  * for now we will disable dma-remapping when interrupt
605                  * remapping is enabled.
606                  * When support for queued invalidation for IOTLB invalidation
607                  * is added, we will not need this any more.
608                  */
609                 dmar = (struct acpi_table_dmar *) dmar_tbl;
610                 if (ret && cpu_has_x2apic && dmar->flags & 0x1)
611                         printk(KERN_INFO
612                                "Queued invalidation will be enabled to support "
613                                "x2apic and Intr-remapping.\n");
614 #endif
615 #ifdef CONFIG_DMAR
616                 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
617                     !dmar_disabled) {
618                         iommu_detected = 1;
619                         /* Make sure ACS will be enabled */
620                         pci_request_acs();
621                 }
622 #endif
623         }
624         early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
625         dmar_tbl = NULL;
626 }
627
628
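/*
 * Allocate and minimally initialize a struct intel_iommu for one DRHD: map
 * its register window, read the capability/extended capability registers,
 * compute the supported AGAW values, and remap a larger window if the IOTLB
 * or fault recording registers extend beyond the first page.
 */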
629 int alloc_iommu(struct dmar_drhd_unit *drhd)
630 {
631         struct intel_iommu *iommu;
632         int map_size;
633         u32 ver;
634         static int iommu_allocated = 0;
635         int agaw = 0;
636         int msagaw = 0;
637
638         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
639         if (!iommu)
640                 return -ENOMEM;
641
642         iommu->seq_id = iommu_allocated++;
643         sprintf(iommu->name, "dmar%d", iommu->seq_id);
644
645         iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
646         if (!iommu->reg) {
647                 printk(KERN_ERR "IOMMU: can't map the region\n");
648                 goto error;
649         }
650         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
651         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
652
653         if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
654                 /* Promote an attitude of violence to a BIOS engineer today */
655                 WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n"
656                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
657                      drhd->reg_base_addr,
658                      dmi_get_system_info(DMI_BIOS_VENDOR),
659                      dmi_get_system_info(DMI_BIOS_VERSION),
660                      dmi_get_system_info(DMI_PRODUCT_VERSION));
661                 goto err_unmap;
662         }
663
664 #ifdef CONFIG_DMAR
665         agaw = iommu_calculate_agaw(iommu);
666         if (agaw < 0) {
667                 printk(KERN_ERR
668                        "Cannot get a valid agaw for iommu (seq_id = %d)\n",
669                        iommu->seq_id);
670                 goto err_unmap;
671         }
672         msagaw = iommu_calculate_max_sagaw(iommu);
673         if (msagaw < 0) {
674                 printk(KERN_ERR
675                         "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
676                         iommu->seq_id);
677                 goto err_unmap;
678         }
679 #endif
680         iommu->agaw = agaw;
681         iommu->msagaw = msagaw;
682
683         /* the registers might be more than one page */
684         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
685                 cap_max_fault_reg_offset(iommu->cap));
686         map_size = VTD_PAGE_ALIGN(map_size);
687         if (map_size > VTD_PAGE_SIZE) {
688                 iounmap(iommu->reg);
689                 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
690                 if (!iommu->reg) {
691                         printk(KERN_ERR "IOMMU: can't map the region\n");
692                         goto error;
693                 }
694         }
695
696         ver = readl(iommu->reg + DMAR_VER_REG);
697         pr_info("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
698                 (unsigned long long)drhd->reg_base_addr,
699                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
700                 (unsigned long long)iommu->cap,
701                 (unsigned long long)iommu->ecap);
702
703         spin_lock_init(&iommu->register_lock);
704
705         drhd->iommu = iommu;
706         return 0;
707
708  err_unmap:
709         iounmap(iommu->reg);
710  error:
711         kfree(iommu);
712         return -1;
713 }
714
715 void free_iommu(struct intel_iommu *iommu)
716 {
717         if (!iommu)
718                 return;
719
720 #ifdef CONFIG_DMAR
721         free_dmar_iommu(iommu);
722 #endif
723
724         if (iommu->reg)
725                 iounmap(iommu->reg);
726         kfree(iommu);
727 }
728
729 /*
730  * Reclaim all the submitted descriptors which have completed their work.
731  */
732 static inline void reclaim_free_desc(struct q_inval *qi)
733 {
734         while (qi->desc_status[qi->free_tail] == QI_DONE ||
735                qi->desc_status[qi->free_tail] == QI_ABORT) {
736                 qi->desc_status[qi->free_tail] = QI_FREE;
737                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
738                 qi->free_cnt++;
739         }
740 }
741
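/*
 * Check the fault status register for queued invalidation errors.  Returns
 * -EINVAL if an Invalidation Queue Error hit the descriptor at @index (it is
 * overwritten with the wait descriptor so the queue can make progress),
 * -EAGAIN if an Invalidation Time-out Error aborted the pending wait
 * descriptor and the caller should resubmit, and 0 otherwise.  Invalidation
 * Completion Errors are simply acknowledged.
 */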
742 static int qi_check_fault(struct intel_iommu *iommu, int index)
743 {
744         u32 fault;
745         int head, tail;
746         struct q_inval *qi = iommu->qi;
747         int wait_index = (index + 1) % QI_LENGTH;
748
749         if (qi->desc_status[wait_index] == QI_ABORT)
750                 return -EAGAIN;
751
752         fault = readl(iommu->reg + DMAR_FSTS_REG);
753
754         /*
755          * If IQE happens, the head points to the descriptor associated
756          * with the error. No new descriptors are fetched until the IQE
757          * is cleared.
758          */
759         if (fault & DMA_FSTS_IQE) {
760                 head = readl(iommu->reg + DMAR_IQH_REG);
761                 if ((head >> DMAR_IQ_SHIFT) == index) {
762                         printk(KERN_ERR "VT-d detected invalid descriptor: "
763                                 "low=%llx, high=%llx\n",
764                                 (unsigned long long)qi->desc[index].low,
765                                 (unsigned long long)qi->desc[index].high);
766                         memcpy(&qi->desc[index], &qi->desc[wait_index],
767                                         sizeof(struct qi_desc));
768                         __iommu_flush_cache(iommu, &qi->desc[index],
769                                         sizeof(struct qi_desc));
770                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
771                         return -EINVAL;
772                 }
773         }
774
775         /*
776          * If ITE happens, all pending wait_desc commands are aborted.
777          * No new descriptors are fetched until the ITE is cleared.
778          */
779         if (fault & DMA_FSTS_ITE) {
780                 head = readl(iommu->reg + DMAR_IQH_REG);
781                 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
782                 head |= 1;
783                 tail = readl(iommu->reg + DMAR_IQT_REG);
784                 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
785
786                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
787
788                 do {
789                         if (qi->desc_status[head] == QI_IN_USE)
790                                 qi->desc_status[head] = QI_ABORT;
791                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
792                 } while (head != tail);
793
794                 if (qi->desc_status[wait_index] == QI_ABORT)
795                         return -EAGAIN;
796         }
797
798         if (fault & DMA_FSTS_ICE)
799                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
800
801         return 0;
802 }
803
804 /*
805  * Submit the queued invalidation descriptor to the remapping
806  * hardware unit and wait for its completion.
807  */
808 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
809 {
810         int rc;
811         struct q_inval *qi = iommu->qi;
812         struct qi_desc *hw, wait_desc;
813         int wait_index, index;
814         unsigned long flags;
815
816         if (!qi)
817                 return 0;
818
819         hw = qi->desc;
820
821 restart:
822         rc = 0;
823
824         spin_lock_irqsave(&qi->q_lock, flags);
825         while (qi->free_cnt < 3) {
826                 spin_unlock_irqrestore(&qi->q_lock, flags);
827                 cpu_relax();
828                 spin_lock_irqsave(&qi->q_lock, flags);
829         }
830
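        /*
         * Each submission consumes two consecutive slots: the caller's
         * descriptor at 'index' and a wait descriptor at 'wait_index' whose
         * status write tells us when the hardware has processed everything
         * up to and including the caller's descriptor.
         */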
831         index = qi->free_head;
832         wait_index = (index + 1) % QI_LENGTH;
833
834         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
835
836         hw[index] = *desc;
837
838         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
839                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
840         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
841
842         hw[wait_index] = wait_desc;
843
844         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
845         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
846
847         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
848         qi->free_cnt -= 2;
849
850         /*
851          * update the HW tail register indicating the presence of
852          * new descriptors.
853          */
854         writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
855
856         while (qi->desc_status[wait_index] != QI_DONE) {
857                 /*
858                  * We will leave the interrupts disabled, to prevent interrupt
859                  * context from queueing another cmd while a cmd is already
860                  * submitted and waiting for completion on this cpu. This is to avoid
861                  * a deadlock where the interrupt context can wait indefinitely
862                  * for free slots in the queue.
863                  */
864                 rc = qi_check_fault(iommu, index);
865                 if (rc)
866                         break;
867
868                 spin_unlock(&qi->q_lock);
869                 cpu_relax();
870                 spin_lock(&qi->q_lock);
871         }
872
873         qi->desc_status[index] = QI_DONE;
874
875         reclaim_free_desc(qi);
876         spin_unlock_irqrestore(&qi->q_lock, flags);
877
878         if (rc == -EAGAIN)
879                 goto restart;
880
881         return rc;
882 }
883
884 /*
885  * Flush the global interrupt entry cache.
886  */
887 void qi_global_iec(struct intel_iommu *iommu)
888 {
889         struct qi_desc desc;
890
891         desc.low = QI_IEC_TYPE;
892         desc.high = 0;
893
894         /* should never fail */
895         qi_submit_sync(&desc, iommu);
896 }
897
898 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
899                       u64 type)
900 {
901         struct qi_desc desc;
902
903         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
904                         | QI_CC_GRAN(type) | QI_CC_TYPE;
905         desc.high = 0;
906
907         qi_submit_sync(&desc, iommu);
908 }
909
910 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
911                     unsigned int size_order, u64 type)
912 {
913         u8 dw = 0, dr = 0;
914
915         struct qi_desc desc;
916         int ih = 0;
917
918         if (cap_write_drain(iommu->cap))
919                 dw = 1;
920
921         if (cap_read_drain(iommu->cap))
922                 dr = 1;
923
924         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
925                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
926         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
927                 | QI_IOTLB_AM(size_order);
928
929         qi_submit_sync(&desc, iommu);
930 }
931
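/*
 * Issue a device-IOTLB (ATS) invalidation.  A non-zero @mask asks for a
 * 2^mask-page invalidation: the address bits below the size are set to ones
 * and the S bit is set, which is how the descriptor encodes the size.
 * Queue depths at or above QI_DEV_IOTLB_MAX_INVS are encoded as 0.
 */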
932 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
933                         u64 addr, unsigned mask)
934 {
935         struct qi_desc desc;
936
937         if (mask) {
938                 BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
939                 addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
940                 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
941         } else
942                 desc.high = QI_DEV_IOTLB_ADDR(addr);
943
944         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
945                 qdep = 0;
946
947         desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
948                    QI_DIOTLB_TYPE;
949
950         qi_submit_sync(&desc, iommu);
951 }
952
953 /*
954  * Disable Queued Invalidation interface.
955  */
956 void dmar_disable_qi(struct intel_iommu *iommu)
957 {
958         unsigned long flags;
959         u32 sts;
960         cycles_t start_time = get_cycles();
961
962         if (!ecap_qis(iommu->ecap))
963                 return;
964
965         spin_lock_irqsave(&iommu->register_lock, flags);
966
967         sts =  dmar_readq(iommu->reg + DMAR_GSTS_REG);
968         if (!(sts & DMA_GSTS_QIES))
969                 goto end;
970
971         /*
972          * Give a chance to HW to complete the pending invalidation requests.
973          */
974         while ((readl(iommu->reg + DMAR_IQT_REG) !=
975                 readl(iommu->reg + DMAR_IQH_REG)) &&
976                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
977                 cpu_relax();
978
979         iommu->gcmd &= ~DMA_GCMD_QIE;
980         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
981
982         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
983                       !(sts & DMA_GSTS_QIES), sts);
984 end:
985         spin_unlock_irqrestore(&iommu->register_lock, flags);
986 }
987
988 /*
989  * Enable queued invalidation.
990  */
991 static void __dmar_enable_qi(struct intel_iommu *iommu)
992 {
993         u32 sts;
994         unsigned long flags;
995         struct q_inval *qi = iommu->qi;
996
997         qi->free_head = qi->free_tail = 0;
998         qi->free_cnt = QI_LENGTH;
999
1000         spin_lock_irqsave(&iommu->register_lock, flags);
1001
1002         /* write zero to the tail reg */
1003         writel(0, iommu->reg + DMAR_IQT_REG);
1004
1005         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
1006
1007         iommu->gcmd |= DMA_GCMD_QIE;
1008         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1009
1010         /* Make sure hardware complete it */
1011         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1012
1013         spin_unlock_irqrestore(&iommu->register_lock, flags);
1014 }
1015
1016 /*
1017  * Enable Queued Invalidation interface. This is a must to support
1018  * interrupt-remapping. Also used by DMA-remapping, which replaces
1019  * register based IOTLB invalidation.
1020  */
1021 int dmar_enable_qi(struct intel_iommu *iommu)
1022 {
1023         struct q_inval *qi;
1024
1025         if (!ecap_qis(iommu->ecap))
1026                 return -ENOENT;
1027
1028         /*
1029          * queued invalidation is already set up and enabled.
1030          */
1031         if (iommu->qi)
1032                 return 0;
1033
1034         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1035         if (!iommu->qi)
1036                 return -ENOMEM;
1037
1038         qi = iommu->qi;
1039
1040         qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
1041         if (!qi->desc) {
1042                 kfree(qi);
1043                 iommu->qi = NULL;
1044                 return -ENOMEM;
1045         }
1046
1047         qi->desc_status = kzalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
1048         if (!qi->desc_status) {
1049                 free_page((unsigned long) qi->desc);
1050                 kfree(qi);
1051                 iommu->qi = NULL;
1052                 return -ENOMEM;
1053         }
1054
1055         qi->free_head = qi->free_tail = 0;
1056         qi->free_cnt = QI_LENGTH;
1057
1058         spin_lock_init(&qi->q_lock);
1059
1060         __dmar_enable_qi(iommu);
1061
1062         return 0;
1063 }
1064
1065 /* iommu interrupt handling. Most of it is MSI-like. */
1066
1067 enum faulttype {
1068         DMA_REMAP,
1069         INTR_REMAP,
1070         UNKNOWN,
1071 };
1072
1073 static const char *dma_remap_fault_reasons[] =
1074 {
1075         "Software",
1076         "Present bit in root entry is clear",
1077         "Present bit in context entry is clear",
1078         "Invalid context entry",
1079         "Access beyond MGAW",
1080         "PTE Write access is not set",
1081         "PTE Read access is not set",
1082         "Next page table ptr is invalid",
1083         "Root table address invalid",
1084         "Context table ptr is invalid",
1085         "non-zero reserved fields in RTP",
1086         "non-zero reserved fields in CTP",
1087         "non-zero reserved fields in PTE",
1088 };
1089
1090 static const char *intr_remap_fault_reasons[] =
1091 {
1092         "Detected reserved fields in the decoded interrupt-remapped request",
1093         "Interrupt index exceeded the interrupt-remapping table size",
1094         "Present field in the IRTE entry is clear",
1095         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1096         "Detected reserved fields in the IRTE entry",
1097         "Blocked a compatibility format interrupt request",
1098         "Blocked an interrupt request due to source-id verification failure",
1099 };
1100
1101 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(dma_remap_fault_reasons) - 1)
1102
1103 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1104 {
1105         if (fault_reason >= 0x20 && (fault_reason < 0x20 +
1106                                      ARRAY_SIZE(intr_remap_fault_reasons))) {
1107                 *fault_type = INTR_REMAP;
1108                 return intr_remap_fault_reasons[fault_reason - 0x20];
1109         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1110                 *fault_type = DMA_REMAP;
1111                 return dma_remap_fault_reasons[fault_reason];
1112         } else {
1113                 *fault_type = UNKNOWN;
1114                 return "Unknown";
1115         }
1116 }
1117
1118 void dmar_msi_unmask(unsigned int irq)
1119 {
1120         struct intel_iommu *iommu = get_irq_data(irq);
1121         unsigned long flag;
1122
1123         /* unmask it */
1124         spin_lock_irqsave(&iommu->register_lock, flag);
1125         writel(0, iommu->reg + DMAR_FECTL_REG);
1126         /* Read a reg to force flush the post write */
1127         readl(iommu->reg + DMAR_FECTL_REG);
1128         spin_unlock_irqrestore(&iommu->register_lock, flag);
1129 }
1130
1131 void dmar_msi_mask(unsigned int irq)
1132 {
1133         unsigned long flag;
1134         struct intel_iommu *iommu = get_irq_data(irq);
1135
1136         /* mask it */
1137         spin_lock_irqsave(&iommu->register_lock, flag);
1138         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1139         /* Read a reg to force flush the post write */
1140         readl(iommu->reg + DMAR_FECTL_REG);
1141         spin_unlock_irqrestore(&iommu->register_lock, flag);
1142 }
1143
1144 void dmar_msi_write(int irq, struct msi_msg *msg)
1145 {
1146         struct intel_iommu *iommu = get_irq_data(irq);
1147         unsigned long flag;
1148
1149         spin_lock_irqsave(&iommu->register_lock, flag);
1150         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1151         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1152         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1153         spin_unlock_irqrestore(&iommu->register_lock, flag);
1154 }
1155
1156 void dmar_msi_read(int irq, struct msi_msg *msg)
1157 {
1158         struct intel_iommu *iommu = get_irq_data(irq);
1159         unsigned long flag;
1160
1161         spin_lock_irqsave(&iommu->register_lock, flag);
1162         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1163         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1164         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1165         spin_unlock_irqrestore(&iommu->register_lock, flag);
1166 }
1167
1168 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1169                 u8 fault_reason, u16 source_id, unsigned long long addr)
1170 {
1171         const char *reason;
1172         int fault_type;
1173
1174         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1175
1176         if (fault_type == INTR_REMAP)
1177                 printk(KERN_ERR "INTR-REMAP: Request device [%02x:%02x.%d] "
1178                        "fault index %llx\n"
1179                         "INTR-REMAP:[fault reason %02d] %s\n",
1180                         (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1181                         PCI_FUNC(source_id & 0xFF), addr >> 48,
1182                         fault_reason, reason);
1183         else
1184                 printk(KERN_ERR
1185                        "DMAR:[%s] Request device [%02x:%02x.%d] "
1186                        "fault addr %llx\n"
1187                        "DMAR:[fault reason %02d] %s\n",
1188                        (type ? "DMA Read" : "DMA Write"),
1189                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1190                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1191         return 0;
1192 }
1193
1194 #define PRIMARY_FAULT_REG_LEN (16)
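/*
 * Primary fault interrupt handler: starting from the fault record index in
 * the fault status register, walk the 16-byte fault recording registers,
 * log each pending fault via dmar_fault_do_one() and clear its F bit,
 * wrapping around until a record without F set is found.  Any remaining
 * status bits are then cleared in one go.
 */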
1195 irqreturn_t dmar_fault(int irq, void *dev_id)
1196 {
1197         struct intel_iommu *iommu = dev_id;
1198         int reg, fault_index;
1199         u32 fault_status;
1200         unsigned long flag;
1201
1202         spin_lock_irqsave(&iommu->register_lock, flag);
1203         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1204         if (fault_status)
1205                 printk(KERN_ERR "DRHD: handling fault status reg %x\n",
1206                        fault_status);
1207
1208         /* TBD: ignore advanced fault log currently */
1209         if (!(fault_status & DMA_FSTS_PPF))
1210                 goto clear_rest;
1211
1212         fault_index = dma_fsts_fault_record_index(fault_status);
1213         reg = cap_fault_reg_offset(iommu->cap);
1214         while (1) {
1215                 u8 fault_reason;
1216                 u16 source_id;
1217                 u64 guest_addr;
1218                 int type;
1219                 u32 data;
1220
1221                 /* highest 32 bits */
1222                 data = readl(iommu->reg + reg +
1223                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1224                 if (!(data & DMA_FRCD_F))
1225                         break;
1226
1227                 fault_reason = dma_frcd_fault_reason(data);
1228                 type = dma_frcd_type(data);
1229
1230                 data = readl(iommu->reg + reg +
1231                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1232                 source_id = dma_frcd_source_id(data);
1233
1234                 guest_addr = dmar_readq(iommu->reg + reg +
1235                                 fault_index * PRIMARY_FAULT_REG_LEN);
1236                 guest_addr = dma_frcd_page_addr(guest_addr);
1237                 /* clear the fault */
1238                 writel(DMA_FRCD_F, iommu->reg + reg +
1239                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1240
1241                 spin_unlock_irqrestore(&iommu->register_lock, flag);
1242
1243                 dmar_fault_do_one(iommu, type, fault_reason,
1244                                 source_id, guest_addr);
1245
1246                 fault_index++;
1247                 if (fault_index >= cap_num_fault_regs(iommu->cap))
1248                         fault_index = 0;
1249                 spin_lock_irqsave(&iommu->register_lock, flag);
1250         }
1251 clear_rest:
1252         /* clear all the other faults */
1253         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1254         writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1255
1256         spin_unlock_irqrestore(&iommu->register_lock, flag);
1257         return IRQ_HANDLED;
1258 }
1259
1260 int dmar_set_interrupt(struct intel_iommu *iommu)
1261 {
1262         int irq, ret;
1263
1264         /*
1265          * Check if the fault interrupt is already initialized.
1266          */
1267         if (iommu->irq)
1268                 return 0;
1269
1270         irq = create_irq();
1271         if (!irq) {
1272                 printk(KERN_ERR "IOMMU: no free vectors\n");
1273                 return -EINVAL;
1274         }
1275
1276         set_irq_data(irq, iommu);
1277         iommu->irq = irq;
1278
1279         ret = arch_setup_dmar_msi(irq);
1280         if (ret) {
1281                 set_irq_data(irq, NULL);
1282                 iommu->irq = 0;
1283                 destroy_irq(irq);
1284                 return ret;
1285         }
1286
1287         ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
1288         if (ret)
1289                 printk(KERN_ERR "IOMMU: can't request irq\n");
1290         return ret;
1291 }
1292
1293 int __init enable_drhd_fault_handling(void)
1294 {
1295         struct dmar_drhd_unit *drhd;
1296
1297         /*
1298          * Enable fault control interrupt.
1299          */
1300         for_each_drhd_unit(drhd) {
1301                 int ret;
1302                 struct intel_iommu *iommu = drhd->iommu;
1303                 ret = dmar_set_interrupt(iommu);
1304
1305                 if (ret) {
1306                         printk(KERN_ERR "DRHD %Lx: failed to enable fault "
1307                                "interrupt, ret %d\n",
1308                                (unsigned long long)drhd->reg_base_addr, ret);
1309                         return -1;
1310                 }
1311         }
1312
1313         return 0;
1314 }
1315
1316 /*
1317  * Re-enable Queued Invalidation interface.
1318  */
1319 int dmar_reenable_qi(struct intel_iommu *iommu)
1320 {
1321         if (!ecap_qis(iommu->ecap))
1322                 return -ENOENT;
1323
1324         if (!iommu->qi)
1325                 return -ENOENT;
1326
1327         /*
1328          * First disable queued invalidation.
1329          */
1330         dmar_disable_qi(iommu);
1331         /*
1332          * Then enable queued invalidation again. Since there is no pending
1333          * invalidation requests now, it's safe to re-enable queued
1334          * invalidation.
1335          */
1336         __dmar_enable_qi(iommu);
1337
1338         return 0;
1339 }
1340
1341 /*
1342  * Check interrupt remapping support in DMAR table description.
1343  */
1344 int dmar_ir_support(void)
1345 {
1346         struct acpi_table_dmar *dmar = (struct acpi_table_dmar *)dmar_tbl;
1347
1348         return dmar ? (dmar->flags & 0x1) : 0;
1349 }