drivers/pci/dmar.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
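/*
 * Typical boot-time flow, as a rough sketch (the actual call sites live in
 * arch setup code and the IOMMU drivers, not in this file):
 *
 *	detect_intel_iommu();		map and sanity-check the DMAR table
 *	dmar_table_init();		parse the DRHD/RMRR/ATSR entries
 *	dmar_dev_scope_init();		resolve device scopes once PCI is up
 *	for_each_drhd_unit(drhd)
 *		...			program drhd->iommu
 */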
28
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
34 #include <linux/irq.h>
35 #include <linux/interrupt.h>
36 #include <linux/tboot.h>
37 #include <linux/dmi.h>
38
39 #define PREFIX "DMAR: "
40
41 /* No locks are needed as the DMA remapping hardware unit
42  * list is constructed at boot time and hotplug of
43  * these units is not supported by the architecture.
44  */
45 LIST_HEAD(dmar_drhd_units);
46
47 static struct acpi_table_header * __initdata dmar_tbl;
48 static acpi_size dmar_tbl_size;
49
50 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
51 {
52         /*
53          * add INCLUDE_ALL at the tail, so a scan of the list will find it at
54          * the very end.
55          */
56         if (drhd->include_all)
57                 list_add_tail(&drhd->list, &dmar_drhd_units);
58         else
59                 list_add(&drhd->list, &dmar_drhd_units);
60 }
61
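/*
 * Shape of one device-scope entry as dmar_parse_one_dev_scope() sees it
 * (a sketch; the structs are the ACPI ones used below):
 *
 *	struct acpi_dmar_device_scope	entry_type, length, start bus
 *	struct acpi_dmar_pci_path	{ dev, fn }	\
 *	struct acpi_dmar_pci_path	{ dev, fn }	 > count entries, derived
 *	...						/  from the length field
 *
 * The path entries give the device/function hops from the start bus down to
 * the target, which is how the loop below walks the PCI hierarchy via
 * pci_get_slot() and pdev->subordinate.
 */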
62 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
63                                            struct pci_dev **dev, u16 segment)
64 {
65         struct pci_bus *bus;
66         struct pci_dev *pdev = NULL;
67         struct acpi_dmar_pci_path *path;
68         int count;
69
70         bus = pci_find_bus(segment, scope->bus);
71         path = (struct acpi_dmar_pci_path *)(scope + 1);
72         count = (scope->length - sizeof(struct acpi_dmar_device_scope))
73                 / sizeof(struct acpi_dmar_pci_path);
74
75         while (count) {
76                 if (pdev)
77                         pci_dev_put(pdev);
78                 /*
79                  * Some BIOSes list non-existent devices in the DMAR
80                  * table; just ignore them.
81                  */
82                 if (!bus) {
83                         printk(KERN_WARNING
84                         PREFIX "Device scope bus [%d] not found\n",
85                         scope->bus);
86                         break;
87                 }
88                 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
89                 if (!pdev) {
90                         printk(KERN_WARNING PREFIX
91                         "Device scope device [%04x:%02x:%02x.%02x] not found\n",
92                                 segment, bus->number, path->dev, path->fn);
93                         break;
94                 }
95                 path++;
96                 count--;
97                 bus = pdev->subordinate;
98         }
99         if (!pdev) {
100                 printk(KERN_WARNING PREFIX
101                 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
102                 segment, scope->bus, path->dev, path->fn);
103                 *dev = NULL;
104                 return 0;
105         }
106         if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
107                         pdev->subordinate) || (scope->entry_type ==
108                         ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
109                 printk(KERN_WARNING PREFIX
110                         "Device scope type does not match for %s\n",
111                         pci_name(pdev));
112                 pci_dev_put(pdev);
113                 return -EINVAL;
114         }
115         *dev = pdev;
116         return 0;
117 }
118
119 static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
120                                        struct pci_dev ***devices, u16 segment)
121 {
122         struct acpi_dmar_device_scope *scope;
123         void *tmp = start;
124         int index;
125         int ret;
126
127         *cnt = 0;
128         while (start < end) {
129                 scope = start;
130                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
131                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
132                         (*cnt)++;
133                 else
134                         printk(KERN_WARNING PREFIX
135                                 "Unsupported device scope\n");
136                 start += scope->length;
137         }
138         if (*cnt == 0)
139                 return 0;
140
141         *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
142         if (!*devices)
143                 return -ENOMEM;
144
145         start = tmp;
146         index = 0;
147         while (start < end) {
148                 scope = start;
149                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
150                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
151                         ret = dmar_parse_one_dev_scope(scope,
152                                 &(*devices)[index], segment);
153                         if (ret) {
154                                 kfree(*devices);
155                                 return ret;
156                         }
157                         index++;
158                 }
159                 start += scope->length;
160         }
161
162         return 0;
163 }
164
165 /**
166  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
167  * structure which uniquely represents one DMA remapping hardware unit
168  * present in the platform
169  */
170 static int __init
171 dmar_parse_one_drhd(struct acpi_dmar_header *header)
172 {
173         struct acpi_dmar_hardware_unit *drhd;
174         struct dmar_drhd_unit *dmaru;
175         int ret = 0;
176
177         drhd = (struct acpi_dmar_hardware_unit *)header;
178         if (!drhd->address) {
179                 /* Promote an attitude of violence to a BIOS engineer today */
180                 WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n"
181                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
182                      dmi_get_system_info(DMI_BIOS_VENDOR),
183                      dmi_get_system_info(DMI_BIOS_VERSION),
184                      dmi_get_system_info(DMI_PRODUCT_VERSION));
185                 return -ENODEV;
186         }
187         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
188         if (!dmaru)
189                 return -ENOMEM;
190
191         dmaru->hdr = header;
192         dmaru->reg_base_addr = drhd->address;
193         dmaru->segment = drhd->segment;
194         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
195
196         ret = alloc_iommu(dmaru);
197         if (ret) {
198                 kfree(dmaru);
199                 return ret;
200         }
201         dmar_register_drhd_unit(dmaru);
202         return 0;
203 }
204
205 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
206 {
207         struct acpi_dmar_hardware_unit *drhd;
208         int ret = 0;
209
210         drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
211
212         if (dmaru->include_all)
213                 return 0;
214
215         ret = dmar_parse_dev_scope((void *)(drhd + 1),
216                                 ((void *)drhd) + drhd->header.length,
217                                 &dmaru->devices_cnt, &dmaru->devices,
218                                 drhd->segment);
219         if (ret) {
220                 list_del(&dmaru->list);
221                 kfree(dmaru);
222         }
223         return ret;
224 }
225
226 #ifdef CONFIG_DMAR
227 LIST_HEAD(dmar_rmrr_units);
228
229 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
230 {
231         list_add(&rmrr->list, &dmar_rmrr_units);
232 }
233
234
235 static int __init
236 dmar_parse_one_rmrr(struct acpi_dmar_header *header)
237 {
238         struct acpi_dmar_reserved_memory *rmrr;
239         struct dmar_rmrr_unit *rmrru;
240
241         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
242         if (!rmrru)
243                 return -ENOMEM;
244
245         rmrru->hdr = header;
246         rmrr = (struct acpi_dmar_reserved_memory *)header;
247         rmrru->base_address = rmrr->base_address;
248         rmrru->end_address = rmrr->end_address;
249
250         dmar_register_rmrr_unit(rmrru);
251         return 0;
252 }
253
254 static int __init
255 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
256 {
257         struct acpi_dmar_reserved_memory *rmrr;
258         int ret;
259
260         rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
261         ret = dmar_parse_dev_scope((void *)(rmrr + 1),
262                 ((void *)rmrr) + rmrr->header.length,
263                 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
264
265         if (ret || (rmrru->devices_cnt == 0)) {
266                 list_del(&rmrru->list);
267                 kfree(rmrru);
268         }
269         return ret;
270 }
271
272 static LIST_HEAD(dmar_atsr_units);
273
274 static int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr)
275 {
276         struct acpi_dmar_atsr *atsr;
277         struct dmar_atsr_unit *atsru;
278
279         atsr = container_of(hdr, struct acpi_dmar_atsr, header);
280         atsru = kzalloc(sizeof(*atsru), GFP_KERNEL);
281         if (!atsru)
282                 return -ENOMEM;
283
284         atsru->hdr = hdr;
285         atsru->include_all = atsr->flags & 0x1;
286
287         list_add(&atsru->list, &dmar_atsr_units);
288
289         return 0;
290 }
291
292 static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru)
293 {
294         int rc;
295         struct acpi_dmar_atsr *atsr;
296
297         if (atsru->include_all)
298                 return 0;
299
300         atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
301         rc = dmar_parse_dev_scope((void *)(atsr + 1),
302                                 (void *)atsr + atsr->header.length,
303                                 &atsru->devices_cnt, &atsru->devices,
304                                 atsr->segment);
305         if (rc || !atsru->devices_cnt) {
306                 list_del(&atsru->list);
307                 kfree(atsru);
308         }
309
310         return rc;
311 }
312
313 int dmar_find_matched_atsr_unit(struct pci_dev *dev)
314 {
315         int i;
316         struct pci_bus *bus;
317         struct acpi_dmar_atsr *atsr;
318         struct dmar_atsr_unit *atsru;
319
320         list_for_each_entry(atsru, &dmar_atsr_units, list) {
321                 atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header);
322                 if (atsr->segment == pci_domain_nr(dev->bus))
323                         goto found;
324         }
325
326         return 0;
327
328 found:
329         for (bus = dev->bus; bus; bus = bus->parent) {
330                 struct pci_dev *bridge = bus->self;
331
332                 if (!bridge || !bridge->is_pcie ||
333                     bridge->pcie_type == PCI_EXP_TYPE_PCI_BRIDGE)
334                         return 0;
335
336                 if (bridge->pcie_type == PCI_EXP_TYPE_ROOT_PORT) {
337                         for (i = 0; i < atsru->devices_cnt; i++)
338                                 if (atsru->devices[i] == bridge)
339                                         return 1;
340                         break;
341                 }
342         }
343
344         if (atsru->include_all)
345                 return 1;
346
347         return 0;
348 }
349 #endif
350
351 static void __init
352 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
353 {
354         struct acpi_dmar_hardware_unit *drhd;
355         struct acpi_dmar_reserved_memory *rmrr;
356         struct acpi_dmar_atsr *atsr;
357         struct acpi_dmar_rhsa *rhsa;
358
359         switch (header->type) {
360         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
361                 drhd = container_of(header, struct acpi_dmar_hardware_unit,
362                                     header);
363                 printk(KERN_INFO PREFIX
364                         "DRHD base: %#016Lx flags: %#x\n",
365                         (unsigned long long)drhd->address, drhd->flags);
366                 break;
367         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
368                 rmrr = container_of(header, struct acpi_dmar_reserved_memory,
369                                     header);
370                 printk(KERN_INFO PREFIX
371                         "RMRR base: %#016Lx end: %#016Lx\n",
372                         (unsigned long long)rmrr->base_address,
373                         (unsigned long long)rmrr->end_address);
374                 break;
375         case ACPI_DMAR_TYPE_ATSR:
376                 atsr = container_of(header, struct acpi_dmar_atsr, header);
377                 printk(KERN_INFO PREFIX "ATSR flags: %#x\n", atsr->flags);
378                 break;
379         case ACPI_DMAR_HARDWARE_AFFINITY:
380                 rhsa = container_of(header, struct acpi_dmar_rhsa, header);
381                 printk(KERN_INFO PREFIX "RHSA base: %#016Lx proximity domain: %#x\n",
382                        (unsigned long long)rhsa->base_address,
383                        rhsa->proximity_domain);
384                 break;
385         }
386 }
387
388 /**
389  * dmar_table_detect - checks to see if the platform supports DMAR devices
390  */
391 static int __init dmar_table_detect(void)
392 {
393         acpi_status status = AE_OK;
394
395         /* if we can find the DMAR table, then there are DMAR devices */
396         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
397                                 (struct acpi_table_header **)&dmar_tbl,
398                                 &dmar_tbl_size);
399
400         if (ACPI_SUCCESS(status) && !dmar_tbl) {
401                 printk(KERN_WARNING PREFIX "Unable to map DMAR\n");
402                 status = AE_NOT_FOUND;
403         }
404
405         return (ACPI_SUCCESS(status) ? 1 : 0);
406 }
407
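/*
 * Rough shape of the table that parse_dmar_table() walks (the struct names
 * are the ACPI ones used below; payload formats are in the VT-d spec):
 *
 *	struct acpi_table_dmar		header: "DMAR" signature, haw, flags
 *	struct acpi_dmar_header		{ type = HARDWARE_UNIT, length } + body
 *	struct acpi_dmar_header		{ type = RESERVED_MEMORY, length } + body
 *	struct acpi_dmar_header		{ type = ATSR / HARDWARE_AFFINITY, ... }
 *	...
 *
 * Each entry is advanced by its own length field, which is why a 0-length
 * entry below has to be treated as fatal.
 */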
408 /**
409  * parse_dmar_table - parses the DMA reporting table
410  */
411 static int __init
412 parse_dmar_table(void)
413 {
414         struct acpi_table_dmar *dmar;
415         struct acpi_dmar_header *entry_header;
416         int ret = 0;
417
418         /*
419          * Do it again; the earlier dmar_tbl mapping could have been done
420          * with a fixed map.
421          */
422         dmar_table_detect();
423
424         /*
425          * ACPI tables may not be DMA protected by tboot, so use the DMAR copy
426          * that SINIT saved in SinitMleData in the TXT heap (which is DMA protected)
427          */
428         dmar_tbl = tboot_get_dmar_table(dmar_tbl);
429
430         dmar = (struct acpi_table_dmar *)dmar_tbl;
431         if (!dmar)
432                 return -ENODEV;
433
434         if (dmar->width < PAGE_SHIFT - 1) {
435                 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
436                 return -EINVAL;
437         }
438
439         printk(KERN_INFO PREFIX "Host address width %d\n",
440                 dmar->width + 1);
441
442         entry_header = (struct acpi_dmar_header *)(dmar + 1);
443         while (((unsigned long)entry_header) <
444                         (((unsigned long)dmar) + dmar_tbl->length)) {
445                 /* Avoid looping forever on bad ACPI tables */
446                 if (entry_header->length == 0) {
447                         printk(KERN_WARNING PREFIX
448                                 "Invalid 0-length structure\n");
449                         ret = -EINVAL;
450                         break;
451                 }
452
453                 dmar_table_print_dmar_entry(entry_header);
454
455                 switch (entry_header->type) {
456                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
457                         ret = dmar_parse_one_drhd(entry_header);
458                         break;
459                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
460 #ifdef CONFIG_DMAR
461                         ret = dmar_parse_one_rmrr(entry_header);
462 #endif
463                         break;
464                 case ACPI_DMAR_TYPE_ATSR:
465 #ifdef CONFIG_DMAR
466                         ret = dmar_parse_one_atsr(entry_header);
467 #endif
468                         break;
469                 case ACPI_DMAR_HARDWARE_AFFINITY:
470                         /* We don't do anything with RHSA (yet?) */
471                         break;
472                 default:
473                         printk(KERN_WARNING PREFIX
474                                 "Unknown DMAR structure type %d\n",
475                                 entry_header->type);
476                         ret = 0; /* for forward compatibility */
477                         break;
478                 }
479                 if (ret)
480                         break;
481
482                 entry_header = ((void *)entry_header + entry_header->length);
483         }
484         return ret;
485 }
486
487 int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
488                           struct pci_dev *dev)
489 {
490         int index;
491
492         while (dev) {
493                 for (index = 0; index < cnt; index++)
494                         if (dev == devices[index])
495                                 return 1;
496
497                 /* Check our parent */
498                 dev = dev->bus->self;
499         }
500
501         return 0;
502 }
503
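/*
 * A minimal usage sketch (the real callers live in the DMA-remapping code,
 * e.g. intel-iommu.c): map a struct pci_dev to the DRHD unit whose scope
 * covers it, either through an explicit device-scope match or through an
 * INCLUDE_ALL unit on the same segment.
 *
 *	struct dmar_drhd_unit *drhd = dmar_find_matched_drhd_unit(pdev);
 *
 *	if (drhd)
 *		... drhd->iommu is the unit that translates for pdev ...
 */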
504 struct dmar_drhd_unit *
505 dmar_find_matched_drhd_unit(struct pci_dev *dev)
506 {
507         struct dmar_drhd_unit *dmaru = NULL;
508         struct acpi_dmar_hardware_unit *drhd;
509
510         list_for_each_entry(dmaru, &dmar_drhd_units, list) {
511                 drhd = container_of(dmaru->hdr,
512                                     struct acpi_dmar_hardware_unit,
513                                     header);
514
515                 if (dmaru->include_all &&
516                     drhd->segment == pci_domain_nr(dev->bus))
517                         return dmaru;
518
519                 if (dmar_pci_device_match(dmaru->devices,
520                                           dmaru->devices_cnt, dev))
521                         return dmaru;
522         }
523
524         return NULL;
525 }
526
527 int __init dmar_dev_scope_init(void)
528 {
529         struct dmar_drhd_unit *drhd, *drhd_n;
530         int ret = -ENODEV;
531
532         list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
533                 ret = dmar_parse_dev(drhd);
534                 if (ret)
535                         return ret;
536         }
537
538 #ifdef CONFIG_DMAR
539         {
540                 struct dmar_rmrr_unit *rmrr, *rmrr_n;
541                 struct dmar_atsr_unit *atsr, *atsr_n;
542
543                 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
544                         ret = rmrr_parse_dev(rmrr);
545                         if (ret)
546                                 return ret;
547                 }
548
549                 list_for_each_entry_safe(atsr, atsr_n, &dmar_atsr_units, list) {
550                         ret = atsr_parse_dev(atsr);
551                         if (ret)
552                                 return ret;
553                 }
554         }
555 #endif
556
557         return ret;
558 }
559
560
561 int __init dmar_table_init(void)
562 {
563         static int dmar_table_initialized;
564         int ret;
565
566         if (dmar_table_initialized)
567                 return 0;
568
569         dmar_table_initialized = 1;
570
571         ret = parse_dmar_table();
572         if (ret) {
573                 if (ret != -ENODEV)
574                         printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
575                 return ret;
576         }
577
578         if (list_empty(&dmar_drhd_units)) {
579                 printk(KERN_INFO PREFIX "No DMAR devices found\n");
580                 return -ENODEV;
581         }
582
583 #ifdef CONFIG_DMAR
584         if (list_empty(&dmar_rmrr_units))
585                 printk(KERN_INFO PREFIX "No RMRR found\n");
586
587         if (list_empty(&dmar_atsr_units))
588                 printk(KERN_INFO PREFIX "No ATSR found\n");
589 #endif
590
591         return 0;
592 }
593
594 void __init detect_intel_iommu(void)
595 {
596         int ret;
597
598         ret = dmar_table_detect();
599
600         {
601 #ifdef CONFIG_INTR_REMAP
602                 struct acpi_table_dmar *dmar;
603                 /*
604                  * for now we will disable dma-remapping when interrupt
605                  * remapping is enabled.
606                  * When support for queued invalidation for IOTLB invalidation
607                  * is added, we will not need this any more.
608                  */
609                 dmar = (struct acpi_table_dmar *) dmar_tbl;
610                 if (ret && cpu_has_x2apic && dmar->flags & 0x1)
611                         printk(KERN_INFO
612                                "Queued invalidation will be enabled to support "
613                                "x2apic and Intr-remapping.\n");
614 #endif
615 #ifdef CONFIG_DMAR
616                 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
617                     !dmar_disabled)
618                         iommu_detected = 1;
619 #endif
620         }
621         early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
622         dmar_tbl = NULL;
623 }
624
625
626 int alloc_iommu(struct dmar_drhd_unit *drhd)
627 {
628         struct intel_iommu *iommu;
629         int map_size;
630         u32 ver;
631         static int iommu_allocated;
632         int agaw = 0;
633         int msagaw = 0;
634
635         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
636         if (!iommu)
637                 return -ENOMEM;
638
639         iommu->seq_id = iommu_allocated++;
640         sprintf(iommu->name, "dmar%d", iommu->seq_id);
641
642         iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
643         if (!iommu->reg) {
644                 printk(KERN_ERR "IOMMU: can't map the region\n");
645                 goto error;
646         }
647         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
648         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
649
650         if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
651                 /* Promote an attitude of violence to a BIOS engineer today */
652                 WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n"
653                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
654                      drhd->reg_base_addr,
655                      dmi_get_system_info(DMI_BIOS_VENDOR),
656                      dmi_get_system_info(DMI_BIOS_VERSION),
657                      dmi_get_system_info(DMI_PRODUCT_VERSION));
658                 goto err_unmap;
659         }
660
661 #ifdef CONFIG_DMAR
662         agaw = iommu_calculate_agaw(iommu);
663         if (agaw < 0) {
664                 printk(KERN_ERR
665                        "Cannot get a valid agaw for iommu (seq_id = %d)\n",
666                        iommu->seq_id);
667                 goto err_unmap;
668         }
669         msagaw = iommu_calculate_max_sagaw(iommu);
670         if (msagaw < 0) {
671                 printk(KERN_ERR
672                         "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
673                         iommu->seq_id);
674                 goto err_unmap;
675         }
676 #endif
677         iommu->agaw = agaw;
678         iommu->msagaw = msagaw;
679
680         /* the registers might be more than one page */
681         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
682                 cap_max_fault_reg_offset(iommu->cap));
683         map_size = VTD_PAGE_ALIGN(map_size);
684         if (map_size > VTD_PAGE_SIZE) {
685                 iounmap(iommu->reg);
686                 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
687                 if (!iommu->reg) {
688                         printk(KERN_ERR "IOMMU: can't map the region\n");
689                         goto error;
690                 }
691         }
692
693         ver = readl(iommu->reg + DMAR_VER_REG);
694         pr_info("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
695                 (unsigned long long)drhd->reg_base_addr,
696                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
697                 (unsigned long long)iommu->cap,
698                 (unsigned long long)iommu->ecap);
699
700         spin_lock_init(&iommu->register_lock);
701
702         drhd->iommu = iommu;
703         return 0;
704
705  err_unmap:
706         iounmap(iommu->reg);
707  error:
708         kfree(iommu);
709         return -1;
710 }
711
712 void free_iommu(struct intel_iommu *iommu)
713 {
714         if (!iommu)
715                 return;
716
717 #ifdef CONFIG_DMAR
718         free_dmar_iommu(iommu);
719 #endif
720
721         if (iommu->reg)
722                 iounmap(iommu->reg);
723         kfree(iommu);
724 }
725
726 /*
727  * Reclaim all the submitted descriptors which have completed their work.
728  */
729 static inline void reclaim_free_desc(struct q_inval *qi)
730 {
731         while (qi->desc_status[qi->free_tail] == QI_DONE ||
732                qi->desc_status[qi->free_tail] == QI_ABORT) {
733                 qi->desc_status[qi->free_tail] = QI_FREE;
734                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
735                 qi->free_cnt++;
736         }
737 }
738
739 static int qi_check_fault(struct intel_iommu *iommu, int index)
740 {
741         u32 fault;
742         int head, tail;
743         struct q_inval *qi = iommu->qi;
744         int wait_index = (index + 1) % QI_LENGTH;
745
746         if (qi->desc_status[wait_index] == QI_ABORT)
747                 return -EAGAIN;
748
749         fault = readl(iommu->reg + DMAR_FSTS_REG);
750
751         /*
752          * If IQE happens, the head points to the descriptor associated
753          * with the error. No new descriptors are fetched until the IQE
754          * is cleared.
755          */
756         if (fault & DMA_FSTS_IQE) {
757                 head = readl(iommu->reg + DMAR_IQH_REG);
758                 if ((head >> DMAR_IQ_SHIFT) == index) {
759                         printk(KERN_ERR "VT-d detected invalid descriptor: "
760                                 "low=%llx, high=%llx\n",
761                                 (unsigned long long)qi->desc[index].low,
762                                 (unsigned long long)qi->desc[index].high);
763                         memcpy(&qi->desc[index], &qi->desc[wait_index],
764                                         sizeof(struct qi_desc));
765                         __iommu_flush_cache(iommu, &qi->desc[index],
766                                         sizeof(struct qi_desc));
767                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
768                         return -EINVAL;
769                 }
770         }
771
772         /*
773          * If ITE happens, all pending wait_desc commands are aborted.
774          * No new descriptors are fetched until the ITE is cleared.
775          */
776         if (fault & DMA_FSTS_ITE) {
777                 head = readl(iommu->reg + DMAR_IQH_REG);
778                 head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
779                 head |= 1;
780                 tail = readl(iommu->reg + DMAR_IQT_REG);
781                 tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
782
783                 writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);
784
785                 do {
786                         if (qi->desc_status[head] == QI_IN_USE)
787                                 qi->desc_status[head] = QI_ABORT;
788                         head = (head - 2 + QI_LENGTH) % QI_LENGTH;
789                 } while (head != tail);
790
791                 if (qi->desc_status[wait_index] == QI_ABORT)
792                         return -EAGAIN;
793         }
794
795         if (fault & DMA_FSTS_ICE)
796                 writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);
797
798         return 0;
799 }
800
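/*
 * A sketch of how a caller drives the invalidation queue (the helpers
 * further down, e.g. qi_global_iec(), do exactly this):
 *
 *	struct qi_desc desc;
 *
 *	desc.low  = QI_IEC_TYPE;
 *	desc.high = 0;
 *	qi_submit_sync(&desc, iommu);
 *
 * Each submission consumes two ring slots: the request itself plus a wait
 * descriptor whose status word the hardware sets to QI_DONE, which is what
 * qi_submit_sync() spins on before reclaiming the slots.
 */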
801 /*
802  * Submit the queued invalidation descriptor to the remapping
803  * hardware unit and wait for its completion.
804  */
805 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
806 {
807         int rc;
808         struct q_inval *qi = iommu->qi;
809         struct qi_desc *hw, wait_desc;
810         int wait_index, index;
811         unsigned long flags;
812
813         if (!qi)
814                 return 0;
815
816         hw = qi->desc;
817
818 restart:
819         rc = 0;
820
821         spin_lock_irqsave(&qi->q_lock, flags);
822         while (qi->free_cnt < 3) {
823                 spin_unlock_irqrestore(&qi->q_lock, flags);
824                 cpu_relax();
825                 spin_lock_irqsave(&qi->q_lock, flags);
826         }
827
828         index = qi->free_head;
829         wait_index = (index + 1) % QI_LENGTH;
830
831         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
832
833         hw[index] = *desc;
834
835         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
836                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
837         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
838
839         hw[wait_index] = wait_desc;
840
841         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
842         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
843
844         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
845         qi->free_cnt -= 2;
846
847         /*
848          * update the HW tail register indicating the presence of
849          * new descriptors.
850          */
851         writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);
852
853         while (qi->desc_status[wait_index] != QI_DONE) {
854                 /*
855                  * We will leave the interrupts disabled, to prevent interrupt
856                  * context from queueing another cmd while a cmd is already
857                  * submitted and waiting for completion on this cpu. This is to avoid
858                  * a deadlock where the interrupt context can wait indefinitely
859                  * for free slots in the queue.
860                  */
861                 rc = qi_check_fault(iommu, index);
862                 if (rc)
863                         break;
864
865                 spin_unlock(&qi->q_lock);
866                 cpu_relax();
867                 spin_lock(&qi->q_lock);
868         }
869
870         qi->desc_status[index] = QI_DONE;
871
872         reclaim_free_desc(qi);
873         spin_unlock_irqrestore(&qi->q_lock, flags);
874
875         if (rc == -EAGAIN)
876                 goto restart;
877
878         return rc;
879 }
880
881 /*
882  * Flush the global interrupt entry cache.
883  */
884 void qi_global_iec(struct intel_iommu *iommu)
885 {
886         struct qi_desc desc;
887
888         desc.low = QI_IEC_TYPE;
889         desc.high = 0;
890
891         /* should never fail */
892         qi_submit_sync(&desc, iommu);
893 }
894
895 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
896                       u64 type)
897 {
898         struct qi_desc desc;
899
900         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
901                         | QI_CC_GRAN(type) | QI_CC_TYPE;
902         desc.high = 0;
903
904         qi_submit_sync(&desc, iommu);
905 }
906
907 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
908                     unsigned int size_order, u64 type)
909 {
910         u8 dw = 0, dr = 0;
911
912         struct qi_desc desc;
913         int ih = 0;
914
915         if (cap_write_drain(iommu->cap))
916                 dw = 1;
917
918         if (cap_read_drain(iommu->cap))
919                 dr = 1;
920
921         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
922                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
923         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
924                 | QI_IOTLB_AM(size_order);
925
926         qi_submit_sync(&desc, iommu);
927 }
928
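/*
 * Worked example of the size encoding used by qi_flush_dev_iotlb() below
 * (a sketch based on the ATS invalidation address format, assuming
 * VTD_PAGE_SHIFT is 12):
 *
 *	qi_flush_dev_iotlb(iommu, sid, qdep, addr, 2);
 *
 * A 'mask' of 2 means 1 << 2 = 4 pages.  The BUG_ON() requires addr to be
 * 16KB aligned, and "addr |= (1 << 13) - 1" then sets bits 0..12 while
 * leaving bit 13 clear; together with QI_DEV_IOTLB_SIZE, the position of
 * that lowest clear address bit tells the device how large the invalidated
 * range is.  With mask == 0 the address is used as-is for a single page.
 */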
929 void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
930                         u64 addr, unsigned mask)
931 {
932         struct qi_desc desc;
933
934         if (mask) {
935                 BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
936                 addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
937                 desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
938         } else
939                 desc.high = QI_DEV_IOTLB_ADDR(addr);
940
941         if (qdep >= QI_DEV_IOTLB_MAX_INVS)
942                 qdep = 0;
943
944         desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
945                    QI_DIOTLB_TYPE;
946
947         qi_submit_sync(&desc, iommu);
948 }
949
950 /*
951  * Disable Queued Invalidation interface.
952  */
953 void dmar_disable_qi(struct intel_iommu *iommu)
954 {
955         unsigned long flags;
956         u32 sts;
957         cycles_t start_time = get_cycles();
958
959         if (!ecap_qis(iommu->ecap))
960                 return;
961
962         spin_lock_irqsave(&iommu->register_lock, flags);
963
964         sts = readl(iommu->reg + DMAR_GSTS_REG);
965         if (!(sts & DMA_GSTS_QIES))
966                 goto end;
967
968         /*
969          * Give the HW a chance to complete the pending invalidation requests.
970          */
971         while ((readl(iommu->reg + DMAR_IQT_REG) !=
972                 readl(iommu->reg + DMAR_IQH_REG)) &&
973                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
974                 cpu_relax();
975
976         iommu->gcmd &= ~DMA_GCMD_QIE;
977         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
978
979         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
980                       !(sts & DMA_GSTS_QIES), sts);
981 end:
982         spin_unlock_irqrestore(&iommu->register_lock, flags);
983 }
984
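/*
 * Register handshake used to turn the queue on, shown as a sketch of what
 * __dmar_enable_qi() below actually does (all symbols from intel-iommu.h):
 *
 *	writel(0, iommu->reg + DMAR_IQT_REG);			reset sw tail
 *	dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
 *	writel(iommu->gcmd | DMA_GCMD_QIE, iommu->reg + DMAR_GCMD_REG);
 *	... poll DMAR_GSTS_REG via IOMMU_WAIT_OP until DMA_GSTS_QIES is set ...
 *
 * dmar_reenable_qi() relies on the same sequence after a disable.
 */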
985 /*
986  * Enable queued invalidation.
987  */
988 static void __dmar_enable_qi(struct intel_iommu *iommu)
989 {
990         u32 sts;
991         unsigned long flags;
992         struct q_inval *qi = iommu->qi;
993
994         qi->free_head = qi->free_tail = 0;
995         qi->free_cnt = QI_LENGTH;
996
997         spin_lock_irqsave(&iommu->register_lock, flags);
998
999         /* write zero to the tail reg */
1000         writel(0, iommu->reg + DMAR_IQT_REG);
1001
1002         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
1003
1004         iommu->gcmd |= DMA_GCMD_QIE;
1005         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
1006
1007         /* Make sure hardware completes it */
1008         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
1009
1010         spin_unlock_irqrestore(&iommu->register_lock, flags);
1011 }
1012
1013 /*
1014  * Enable Queued Invalidation interface. This is a must to support
1015  * interrupt-remapping. Also used by DMA-remapping, which replaces
1016  * register based IOTLB invalidation.
1017  */
1018 int dmar_enable_qi(struct intel_iommu *iommu)
1019 {
1020         struct q_inval *qi;
1021
1022         if (!ecap_qis(iommu->ecap))
1023                 return -ENOENT;
1024
1025         /*
1026          * queued invalidation is already set up and enabled.
1027          */
1028         if (iommu->qi)
1029                 return 0;
1030
1031         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
1032         if (!iommu->qi)
1033                 return -ENOMEM;
1034
1035         qi = iommu->qi;
1036
1037         qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
1038         if (!qi->desc) {
1039                 kfree(qi);
1040                 iommu->qi = NULL;
1041                 return -ENOMEM;
1042         }
1043
1044         qi->desc_status = kzalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
1045         if (!qi->desc_status) {
1046                 free_page((unsigned long) qi->desc);
1047                 kfree(qi);
1048                 iommu->qi = NULL;
1049                 return -ENOMEM;
1050         }
1051
1052         qi->free_head = qi->free_tail = 0;
1053         qi->free_cnt = QI_LENGTH;
1054
1055         spin_lock_init(&qi->q_lock);
1056
1057         __dmar_enable_qi(iommu);
1058
1059         return 0;
1060 }
1061
1062 /* iommu interrupt handling. Most of it is MSI-like. */
1063
1064 enum faulttype {
1065         DMA_REMAP,
1066         INTR_REMAP,
1067         UNKNOWN,
1068 };
1069
1070 static const char *dma_remap_fault_reasons[] =
1071 {
1072         "Software",
1073         "Present bit in root entry is clear",
1074         "Present bit in context entry is clear",
1075         "Invalid context entry",
1076         "Access beyond MGAW",
1077         "PTE Write access is not set",
1078         "PTE Read access is not set",
1079         "Next page table ptr is invalid",
1080         "Root table address invalid",
1081         "Context table ptr is invalid",
1082         "non-zero reserved fields in RTP",
1083         "non-zero reserved fields in CTP",
1084         "non-zero reserved fields in PTE",
1085 };
1086
1087 static const char *intr_remap_fault_reasons[] =
1088 {
1089         "Detected reserved fields in the decoded interrupt-remapped request",
1090         "Interrupt index exceeded the interrupt-remapping table size",
1091         "Present field in the IRTE entry is clear",
1092         "Error accessing interrupt-remapping table pointed by IRTA_REG",
1093         "Detected reserved fields in the IRTE entry",
1094         "Blocked a compatibility format interrupt request",
1095         "Blocked an interrupt request due to source-id verification failure",
1096 };
1097
1098 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(dma_remap_fault_reasons) - 1)
1099
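/*
 * Worked example of the index split in dmar_get_fault_reason() below:
 * hardware fault reason 0x21 is an interrupt-remapping fault and maps to
 * intr_remap_fault_reasons[0x21 - 0x20] ("Interrupt index exceeded the
 * interrupt-remapping table size"), while reason 0x05 stays in
 * dma_remap_fault_reasons[5] ("PTE Write access is not set").  Anything
 * outside both ranges is reported as "Unknown".
 */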
1100 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
1101 {
1102         if (fault_reason >= 0x20 && (fault_reason < 0x20 +
1103                                      ARRAY_SIZE(intr_remap_fault_reasons))) {
1104                 *fault_type = INTR_REMAP;
1105                 return intr_remap_fault_reasons[fault_reason - 0x20];
1106         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
1107                 *fault_type = DMA_REMAP;
1108                 return dma_remap_fault_reasons[fault_reason];
1109         } else {
1110                 *fault_type = UNKNOWN;
1111                 return "Unknown";
1112         }
1113 }
1114
1115 void dmar_msi_unmask(unsigned int irq)
1116 {
1117         struct intel_iommu *iommu = get_irq_data(irq);
1118         unsigned long flag;
1119
1120         /* unmask it */
1121         spin_lock_irqsave(&iommu->register_lock, flag);
1122         writel(0, iommu->reg + DMAR_FECTL_REG);
1123         /* Read a reg to force flush the post write */
1124         readl(iommu->reg + DMAR_FECTL_REG);
1125         spin_unlock_irqrestore(&iommu->register_lock, flag);
1126 }
1127
1128 void dmar_msi_mask(unsigned int irq)
1129 {
1130         unsigned long flag;
1131         struct intel_iommu *iommu = get_irq_data(irq);
1132
1133         /* mask it */
1134         spin_lock_irqsave(&iommu->register_lock, flag);
1135         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
1136         /* Read a reg to force flush the post write */
1137         readl(iommu->reg + DMAR_FECTL_REG);
1138         spin_unlock_irqrestore(&iommu->register_lock, flag);
1139 }
1140
1141 void dmar_msi_write(int irq, struct msi_msg *msg)
1142 {
1143         struct intel_iommu *iommu = get_irq_data(irq);
1144         unsigned long flag;
1145
1146         spin_lock_irqsave(&iommu->register_lock, flag);
1147         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
1148         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
1149         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
1150         spin_unlock_irqrestore(&iommu->register_lock, flag);
1151 }
1152
1153 void dmar_msi_read(int irq, struct msi_msg *msg)
1154 {
1155         struct intel_iommu *iommu = get_irq_data(irq);
1156         unsigned long flag;
1157
1158         spin_lock_irqsave(&iommu->register_lock, flag);
1159         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
1160         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
1161         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
1162         spin_unlock_irqrestore(&iommu->register_lock, flag);
1163 }
1164
1165 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
1166                 u8 fault_reason, u16 source_id, unsigned long long addr)
1167 {
1168         const char *reason;
1169         int fault_type;
1170
1171         reason = dmar_get_fault_reason(fault_reason, &fault_type);
1172
1173         if (fault_type == INTR_REMAP)
1174                 printk(KERN_ERR "INTR-REMAP: Request device [%02x:%02x.%d] "
1175                        "fault index %llx\n"
1176                         "INTR-REMAP:[fault reason %02d] %s\n",
1177                         (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1178                         PCI_FUNC(source_id & 0xFF), addr >> 48,
1179                         fault_reason, reason);
1180         else
1181                 printk(KERN_ERR
1182                        "DMAR:[%s] Request device [%02x:%02x.%d] "
1183                        "fault addr %llx\n"
1184                        "DMAR:[fault reason %02d] %s\n",
1185                        (type ? "DMA Read" : "DMA Write"),
1186                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
1187                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1188         return 0;
1189 }
1190
1191 #define PRIMARY_FAULT_REG_LEN (16)
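/*
 * Layout of one primary fault record as dmar_fault() consumes it below
 * (a paraphrase of the register format; the extractors are the
 * dma_frcd_*() helpers from intel-iommu.h):
 *
 *	offset  0: faulting page address	dma_frcd_page_addr()
 *	offset  8: source id			dma_frcd_source_id()
 *	offset 12: F bit, fault reason, type	DMA_FRCD_F,
 *						dma_frcd_fault_reason(),
 *						dma_frcd_type()
 *
 * Records are PRIMARY_FAULT_REG_LEN (16) bytes apart, starting at
 * cap_fault_reg_offset(iommu->cap), and writing DMA_FRCD_F back clears
 * the record.
 */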
1192 irqreturn_t dmar_fault(int irq, void *dev_id)
1193 {
1194         struct intel_iommu *iommu = dev_id;
1195         int reg, fault_index;
1196         u32 fault_status;
1197         unsigned long flag;
1198
1199         spin_lock_irqsave(&iommu->register_lock, flag);
1200         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1201         if (fault_status)
1202                 printk(KERN_ERR "DRHD: handling fault status reg %x\n",
1203                        fault_status);
1204
1205         /* TBD: ignore advanced fault log currently */
1206         if (!(fault_status & DMA_FSTS_PPF))
1207                 goto clear_rest;
1208
1209         fault_index = dma_fsts_fault_record_index(fault_status);
1210         reg = cap_fault_reg_offset(iommu->cap);
1211         while (1) {
1212                 u8 fault_reason;
1213                 u16 source_id;
1214                 u64 guest_addr;
1215                 int type;
1216                 u32 data;
1217
1218                 /* highest 32 bits */
1219                 data = readl(iommu->reg + reg +
1220                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1221                 if (!(data & DMA_FRCD_F))
1222                         break;
1223
1224                 fault_reason = dma_frcd_fault_reason(data);
1225                 type = dma_frcd_type(data);
1226
1227                 data = readl(iommu->reg + reg +
1228                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1229                 source_id = dma_frcd_source_id(data);
1230
1231                 guest_addr = dmar_readq(iommu->reg + reg +
1232                                 fault_index * PRIMARY_FAULT_REG_LEN);
1233                 guest_addr = dma_frcd_page_addr(guest_addr);
1234                 /* clear the fault */
1235                 writel(DMA_FRCD_F, iommu->reg + reg +
1236                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1237
1238                 spin_unlock_irqrestore(&iommu->register_lock, flag);
1239
1240                 dmar_fault_do_one(iommu, type, fault_reason,
1241                                 source_id, guest_addr);
1242
1243                 fault_index++;
1244                 if (fault_index >= cap_num_fault_regs(iommu->cap))
1245                         fault_index = 0;
1246                 spin_lock_irqsave(&iommu->register_lock, flag);
1247         }
1248 clear_rest:
1249         /* clear all the other faults */
1250         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1251         writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1252
1253         spin_unlock_irqrestore(&iommu->register_lock, flag);
1254         return IRQ_HANDLED;
1255 }
1256
1257 int dmar_set_interrupt(struct intel_iommu *iommu)
1258 {
1259         int irq, ret;
1260
1261         /*
1262          * Check if the fault interrupt is already initialized.
1263          */
1264         if (iommu->irq)
1265                 return 0;
1266
1267         irq = create_irq();
1268         if (!irq) {
1269                 printk(KERN_ERR "IOMMU: no free vectors\n");
1270                 return -EINVAL;
1271         }
1272
1273         set_irq_data(irq, iommu);
1274         iommu->irq = irq;
1275
1276         ret = arch_setup_dmar_msi(irq);
1277         if (ret) {
1278                 set_irq_data(irq, NULL);
1279                 iommu->irq = 0;
1280                 destroy_irq(irq);
1281                 return ret;
1282         }
1283
1284         ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
1285         if (ret)
1286                 printk(KERN_ERR "IOMMU: can't request irq\n");
1287         return ret;
1288 }
1289
1290 int __init enable_drhd_fault_handling(void)
1291 {
1292         struct dmar_drhd_unit *drhd;
1293
1294         /*
1295          * Enable fault control interrupt.
1296          */
1297         for_each_drhd_unit(drhd) {
1298                 int ret;
1299                 struct intel_iommu *iommu = drhd->iommu;
1300                 ret = dmar_set_interrupt(iommu);
1301
1302                 if (ret) {
1303                         printk(KERN_ERR "DRHD %Lx: failed to enable fault "
1304                                "interrupt, ret %d\n",
1305                                (unsigned long long)drhd->reg_base_addr, ret);
1306                         return -1;
1307                 }
1308         }
1309
1310         return 0;
1311 }
1312
1313 /*
1314  * Re-enable Queued Invalidation interface.
1315  */
1316 int dmar_reenable_qi(struct intel_iommu *iommu)
1317 {
1318         if (!ecap_qis(iommu->ecap))
1319                 return -ENOENT;
1320
1321         if (!iommu->qi)
1322                 return -ENOENT;
1323
1324         /*
1325          * First disable queued invalidation.
1326          */
1327         dmar_disable_qi(iommu);
1328         /*
1329          * Then enable queued invalidation again. Since there is no pending
1330          * invalidation requests now, it's safe to re-enable queued
1331          * invalidation.
1332          */
1333         __dmar_enable_qi(iommu);
1334
1335         return 0;
1336 }
1337
1338 /*
1339  * Check interrupt remapping support in DMAR table description.
1340  */
1341 int dmar_ir_support(void)
1342 {
1343         struct acpi_table_dmar *dmar;
1344 
1345         dmar = (struct acpi_table_dmar *)dmar_tbl;
1346         if (!dmar)
1347                 return 0;
1348         return dmar->flags & 0x1;
1346 }