]> bbs.cooldavid.org Git - net-next-2.6.git/blob - drivers/pci/dmar.c
intel-iommu: Clean up handling of "caching mode" vs. IOTLB flushing.
[net-next-2.6.git] / drivers / pci / dmar.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
28
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
34 #include <linux/irq.h>
35 #include <linux/interrupt.h>
36
37 #undef PREFIX
38 #define PREFIX "DMAR:"
39
40 /* No locks are needed as DMA remapping hardware unit
41  * list is constructed at boot time and hotplug of
42  * these units are not supported by the architecture.
43  */
44 LIST_HEAD(dmar_drhd_units);
45
46 static struct acpi_table_header * __initdata dmar_tbl;
47 static acpi_size dmar_tbl_size;
48
49 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
50 {
51         /*
52          * add INCLUDE_ALL at the tail, so scan the list will find it at
53          * the very end.
54          */
55         if (drhd->include_all)
56                 list_add_tail(&drhd->list, &dmar_drhd_units);
57         else
58                 list_add(&drhd->list, &dmar_drhd_units);
59 }
60
61 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
62                                            struct pci_dev **dev, u16 segment)
63 {
64         struct pci_bus *bus;
65         struct pci_dev *pdev = NULL;
66         struct acpi_dmar_pci_path *path;
67         int count;
68
69         bus = pci_find_bus(segment, scope->bus);
70         path = (struct acpi_dmar_pci_path *)(scope + 1);
71         count = (scope->length - sizeof(struct acpi_dmar_device_scope))
72                 / sizeof(struct acpi_dmar_pci_path);
73
74         while (count) {
75                 if (pdev)
76                         pci_dev_put(pdev);
77                 /*
78                  * Some BIOSes list non-exist devices in DMAR table, just
79                  * ignore it
80                  */
81                 if (!bus) {
82                         printk(KERN_WARNING
83                         PREFIX "Device scope bus [%d] not found\n",
84                         scope->bus);
85                         break;
86                 }
87                 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
88                 if (!pdev) {
89                         printk(KERN_WARNING PREFIX
90                         "Device scope device [%04x:%02x:%02x.%02x] not found\n",
91                                 segment, bus->number, path->dev, path->fn);
92                         break;
93                 }
94                 path ++;
95                 count --;
96                 bus = pdev->subordinate;
97         }
98         if (!pdev) {
99                 printk(KERN_WARNING PREFIX
100                 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
101                 segment, scope->bus, path->dev, path->fn);
102                 *dev = NULL;
103                 return 0;
104         }
105         if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \
106                         pdev->subordinate) || (scope->entry_type == \
107                         ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
108                 pci_dev_put(pdev);
109                 printk(KERN_WARNING PREFIX
110                         "Device scope type does not match for %s\n",
111                          pci_name(pdev));
112                 return -EINVAL;
113         }
114         *dev = pdev;
115         return 0;
116 }
117
118 static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
119                                        struct pci_dev ***devices, u16 segment)
120 {
121         struct acpi_dmar_device_scope *scope;
122         void * tmp = start;
123         int index;
124         int ret;
125
126         *cnt = 0;
127         while (start < end) {
128                 scope = start;
129                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
130                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
131                         (*cnt)++;
132                 else
133                         printk(KERN_WARNING PREFIX
134                                 "Unsupported device scope\n");
135                 start += scope->length;
136         }
137         if (*cnt == 0)
138                 return 0;
139
140         *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
141         if (!*devices)
142                 return -ENOMEM;
143
144         start = tmp;
145         index = 0;
146         while (start < end) {
147                 scope = start;
148                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
149                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
150                         ret = dmar_parse_one_dev_scope(scope,
151                                 &(*devices)[index], segment);
152                         if (ret) {
153                                 kfree(*devices);
154                                 return ret;
155                         }
156                         index ++;
157                 }
158                 start += scope->length;
159         }
160
161         return 0;
162 }
163
164 /**
165  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
166  * structure which uniquely represent one DMA remapping hardware unit
167  * present in the platform
168  */
169 static int __init
170 dmar_parse_one_drhd(struct acpi_dmar_header *header)
171 {
172         struct acpi_dmar_hardware_unit *drhd;
173         struct dmar_drhd_unit *dmaru;
174         int ret = 0;
175
176         drhd = (struct acpi_dmar_hardware_unit *)header;
177         if (!drhd->address) {
178                 /* Promote an attitude of violence to a BIOS engineer today */
179                 WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n"
180                      "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
181                      dmi_get_system_info(DMI_BIOS_VENDOR),
182                      dmi_get_system_info(DMI_BIOS_VERSION),
183                      dmi_get_system_info(DMI_PRODUCT_VERSION));
184                 return -ENODEV;
185         }
186         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
187         if (!dmaru)
188                 return -ENOMEM;
189
190         dmaru->hdr = header;
191         dmaru->reg_base_addr = drhd->address;
192         dmaru->segment = drhd->segment;
193         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
194
195         ret = alloc_iommu(dmaru);
196         if (ret) {
197                 kfree(dmaru);
198                 return ret;
199         }
200         dmar_register_drhd_unit(dmaru);
201         return 0;
202 }
203
204 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
205 {
206         struct acpi_dmar_hardware_unit *drhd;
207         int ret = 0;
208
209         drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
210
211         if (dmaru->include_all)
212                 return 0;
213
214         ret = dmar_parse_dev_scope((void *)(drhd + 1),
215                                 ((void *)drhd) + drhd->header.length,
216                                 &dmaru->devices_cnt, &dmaru->devices,
217                                 drhd->segment);
218         if (ret) {
219                 list_del(&dmaru->list);
220                 kfree(dmaru);
221         }
222         return ret;
223 }
224
225 #ifdef CONFIG_DMAR
226 LIST_HEAD(dmar_rmrr_units);
227
228 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
229 {
230         list_add(&rmrr->list, &dmar_rmrr_units);
231 }
232
233
234 static int __init
235 dmar_parse_one_rmrr(struct acpi_dmar_header *header)
236 {
237         struct acpi_dmar_reserved_memory *rmrr;
238         struct dmar_rmrr_unit *rmrru;
239
240         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
241         if (!rmrru)
242                 return -ENOMEM;
243
244         rmrru->hdr = header;
245         rmrr = (struct acpi_dmar_reserved_memory *)header;
246         rmrru->base_address = rmrr->base_address;
247         rmrru->end_address = rmrr->end_address;
248
249         dmar_register_rmrr_unit(rmrru);
250         return 0;
251 }
252
253 static int __init
254 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
255 {
256         struct acpi_dmar_reserved_memory *rmrr;
257         int ret;
258
259         rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
260         ret = dmar_parse_dev_scope((void *)(rmrr + 1),
261                 ((void *)rmrr) + rmrr->header.length,
262                 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
263
264         if (ret || (rmrru->devices_cnt == 0)) {
265                 list_del(&rmrru->list);
266                 kfree(rmrru);
267         }
268         return ret;
269 }
270 #endif
271
272 static void __init
273 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
274 {
275         struct acpi_dmar_hardware_unit *drhd;
276         struct acpi_dmar_reserved_memory *rmrr;
277
278         switch (header->type) {
279         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
280                 drhd = (struct acpi_dmar_hardware_unit *)header;
281                 printk (KERN_INFO PREFIX
282                         "DRHD (flags: 0x%08x)base: 0x%016Lx\n",
283                         drhd->flags, (unsigned long long)drhd->address);
284                 break;
285         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
286                 rmrr = (struct acpi_dmar_reserved_memory *)header;
287
288                 printk (KERN_INFO PREFIX
289                         "RMRR base: 0x%016Lx end: 0x%016Lx\n",
290                         (unsigned long long)rmrr->base_address,
291                         (unsigned long long)rmrr->end_address);
292                 break;
293         }
294 }
295
296 /**
297  * dmar_table_detect - checks to see if the platform supports DMAR devices
298  */
299 static int __init dmar_table_detect(void)
300 {
301         acpi_status status = AE_OK;
302
303         /* if we could find DMAR table, then there are DMAR devices */
304         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
305                                 (struct acpi_table_header **)&dmar_tbl,
306                                 &dmar_tbl_size);
307
308         if (ACPI_SUCCESS(status) && !dmar_tbl) {
309                 printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
310                 status = AE_NOT_FOUND;
311         }
312
313         return (ACPI_SUCCESS(status) ? 1 : 0);
314 }
315
316 /**
317  * parse_dmar_table - parses the DMA reporting table
318  */
319 static int __init
320 parse_dmar_table(void)
321 {
322         struct acpi_table_dmar *dmar;
323         struct acpi_dmar_header *entry_header;
324         int ret = 0;
325
326         /*
327          * Do it again, earlier dmar_tbl mapping could be mapped with
328          * fixed map.
329          */
330         dmar_table_detect();
331
332         dmar = (struct acpi_table_dmar *)dmar_tbl;
333         if (!dmar)
334                 return -ENODEV;
335
336         if (dmar->width < PAGE_SHIFT - 1) {
337                 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
338                 return -EINVAL;
339         }
340
341         printk (KERN_INFO PREFIX "Host address width %d\n",
342                 dmar->width + 1);
343
344         entry_header = (struct acpi_dmar_header *)(dmar + 1);
345         while (((unsigned long)entry_header) <
346                         (((unsigned long)dmar) + dmar_tbl->length)) {
347                 /* Avoid looping forever on bad ACPI tables */
348                 if (entry_header->length == 0) {
349                         printk(KERN_WARNING PREFIX
350                                 "Invalid 0-length structure\n");
351                         ret = -EINVAL;
352                         break;
353                 }
354
355                 dmar_table_print_dmar_entry(entry_header);
356
357                 switch (entry_header->type) {
358                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
359                         ret = dmar_parse_one_drhd(entry_header);
360                         break;
361                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
362 #ifdef CONFIG_DMAR
363                         ret = dmar_parse_one_rmrr(entry_header);
364 #endif
365                         break;
366                 default:
367                         printk(KERN_WARNING PREFIX
368                                 "Unknown DMAR structure type\n");
369                         ret = 0; /* for forward compatibility */
370                         break;
371                 }
372                 if (ret)
373                         break;
374
375                 entry_header = ((void *)entry_header + entry_header->length);
376         }
377         return ret;
378 }
379
380 int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
381                           struct pci_dev *dev)
382 {
383         int index;
384
385         while (dev) {
386                 for (index = 0; index < cnt; index++)
387                         if (dev == devices[index])
388                                 return 1;
389
390                 /* Check our parent */
391                 dev = dev->bus->self;
392         }
393
394         return 0;
395 }
396
397 struct dmar_drhd_unit *
398 dmar_find_matched_drhd_unit(struct pci_dev *dev)
399 {
400         struct dmar_drhd_unit *dmaru = NULL;
401         struct acpi_dmar_hardware_unit *drhd;
402
403         list_for_each_entry(dmaru, &dmar_drhd_units, list) {
404                 drhd = container_of(dmaru->hdr,
405                                     struct acpi_dmar_hardware_unit,
406                                     header);
407
408                 if (dmaru->include_all &&
409                     drhd->segment == pci_domain_nr(dev->bus))
410                         return dmaru;
411
412                 if (dmar_pci_device_match(dmaru->devices,
413                                           dmaru->devices_cnt, dev))
414                         return dmaru;
415         }
416
417         return NULL;
418 }
419
420 int __init dmar_dev_scope_init(void)
421 {
422         struct dmar_drhd_unit *drhd, *drhd_n;
423         int ret = -ENODEV;
424
425         list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
426                 ret = dmar_parse_dev(drhd);
427                 if (ret)
428                         return ret;
429         }
430
431 #ifdef CONFIG_DMAR
432         {
433                 struct dmar_rmrr_unit *rmrr, *rmrr_n;
434                 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
435                         ret = rmrr_parse_dev(rmrr);
436                         if (ret)
437                                 return ret;
438                 }
439         }
440 #endif
441
442         return ret;
443 }
444
445
446 int __init dmar_table_init(void)
447 {
448         static int dmar_table_initialized;
449         int ret;
450
451         if (dmar_table_initialized)
452                 return 0;
453
454         dmar_table_initialized = 1;
455
456         ret = parse_dmar_table();
457         if (ret) {
458                 if (ret != -ENODEV)
459                         printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
460                 return ret;
461         }
462
463         if (list_empty(&dmar_drhd_units)) {
464                 printk(KERN_INFO PREFIX "No DMAR devices found\n");
465                 return -ENODEV;
466         }
467
468 #ifdef CONFIG_DMAR
469         if (list_empty(&dmar_rmrr_units))
470                 printk(KERN_INFO PREFIX "No RMRR found\n");
471 #endif
472
473 #ifdef CONFIG_INTR_REMAP
474         parse_ioapics_under_ir();
475 #endif
476         return 0;
477 }
478
479 void __init detect_intel_iommu(void)
480 {
481         int ret;
482
483         ret = dmar_table_detect();
484
485         {
486 #ifdef CONFIG_INTR_REMAP
487                 struct acpi_table_dmar *dmar;
488                 /*
489                  * for now we will disable dma-remapping when interrupt
490                  * remapping is enabled.
491                  * When support for queued invalidation for IOTLB invalidation
492                  * is added, we will not need this any more.
493                  */
494                 dmar = (struct acpi_table_dmar *) dmar_tbl;
495                 if (ret && cpu_has_x2apic && dmar->flags & 0x1)
496                         printk(KERN_INFO
497                                "Queued invalidation will be enabled to support "
498                                "x2apic and Intr-remapping.\n");
499 #endif
500 #ifdef CONFIG_DMAR
501                 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
502                     !dmar_disabled)
503                         iommu_detected = 1;
504 #endif
505         }
506         early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
507         dmar_tbl = NULL;
508 }
509
510
511 int alloc_iommu(struct dmar_drhd_unit *drhd)
512 {
513         struct intel_iommu *iommu;
514         int map_size;
515         u32 ver;
516         static int iommu_allocated = 0;
517         int agaw = 0;
518         int msagaw = 0;
519
520         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
521         if (!iommu)
522                 return -ENOMEM;
523
524         iommu->seq_id = iommu_allocated++;
525         sprintf (iommu->name, "dmar%d", iommu->seq_id);
526
527         iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
528         if (!iommu->reg) {
529                 printk(KERN_ERR "IOMMU: can't map the region\n");
530                 goto error;
531         }
532         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
533         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
534
535 #ifdef CONFIG_DMAR
536         agaw = iommu_calculate_agaw(iommu);
537         if (agaw < 0) {
538                 printk(KERN_ERR
539                        "Cannot get a valid agaw for iommu (seq_id = %d)\n",
540                        iommu->seq_id);
541                 goto error;
542         }
543         msagaw = iommu_calculate_max_sagaw(iommu);
544         if (msagaw < 0) {
545                 printk(KERN_ERR
546                         "Cannot get a valid max agaw for iommu (seq_id = %d)\n",
547                         iommu->seq_id);
548                 goto error;
549         }
550 #endif
551         iommu->agaw = agaw;
552         iommu->msagaw = msagaw;
553
554         /* the registers might be more than one page */
555         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
556                 cap_max_fault_reg_offset(iommu->cap));
557         map_size = VTD_PAGE_ALIGN(map_size);
558         if (map_size > VTD_PAGE_SIZE) {
559                 iounmap(iommu->reg);
560                 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
561                 if (!iommu->reg) {
562                         printk(KERN_ERR "IOMMU: can't map the region\n");
563                         goto error;
564                 }
565         }
566
567         ver = readl(iommu->reg + DMAR_VER_REG);
568         pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
569                 (unsigned long long)drhd->reg_base_addr,
570                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
571                 (unsigned long long)iommu->cap,
572                 (unsigned long long)iommu->ecap);
573
574         spin_lock_init(&iommu->register_lock);
575
576         drhd->iommu = iommu;
577         return 0;
578 error:
579         kfree(iommu);
580         return -1;
581 }
582
583 void free_iommu(struct intel_iommu *iommu)
584 {
585         if (!iommu)
586                 return;
587
588 #ifdef CONFIG_DMAR
589         free_dmar_iommu(iommu);
590 #endif
591
592         if (iommu->reg)
593                 iounmap(iommu->reg);
594         kfree(iommu);
595 }
596
597 /*
598  * Reclaim all the submitted descriptors which have completed its work.
599  */
600 static inline void reclaim_free_desc(struct q_inval *qi)
601 {
602         while (qi->desc_status[qi->free_tail] == QI_DONE) {
603                 qi->desc_status[qi->free_tail] = QI_FREE;
604                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
605                 qi->free_cnt++;
606         }
607 }
608
609 static int qi_check_fault(struct intel_iommu *iommu, int index)
610 {
611         u32 fault;
612         int head;
613         struct q_inval *qi = iommu->qi;
614         int wait_index = (index + 1) % QI_LENGTH;
615
616         fault = readl(iommu->reg + DMAR_FSTS_REG);
617
618         /*
619          * If IQE happens, the head points to the descriptor associated
620          * with the error. No new descriptors are fetched until the IQE
621          * is cleared.
622          */
623         if (fault & DMA_FSTS_IQE) {
624                 head = readl(iommu->reg + DMAR_IQH_REG);
625                 if ((head >> 4) == index) {
626                         memcpy(&qi->desc[index], &qi->desc[wait_index],
627                                         sizeof(struct qi_desc));
628                         __iommu_flush_cache(iommu, &qi->desc[index],
629                                         sizeof(struct qi_desc));
630                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
631                         return -EINVAL;
632                 }
633         }
634
635         return 0;
636 }
637
638 /*
639  * Submit the queued invalidation descriptor to the remapping
640  * hardware unit and wait for its completion.
641  */
642 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
643 {
644         int rc = 0;
645         struct q_inval *qi = iommu->qi;
646         struct qi_desc *hw, wait_desc;
647         int wait_index, index;
648         unsigned long flags;
649
650         if (!qi)
651                 return 0;
652
653         hw = qi->desc;
654
655         spin_lock_irqsave(&qi->q_lock, flags);
656         while (qi->free_cnt < 3) {
657                 spin_unlock_irqrestore(&qi->q_lock, flags);
658                 cpu_relax();
659                 spin_lock_irqsave(&qi->q_lock, flags);
660         }
661
662         index = qi->free_head;
663         wait_index = (index + 1) % QI_LENGTH;
664
665         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
666
667         hw[index] = *desc;
668
669         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
670                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
671         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
672
673         hw[wait_index] = wait_desc;
674
675         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
676         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
677
678         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
679         qi->free_cnt -= 2;
680
681         /*
682          * update the HW tail register indicating the presence of
683          * new descriptors.
684          */
685         writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
686
687         while (qi->desc_status[wait_index] != QI_DONE) {
688                 /*
689                  * We will leave the interrupts disabled, to prevent interrupt
690                  * context to queue another cmd while a cmd is already submitted
691                  * and waiting for completion on this cpu. This is to avoid
692                  * a deadlock where the interrupt context can wait indefinitely
693                  * for free slots in the queue.
694                  */
695                 rc = qi_check_fault(iommu, index);
696                 if (rc)
697                         goto out;
698
699                 spin_unlock(&qi->q_lock);
700                 cpu_relax();
701                 spin_lock(&qi->q_lock);
702         }
703 out:
704         qi->desc_status[index] = qi->desc_status[wait_index] = QI_DONE;
705
706         reclaim_free_desc(qi);
707         spin_unlock_irqrestore(&qi->q_lock, flags);
708
709         return rc;
710 }
711
712 /*
713  * Flush the global interrupt entry cache.
714  */
715 void qi_global_iec(struct intel_iommu *iommu)
716 {
717         struct qi_desc desc;
718
719         desc.low = QI_IEC_TYPE;
720         desc.high = 0;
721
722         /* should never fail */
723         qi_submit_sync(&desc, iommu);
724 }
725
726 void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
727                       u64 type)
728 {
729         struct qi_desc desc;
730
731         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
732                         | QI_CC_GRAN(type) | QI_CC_TYPE;
733         desc.high = 0;
734
735         qi_submit_sync(&desc, iommu);
736 }
737
738 void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
739                     unsigned int size_order, u64 type)
740 {
741         u8 dw = 0, dr = 0;
742
743         struct qi_desc desc;
744         int ih = 0;
745
746         if (cap_write_drain(iommu->cap))
747                 dw = 1;
748
749         if (cap_read_drain(iommu->cap))
750                 dr = 1;
751
752         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
753                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
754         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
755                 | QI_IOTLB_AM(size_order);
756
757         qi_submit_sync(&desc, iommu);
758 }
759
760 /*
761  * Disable Queued Invalidation interface.
762  */
763 void dmar_disable_qi(struct intel_iommu *iommu)
764 {
765         unsigned long flags;
766         u32 sts;
767         cycles_t start_time = get_cycles();
768
769         if (!ecap_qis(iommu->ecap))
770                 return;
771
772         spin_lock_irqsave(&iommu->register_lock, flags);
773
774         sts =  dmar_readq(iommu->reg + DMAR_GSTS_REG);
775         if (!(sts & DMA_GSTS_QIES))
776                 goto end;
777
778         /*
779          * Give a chance to HW to complete the pending invalidation requests.
780          */
781         while ((readl(iommu->reg + DMAR_IQT_REG) !=
782                 readl(iommu->reg + DMAR_IQH_REG)) &&
783                 (DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
784                 cpu_relax();
785
786         iommu->gcmd &= ~DMA_GCMD_QIE;
787
788         writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);
789
790         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
791                       !(sts & DMA_GSTS_QIES), sts);
792 end:
793         spin_unlock_irqrestore(&iommu->register_lock, flags);
794 }
795
796 /*
797  * Enable queued invalidation.
798  */
799 static void __dmar_enable_qi(struct intel_iommu *iommu)
800 {
801         u32 cmd, sts;
802         unsigned long flags;
803         struct q_inval *qi = iommu->qi;
804
805         qi->free_head = qi->free_tail = 0;
806         qi->free_cnt = QI_LENGTH;
807
808         spin_lock_irqsave(&iommu->register_lock, flags);
809
810         /* write zero to the tail reg */
811         writel(0, iommu->reg + DMAR_IQT_REG);
812
813         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
814
815         cmd = iommu->gcmd | DMA_GCMD_QIE;
816         iommu->gcmd |= DMA_GCMD_QIE;
817         writel(cmd, iommu->reg + DMAR_GCMD_REG);
818
819         /* Make sure hardware complete it */
820         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
821
822         spin_unlock_irqrestore(&iommu->register_lock, flags);
823 }
824
825 /*
826  * Enable Queued Invalidation interface. This is a must to support
827  * interrupt-remapping. Also used by DMA-remapping, which replaces
828  * register based IOTLB invalidation.
829  */
830 int dmar_enable_qi(struct intel_iommu *iommu)
831 {
832         struct q_inval *qi;
833
834         if (!ecap_qis(iommu->ecap))
835                 return -ENOENT;
836
837         /*
838          * queued invalidation is already setup and enabled.
839          */
840         if (iommu->qi)
841                 return 0;
842
843         iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
844         if (!iommu->qi)
845                 return -ENOMEM;
846
847         qi = iommu->qi;
848
849         qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
850         if (!qi->desc) {
851                 kfree(qi);
852                 iommu->qi = 0;
853                 return -ENOMEM;
854         }
855
856         qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
857         if (!qi->desc_status) {
858                 free_page((unsigned long) qi->desc);
859                 kfree(qi);
860                 iommu->qi = 0;
861                 return -ENOMEM;
862         }
863
864         qi->free_head = qi->free_tail = 0;
865         qi->free_cnt = QI_LENGTH;
866
867         spin_lock_init(&qi->q_lock);
868
869         __dmar_enable_qi(iommu);
870
871         return 0;
872 }
873
874 /* iommu interrupt handling. Most stuff are MSI-like. */
875
876 enum faulttype {
877         DMA_REMAP,
878         INTR_REMAP,
879         UNKNOWN,
880 };
881
882 static const char *dma_remap_fault_reasons[] =
883 {
884         "Software",
885         "Present bit in root entry is clear",
886         "Present bit in context entry is clear",
887         "Invalid context entry",
888         "Access beyond MGAW",
889         "PTE Write access is not set",
890         "PTE Read access is not set",
891         "Next page table ptr is invalid",
892         "Root table address invalid",
893         "Context table ptr is invalid",
894         "non-zero reserved fields in RTP",
895         "non-zero reserved fields in CTP",
896         "non-zero reserved fields in PTE",
897 };
898
899 static const char *intr_remap_fault_reasons[] =
900 {
901         "Detected reserved fields in the decoded interrupt-remapped request",
902         "Interrupt index exceeded the interrupt-remapping table size",
903         "Present field in the IRTE entry is clear",
904         "Error accessing interrupt-remapping table pointed by IRTA_REG",
905         "Detected reserved fields in the IRTE entry",
906         "Blocked a compatibility format interrupt request",
907         "Blocked an interrupt request due to source-id verification failure",
908 };
909
910 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(fault_reason_strings) - 1)
911
912 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
913 {
914         if (fault_reason >= 0x20 && (fault_reason <= 0x20 +
915                                      ARRAY_SIZE(intr_remap_fault_reasons))) {
916                 *fault_type = INTR_REMAP;
917                 return intr_remap_fault_reasons[fault_reason - 0x20];
918         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
919                 *fault_type = DMA_REMAP;
920                 return dma_remap_fault_reasons[fault_reason];
921         } else {
922                 *fault_type = UNKNOWN;
923                 return "Unknown";
924         }
925 }
926
927 void dmar_msi_unmask(unsigned int irq)
928 {
929         struct intel_iommu *iommu = get_irq_data(irq);
930         unsigned long flag;
931
932         /* unmask it */
933         spin_lock_irqsave(&iommu->register_lock, flag);
934         writel(0, iommu->reg + DMAR_FECTL_REG);
935         /* Read a reg to force flush the post write */
936         readl(iommu->reg + DMAR_FECTL_REG);
937         spin_unlock_irqrestore(&iommu->register_lock, flag);
938 }
939
940 void dmar_msi_mask(unsigned int irq)
941 {
942         unsigned long flag;
943         struct intel_iommu *iommu = get_irq_data(irq);
944
945         /* mask it */
946         spin_lock_irqsave(&iommu->register_lock, flag);
947         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
948         /* Read a reg to force flush the post write */
949         readl(iommu->reg + DMAR_FECTL_REG);
950         spin_unlock_irqrestore(&iommu->register_lock, flag);
951 }
952
953 void dmar_msi_write(int irq, struct msi_msg *msg)
954 {
955         struct intel_iommu *iommu = get_irq_data(irq);
956         unsigned long flag;
957
958         spin_lock_irqsave(&iommu->register_lock, flag);
959         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
960         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
961         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
962         spin_unlock_irqrestore(&iommu->register_lock, flag);
963 }
964
965 void dmar_msi_read(int irq, struct msi_msg *msg)
966 {
967         struct intel_iommu *iommu = get_irq_data(irq);
968         unsigned long flag;
969
970         spin_lock_irqsave(&iommu->register_lock, flag);
971         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
972         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
973         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
974         spin_unlock_irqrestore(&iommu->register_lock, flag);
975 }
976
977 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
978                 u8 fault_reason, u16 source_id, unsigned long long addr)
979 {
980         const char *reason;
981         int fault_type;
982
983         reason = dmar_get_fault_reason(fault_reason, &fault_type);
984
985         if (fault_type == INTR_REMAP)
986                 printk(KERN_ERR "INTR-REMAP: Request device [[%02x:%02x.%d] "
987                        "fault index %llx\n"
988                         "INTR-REMAP:[fault reason %02d] %s\n",
989                         (source_id >> 8), PCI_SLOT(source_id & 0xFF),
990                         PCI_FUNC(source_id & 0xFF), addr >> 48,
991                         fault_reason, reason);
992         else
993                 printk(KERN_ERR
994                        "DMAR:[%s] Request device [%02x:%02x.%d] "
995                        "fault addr %llx \n"
996                        "DMAR:[fault reason %02d] %s\n",
997                        (type ? "DMA Read" : "DMA Write"),
998                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
999                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
1000         return 0;
1001 }
1002
1003 #define PRIMARY_FAULT_REG_LEN (16)
1004 irqreturn_t dmar_fault(int irq, void *dev_id)
1005 {
1006         struct intel_iommu *iommu = dev_id;
1007         int reg, fault_index;
1008         u32 fault_status;
1009         unsigned long flag;
1010
1011         spin_lock_irqsave(&iommu->register_lock, flag);
1012         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1013         if (fault_status)
1014                 printk(KERN_ERR "DRHD: handling fault status reg %x\n",
1015                        fault_status);
1016
1017         /* TBD: ignore advanced fault log currently */
1018         if (!(fault_status & DMA_FSTS_PPF))
1019                 goto clear_rest;
1020
1021         fault_index = dma_fsts_fault_record_index(fault_status);
1022         reg = cap_fault_reg_offset(iommu->cap);
1023         while (1) {
1024                 u8 fault_reason;
1025                 u16 source_id;
1026                 u64 guest_addr;
1027                 int type;
1028                 u32 data;
1029
1030                 /* highest 32 bits */
1031                 data = readl(iommu->reg + reg +
1032                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
1033                 if (!(data & DMA_FRCD_F))
1034                         break;
1035
1036                 fault_reason = dma_frcd_fault_reason(data);
1037                 type = dma_frcd_type(data);
1038
1039                 data = readl(iommu->reg + reg +
1040                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
1041                 source_id = dma_frcd_source_id(data);
1042
1043                 guest_addr = dmar_readq(iommu->reg + reg +
1044                                 fault_index * PRIMARY_FAULT_REG_LEN);
1045                 guest_addr = dma_frcd_page_addr(guest_addr);
1046                 /* clear the fault */
1047                 writel(DMA_FRCD_F, iommu->reg + reg +
1048                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
1049
1050                 spin_unlock_irqrestore(&iommu->register_lock, flag);
1051
1052                 dmar_fault_do_one(iommu, type, fault_reason,
1053                                 source_id, guest_addr);
1054
1055                 fault_index++;
1056                 if (fault_index > cap_num_fault_regs(iommu->cap))
1057                         fault_index = 0;
1058                 spin_lock_irqsave(&iommu->register_lock, flag);
1059         }
1060 clear_rest:
1061         /* clear all the other faults */
1062         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1063         writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1064
1065         spin_unlock_irqrestore(&iommu->register_lock, flag);
1066         return IRQ_HANDLED;
1067 }
1068
1069 int dmar_set_interrupt(struct intel_iommu *iommu)
1070 {
1071         int irq, ret;
1072
1073         /*
1074          * Check if the fault interrupt is already initialized.
1075          */
1076         if (iommu->irq)
1077                 return 0;
1078
1079         irq = create_irq();
1080         if (!irq) {
1081                 printk(KERN_ERR "IOMMU: no free vectors\n");
1082                 return -EINVAL;
1083         }
1084
1085         set_irq_data(irq, iommu);
1086         iommu->irq = irq;
1087
1088         ret = arch_setup_dmar_msi(irq);
1089         if (ret) {
1090                 set_irq_data(irq, NULL);
1091                 iommu->irq = 0;
1092                 destroy_irq(irq);
1093                 return 0;
1094         }
1095
1096         ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
1097         if (ret)
1098                 printk(KERN_ERR "IOMMU: can't request irq\n");
1099         return ret;
1100 }
1101
1102 int __init enable_drhd_fault_handling(void)
1103 {
1104         struct dmar_drhd_unit *drhd;
1105
1106         /*
1107          * Enable fault control interrupt.
1108          */
1109         for_each_drhd_unit(drhd) {
1110                 int ret;
1111                 struct intel_iommu *iommu = drhd->iommu;
1112                 ret = dmar_set_interrupt(iommu);
1113
1114                 if (ret) {
1115                         printk(KERN_ERR "DRHD %Lx: failed to enable fault, "
1116                                " interrupt, ret %d\n",
1117                                (unsigned long long)drhd->reg_base_addr, ret);
1118                         return -1;
1119                 }
1120         }
1121
1122         return 0;
1123 }
1124
1125 /*
1126  * Re-enable Queued Invalidation interface.
1127  */
1128 int dmar_reenable_qi(struct intel_iommu *iommu)
1129 {
1130         if (!ecap_qis(iommu->ecap))
1131                 return -ENOENT;
1132
1133         if (!iommu->qi)
1134                 return -ENOENT;
1135
1136         /*
1137          * First disable queued invalidation.
1138          */
1139         dmar_disable_qi(iommu);
1140         /*
1141          * Then enable queued invalidation again. Since there is no pending
1142          * invalidation requests now, it's safe to re-enable queued
1143          * invalidation.
1144          */
1145         __dmar_enable_qi(iommu);
1146
1147         return 0;
1148 }