]> bbs.cooldavid.org Git - net-next-2.6.git/blob - drivers/pci/dmar.c
x86, x2apic: enable fault handling for intr-remapping
[net-next-2.6.git] / drivers / pci / dmar.c
1 /*
2  * Copyright (c) 2006, Intel Corporation.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms and conditions of the GNU General Public License,
6  * version 2, as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope it will be useful, but WITHOUT
9  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
11  * more details.
12  *
13  * You should have received a copy of the GNU General Public License along with
14  * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
15  * Place - Suite 330, Boston, MA 02111-1307 USA.
16  *
17  * Copyright (C) 2006-2008 Intel Corporation
18  * Author: Ashok Raj <ashok.raj@intel.com>
19  * Author: Shaohua Li <shaohua.li@intel.com>
20  * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21  *
22  * This file implements early detection/parsing of Remapping Devices
23  * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24  * tables.
25  *
26  * These routines are used by both DMA-remapping and Interrupt-remapping
27  */
28
29 #include <linux/pci.h>
30 #include <linux/dmar.h>
31 #include <linux/iova.h>
32 #include <linux/intel-iommu.h>
33 #include <linux/timer.h>
34 #include <linux/irq.h>
35 #include <linux/interrupt.h>
36
37 #undef PREFIX
38 #define PREFIX "DMAR:"
39
40 /* No locks are needed as DMA remapping hardware unit
41  * list is constructed at boot time and hotplug of
42  * these units are not supported by the architecture.
43  */
44 LIST_HEAD(dmar_drhd_units);
45
46 static struct acpi_table_header * __initdata dmar_tbl;
47 static acpi_size dmar_tbl_size;
48
49 static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
50 {
51         /*
52          * add INCLUDE_ALL at the tail, so scan the list will find it at
53          * the very end.
54          */
55         if (drhd->include_all)
56                 list_add_tail(&drhd->list, &dmar_drhd_units);
57         else
58                 list_add(&drhd->list, &dmar_drhd_units);
59 }
60
61 static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
62                                            struct pci_dev **dev, u16 segment)
63 {
64         struct pci_bus *bus;
65         struct pci_dev *pdev = NULL;
66         struct acpi_dmar_pci_path *path;
67         int count;
68
69         bus = pci_find_bus(segment, scope->bus);
70         path = (struct acpi_dmar_pci_path *)(scope + 1);
71         count = (scope->length - sizeof(struct acpi_dmar_device_scope))
72                 / sizeof(struct acpi_dmar_pci_path);
73
74         while (count) {
75                 if (pdev)
76                         pci_dev_put(pdev);
77                 /*
78                  * Some BIOSes list non-exist devices in DMAR table, just
79                  * ignore it
80                  */
81                 if (!bus) {
82                         printk(KERN_WARNING
83                         PREFIX "Device scope bus [%d] not found\n",
84                         scope->bus);
85                         break;
86                 }
87                 pdev = pci_get_slot(bus, PCI_DEVFN(path->dev, path->fn));
88                 if (!pdev) {
89                         printk(KERN_WARNING PREFIX
90                         "Device scope device [%04x:%02x:%02x.%02x] not found\n",
91                                 segment, bus->number, path->dev, path->fn);
92                         break;
93                 }
94                 path ++;
95                 count --;
96                 bus = pdev->subordinate;
97         }
98         if (!pdev) {
99                 printk(KERN_WARNING PREFIX
100                 "Device scope device [%04x:%02x:%02x.%02x] not found\n",
101                 segment, scope->bus, path->dev, path->fn);
102                 *dev = NULL;
103                 return 0;
104         }
105         if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \
106                         pdev->subordinate) || (scope->entry_type == \
107                         ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) {
108                 pci_dev_put(pdev);
109                 printk(KERN_WARNING PREFIX
110                         "Device scope type does not match for %s\n",
111                          pci_name(pdev));
112                 return -EINVAL;
113         }
114         *dev = pdev;
115         return 0;
116 }
117
118 static int __init dmar_parse_dev_scope(void *start, void *end, int *cnt,
119                                        struct pci_dev ***devices, u16 segment)
120 {
121         struct acpi_dmar_device_scope *scope;
122         void * tmp = start;
123         int index;
124         int ret;
125
126         *cnt = 0;
127         while (start < end) {
128                 scope = start;
129                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
130                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
131                         (*cnt)++;
132                 else
133                         printk(KERN_WARNING PREFIX
134                                 "Unsupported device scope\n");
135                 start += scope->length;
136         }
137         if (*cnt == 0)
138                 return 0;
139
140         *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL);
141         if (!*devices)
142                 return -ENOMEM;
143
144         start = tmp;
145         index = 0;
146         while (start < end) {
147                 scope = start;
148                 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
149                     scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) {
150                         ret = dmar_parse_one_dev_scope(scope,
151                                 &(*devices)[index], segment);
152                         if (ret) {
153                                 kfree(*devices);
154                                 return ret;
155                         }
156                         index ++;
157                 }
158                 start += scope->length;
159         }
160
161         return 0;
162 }
163
164 /**
165  * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
166  * structure which uniquely represent one DMA remapping hardware unit
167  * present in the platform
168  */
169 static int __init
170 dmar_parse_one_drhd(struct acpi_dmar_header *header)
171 {
172         struct acpi_dmar_hardware_unit *drhd;
173         struct dmar_drhd_unit *dmaru;
174         int ret = 0;
175
176         dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
177         if (!dmaru)
178                 return -ENOMEM;
179
180         dmaru->hdr = header;
181         drhd = (struct acpi_dmar_hardware_unit *)header;
182         dmaru->reg_base_addr = drhd->address;
183         dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
184
185         ret = alloc_iommu(dmaru);
186         if (ret) {
187                 kfree(dmaru);
188                 return ret;
189         }
190         dmar_register_drhd_unit(dmaru);
191         return 0;
192 }
193
194 static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru)
195 {
196         struct acpi_dmar_hardware_unit *drhd;
197         int ret = 0;
198
199         drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
200
201         if (dmaru->include_all)
202                 return 0;
203
204         ret = dmar_parse_dev_scope((void *)(drhd + 1),
205                                 ((void *)drhd) + drhd->header.length,
206                                 &dmaru->devices_cnt, &dmaru->devices,
207                                 drhd->segment);
208         if (ret) {
209                 list_del(&dmaru->list);
210                 kfree(dmaru);
211         }
212         return ret;
213 }
214
215 #ifdef CONFIG_DMAR
216 LIST_HEAD(dmar_rmrr_units);
217
218 static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
219 {
220         list_add(&rmrr->list, &dmar_rmrr_units);
221 }
222
223
224 static int __init
225 dmar_parse_one_rmrr(struct acpi_dmar_header *header)
226 {
227         struct acpi_dmar_reserved_memory *rmrr;
228         struct dmar_rmrr_unit *rmrru;
229
230         rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
231         if (!rmrru)
232                 return -ENOMEM;
233
234         rmrru->hdr = header;
235         rmrr = (struct acpi_dmar_reserved_memory *)header;
236         rmrru->base_address = rmrr->base_address;
237         rmrru->end_address = rmrr->end_address;
238
239         dmar_register_rmrr_unit(rmrru);
240         return 0;
241 }
242
243 static int __init
244 rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
245 {
246         struct acpi_dmar_reserved_memory *rmrr;
247         int ret;
248
249         rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
250         ret = dmar_parse_dev_scope((void *)(rmrr + 1),
251                 ((void *)rmrr) + rmrr->header.length,
252                 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
253
254         if (ret || (rmrru->devices_cnt == 0)) {
255                 list_del(&rmrru->list);
256                 kfree(rmrru);
257         }
258         return ret;
259 }
260 #endif
261
262 static void __init
263 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
264 {
265         struct acpi_dmar_hardware_unit *drhd;
266         struct acpi_dmar_reserved_memory *rmrr;
267
268         switch (header->type) {
269         case ACPI_DMAR_TYPE_HARDWARE_UNIT:
270                 drhd = (struct acpi_dmar_hardware_unit *)header;
271                 printk (KERN_INFO PREFIX
272                         "DRHD (flags: 0x%08x)base: 0x%016Lx\n",
273                         drhd->flags, (unsigned long long)drhd->address);
274                 break;
275         case ACPI_DMAR_TYPE_RESERVED_MEMORY:
276                 rmrr = (struct acpi_dmar_reserved_memory *)header;
277
278                 printk (KERN_INFO PREFIX
279                         "RMRR base: 0x%016Lx end: 0x%016Lx\n",
280                         (unsigned long long)rmrr->base_address,
281                         (unsigned long long)rmrr->end_address);
282                 break;
283         }
284 }
285
286 /**
287  * dmar_table_detect - checks to see if the platform supports DMAR devices
288  */
289 static int __init dmar_table_detect(void)
290 {
291         acpi_status status = AE_OK;
292
293         /* if we could find DMAR table, then there are DMAR devices */
294         status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
295                                 (struct acpi_table_header **)&dmar_tbl,
296                                 &dmar_tbl_size);
297
298         if (ACPI_SUCCESS(status) && !dmar_tbl) {
299                 printk (KERN_WARNING PREFIX "Unable to map DMAR\n");
300                 status = AE_NOT_FOUND;
301         }
302
303         return (ACPI_SUCCESS(status) ? 1 : 0);
304 }
305
306 /**
307  * parse_dmar_table - parses the DMA reporting table
308  */
309 static int __init
310 parse_dmar_table(void)
311 {
312         struct acpi_table_dmar *dmar;
313         struct acpi_dmar_header *entry_header;
314         int ret = 0;
315
316         /*
317          * Do it again, earlier dmar_tbl mapping could be mapped with
318          * fixed map.
319          */
320         dmar_table_detect();
321
322         dmar = (struct acpi_table_dmar *)dmar_tbl;
323         if (!dmar)
324                 return -ENODEV;
325
326         if (dmar->width < PAGE_SHIFT - 1) {
327                 printk(KERN_WARNING PREFIX "Invalid DMAR haw\n");
328                 return -EINVAL;
329         }
330
331         printk (KERN_INFO PREFIX "Host address width %d\n",
332                 dmar->width + 1);
333
334         entry_header = (struct acpi_dmar_header *)(dmar + 1);
335         while (((unsigned long)entry_header) <
336                         (((unsigned long)dmar) + dmar_tbl->length)) {
337                 /* Avoid looping forever on bad ACPI tables */
338                 if (entry_header->length == 0) {
339                         printk(KERN_WARNING PREFIX
340                                 "Invalid 0-length structure\n");
341                         ret = -EINVAL;
342                         break;
343                 }
344
345                 dmar_table_print_dmar_entry(entry_header);
346
347                 switch (entry_header->type) {
348                 case ACPI_DMAR_TYPE_HARDWARE_UNIT:
349                         ret = dmar_parse_one_drhd(entry_header);
350                         break;
351                 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
352 #ifdef CONFIG_DMAR
353                         ret = dmar_parse_one_rmrr(entry_header);
354 #endif
355                         break;
356                 default:
357                         printk(KERN_WARNING PREFIX
358                                 "Unknown DMAR structure type\n");
359                         ret = 0; /* for forward compatibility */
360                         break;
361                 }
362                 if (ret)
363                         break;
364
365                 entry_header = ((void *)entry_header + entry_header->length);
366         }
367         return ret;
368 }
369
370 int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
371                           struct pci_dev *dev)
372 {
373         int index;
374
375         while (dev) {
376                 for (index = 0; index < cnt; index++)
377                         if (dev == devices[index])
378                                 return 1;
379
380                 /* Check our parent */
381                 dev = dev->bus->self;
382         }
383
384         return 0;
385 }
386
387 struct dmar_drhd_unit *
388 dmar_find_matched_drhd_unit(struct pci_dev *dev)
389 {
390         struct dmar_drhd_unit *dmaru = NULL;
391         struct acpi_dmar_hardware_unit *drhd;
392
393         list_for_each_entry(dmaru, &dmar_drhd_units, list) {
394                 drhd = container_of(dmaru->hdr,
395                                     struct acpi_dmar_hardware_unit,
396                                     header);
397
398                 if (dmaru->include_all &&
399                     drhd->segment == pci_domain_nr(dev->bus))
400                         return dmaru;
401
402                 if (dmar_pci_device_match(dmaru->devices,
403                                           dmaru->devices_cnt, dev))
404                         return dmaru;
405         }
406
407         return NULL;
408 }
409
410 int __init dmar_dev_scope_init(void)
411 {
412         struct dmar_drhd_unit *drhd, *drhd_n;
413         int ret = -ENODEV;
414
415         list_for_each_entry_safe(drhd, drhd_n, &dmar_drhd_units, list) {
416                 ret = dmar_parse_dev(drhd);
417                 if (ret)
418                         return ret;
419         }
420
421 #ifdef CONFIG_DMAR
422         {
423                 struct dmar_rmrr_unit *rmrr, *rmrr_n;
424                 list_for_each_entry_safe(rmrr, rmrr_n, &dmar_rmrr_units, list) {
425                         ret = rmrr_parse_dev(rmrr);
426                         if (ret)
427                                 return ret;
428                 }
429         }
430 #endif
431
432         return ret;
433 }
434
435
436 int __init dmar_table_init(void)
437 {
438         static int dmar_table_initialized;
439         int ret;
440
441         if (dmar_table_initialized)
442                 return 0;
443
444         dmar_table_initialized = 1;
445
446         ret = parse_dmar_table();
447         if (ret) {
448                 if (ret != -ENODEV)
449                         printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
450                 return ret;
451         }
452
453         if (list_empty(&dmar_drhd_units)) {
454                 printk(KERN_INFO PREFIX "No DMAR devices found\n");
455                 return -ENODEV;
456         }
457
458 #ifdef CONFIG_DMAR
459         if (list_empty(&dmar_rmrr_units))
460                 printk(KERN_INFO PREFIX "No RMRR found\n");
461 #endif
462
463 #ifdef CONFIG_INTR_REMAP
464         parse_ioapics_under_ir();
465 #endif
466         return 0;
467 }
468
469 void __init detect_intel_iommu(void)
470 {
471         int ret;
472
473         ret = dmar_table_detect();
474
475         {
476 #ifdef CONFIG_INTR_REMAP
477                 struct acpi_table_dmar *dmar;
478                 /*
479                  * for now we will disable dma-remapping when interrupt
480                  * remapping is enabled.
481                  * When support for queued invalidation for IOTLB invalidation
482                  * is added, we will not need this any more.
483                  */
484                 dmar = (struct acpi_table_dmar *) dmar_tbl;
485                 if (ret && cpu_has_x2apic && dmar->flags & 0x1)
486                         printk(KERN_INFO
487                                "Queued invalidation will be enabled to support "
488                                "x2apic and Intr-remapping.\n");
489 #endif
490 #ifdef CONFIG_DMAR
491                 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
492                     !dmar_disabled)
493                         iommu_detected = 1;
494 #endif
495         }
496         early_acpi_os_unmap_memory(dmar_tbl, dmar_tbl_size);
497         dmar_tbl = NULL;
498 }
499
500
501 int alloc_iommu(struct dmar_drhd_unit *drhd)
502 {
503         struct intel_iommu *iommu;
504         int map_size;
505         u32 ver;
506         static int iommu_allocated = 0;
507         int agaw = 0;
508
509         iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
510         if (!iommu)
511                 return -ENOMEM;
512
513         iommu->seq_id = iommu_allocated++;
514         sprintf (iommu->name, "dmar%d", iommu->seq_id);
515
516         iommu->reg = ioremap(drhd->reg_base_addr, VTD_PAGE_SIZE);
517         if (!iommu->reg) {
518                 printk(KERN_ERR "IOMMU: can't map the region\n");
519                 goto error;
520         }
521         iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
522         iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
523
524 #ifdef CONFIG_DMAR
525         agaw = iommu_calculate_agaw(iommu);
526         if (agaw < 0) {
527                 printk(KERN_ERR
528                         "Cannot get a valid agaw for iommu (seq_id = %d)\n",
529                         iommu->seq_id);
530                 goto error;
531         }
532 #endif
533         iommu->agaw = agaw;
534
535         /* the registers might be more than one page */
536         map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
537                 cap_max_fault_reg_offset(iommu->cap));
538         map_size = VTD_PAGE_ALIGN(map_size);
539         if (map_size > VTD_PAGE_SIZE) {
540                 iounmap(iommu->reg);
541                 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
542                 if (!iommu->reg) {
543                         printk(KERN_ERR "IOMMU: can't map the region\n");
544                         goto error;
545                 }
546         }
547
548         ver = readl(iommu->reg + DMAR_VER_REG);
549         pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
550                 (unsigned long long)drhd->reg_base_addr,
551                 DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
552                 (unsigned long long)iommu->cap,
553                 (unsigned long long)iommu->ecap);
554
555         spin_lock_init(&iommu->register_lock);
556
557         drhd->iommu = iommu;
558         return 0;
559 error:
560         kfree(iommu);
561         return -1;
562 }
563
564 void free_iommu(struct intel_iommu *iommu)
565 {
566         if (!iommu)
567                 return;
568
569 #ifdef CONFIG_DMAR
570         free_dmar_iommu(iommu);
571 #endif
572
573         if (iommu->reg)
574                 iounmap(iommu->reg);
575         kfree(iommu);
576 }
577
578 /*
579  * Reclaim all the submitted descriptors which have completed its work.
580  */
581 static inline void reclaim_free_desc(struct q_inval *qi)
582 {
583         while (qi->desc_status[qi->free_tail] == QI_DONE) {
584                 qi->desc_status[qi->free_tail] = QI_FREE;
585                 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
586                 qi->free_cnt++;
587         }
588 }
589
590 static int qi_check_fault(struct intel_iommu *iommu, int index)
591 {
592         u32 fault;
593         int head;
594         struct q_inval *qi = iommu->qi;
595         int wait_index = (index + 1) % QI_LENGTH;
596
597         fault = readl(iommu->reg + DMAR_FSTS_REG);
598
599         /*
600          * If IQE happens, the head points to the descriptor associated
601          * with the error. No new descriptors are fetched until the IQE
602          * is cleared.
603          */
604         if (fault & DMA_FSTS_IQE) {
605                 head = readl(iommu->reg + DMAR_IQH_REG);
606                 if ((head >> 4) == index) {
607                         memcpy(&qi->desc[index], &qi->desc[wait_index],
608                                         sizeof(struct qi_desc));
609                         __iommu_flush_cache(iommu, &qi->desc[index],
610                                         sizeof(struct qi_desc));
611                         writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
612                         return -EINVAL;
613                 }
614         }
615
616         return 0;
617 }
618
619 /*
620  * Submit the queued invalidation descriptor to the remapping
621  * hardware unit and wait for its completion.
622  */
623 int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
624 {
625         int rc = 0;
626         struct q_inval *qi = iommu->qi;
627         struct qi_desc *hw, wait_desc;
628         int wait_index, index;
629         unsigned long flags;
630
631         if (!qi)
632                 return 0;
633
634         hw = qi->desc;
635
636         spin_lock_irqsave(&qi->q_lock, flags);
637         while (qi->free_cnt < 3) {
638                 spin_unlock_irqrestore(&qi->q_lock, flags);
639                 cpu_relax();
640                 spin_lock_irqsave(&qi->q_lock, flags);
641         }
642
643         index = qi->free_head;
644         wait_index = (index + 1) % QI_LENGTH;
645
646         qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
647
648         hw[index] = *desc;
649
650         wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
651                         QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
652         wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
653
654         hw[wait_index] = wait_desc;
655
656         __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
657         __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
658
659         qi->free_head = (qi->free_head + 2) % QI_LENGTH;
660         qi->free_cnt -= 2;
661
662         /*
663          * update the HW tail register indicating the presence of
664          * new descriptors.
665          */
666         writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
667
668         while (qi->desc_status[wait_index] != QI_DONE) {
669                 /*
670                  * We will leave the interrupts disabled, to prevent interrupt
671                  * context to queue another cmd while a cmd is already submitted
672                  * and waiting for completion on this cpu. This is to avoid
673                  * a deadlock where the interrupt context can wait indefinitely
674                  * for free slots in the queue.
675                  */
676                 rc = qi_check_fault(iommu, index);
677                 if (rc)
678                         goto out;
679
680                 spin_unlock(&qi->q_lock);
681                 cpu_relax();
682                 spin_lock(&qi->q_lock);
683         }
684 out:
685         qi->desc_status[index] = qi->desc_status[wait_index] = QI_DONE;
686
687         reclaim_free_desc(qi);
688         spin_unlock_irqrestore(&qi->q_lock, flags);
689
690         return rc;
691 }
692
693 /*
694  * Flush the global interrupt entry cache.
695  */
696 void qi_global_iec(struct intel_iommu *iommu)
697 {
698         struct qi_desc desc;
699
700         desc.low = QI_IEC_TYPE;
701         desc.high = 0;
702
703         /* should never fail */
704         qi_submit_sync(&desc, iommu);
705 }
706
707 int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
708                      u64 type, int non_present_entry_flush)
709 {
710         struct qi_desc desc;
711
712         if (non_present_entry_flush) {
713                 if (!cap_caching_mode(iommu->cap))
714                         return 1;
715                 else
716                         did = 0;
717         }
718
719         desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
720                         | QI_CC_GRAN(type) | QI_CC_TYPE;
721         desc.high = 0;
722
723         return qi_submit_sync(&desc, iommu);
724 }
725
726 int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
727                    unsigned int size_order, u64 type,
728                    int non_present_entry_flush)
729 {
730         u8 dw = 0, dr = 0;
731
732         struct qi_desc desc;
733         int ih = 0;
734
735         if (non_present_entry_flush) {
736                 if (!cap_caching_mode(iommu->cap))
737                         return 1;
738                 else
739                         did = 0;
740         }
741
742         if (cap_write_drain(iommu->cap))
743                 dw = 1;
744
745         if (cap_read_drain(iommu->cap))
746                 dr = 1;
747
748         desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
749                 | QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
750         desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
751                 | QI_IOTLB_AM(size_order);
752
753         return qi_submit_sync(&desc, iommu);
754 }
755
756 /*
757  * Enable Queued Invalidation interface. This is a must to support
758  * interrupt-remapping. Also used by DMA-remapping, which replaces
759  * register based IOTLB invalidation.
760  */
761 int dmar_enable_qi(struct intel_iommu *iommu)
762 {
763         u32 cmd, sts;
764         unsigned long flags;
765         struct q_inval *qi;
766
767         if (!ecap_qis(iommu->ecap))
768                 return -ENOENT;
769
770         /*
771          * queued invalidation is already setup and enabled.
772          */
773         if (iommu->qi)
774                 return 0;
775
776         iommu->qi = kmalloc(sizeof(*qi), GFP_KERNEL);
777         if (!iommu->qi)
778                 return -ENOMEM;
779
780         qi = iommu->qi;
781
782         qi->desc = (void *)(get_zeroed_page(GFP_KERNEL));
783         if (!qi->desc) {
784                 kfree(qi);
785                 iommu->qi = 0;
786                 return -ENOMEM;
787         }
788
789         qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_KERNEL);
790         if (!qi->desc_status) {
791                 free_page((unsigned long) qi->desc);
792                 kfree(qi);
793                 iommu->qi = 0;
794                 return -ENOMEM;
795         }
796
797         qi->free_head = qi->free_tail = 0;
798         qi->free_cnt = QI_LENGTH;
799
800         spin_lock_init(&qi->q_lock);
801
802         spin_lock_irqsave(&iommu->register_lock, flags);
803         /* write zero to the tail reg */
804         writel(0, iommu->reg + DMAR_IQT_REG);
805
806         dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
807
808         cmd = iommu->gcmd | DMA_GCMD_QIE;
809         iommu->gcmd |= DMA_GCMD_QIE;
810         writel(cmd, iommu->reg + DMAR_GCMD_REG);
811
812         /* Make sure hardware complete it */
813         IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
814         spin_unlock_irqrestore(&iommu->register_lock, flags);
815
816         return 0;
817 }
818
819 /* iommu interrupt handling. Most stuff are MSI-like. */
820
821 enum faulttype {
822         DMA_REMAP,
823         INTR_REMAP,
824         UNKNOWN,
825 };
826
827 static const char *dma_remap_fault_reasons[] =
828 {
829         "Software",
830         "Present bit in root entry is clear",
831         "Present bit in context entry is clear",
832         "Invalid context entry",
833         "Access beyond MGAW",
834         "PTE Write access is not set",
835         "PTE Read access is not set",
836         "Next page table ptr is invalid",
837         "Root table address invalid",
838         "Context table ptr is invalid",
839         "non-zero reserved fields in RTP",
840         "non-zero reserved fields in CTP",
841         "non-zero reserved fields in PTE",
842 };
843
844 static const char *intr_remap_fault_reasons[] =
845 {
846         "Detected reserved fields in the decoded interrupt-remapped request",
847         "Interrupt index exceeded the interrupt-remapping table size",
848         "Present field in the IRTE entry is clear",
849         "Error accessing interrupt-remapping table pointed by IRTA_REG",
850         "Detected reserved fields in the IRTE entry",
851         "Blocked a compatibility format interrupt request",
852         "Blocked an interrupt request due to source-id verification failure",
853 };
854
855 #define MAX_FAULT_REASON_IDX    (ARRAY_SIZE(fault_reason_strings) - 1)
856
857 const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
858 {
859         if (fault_reason >= 0x20 && (fault_reason <= 0x20 +
860                                      ARRAY_SIZE(intr_remap_fault_reasons))) {
861                 *fault_type = INTR_REMAP;
862                 return intr_remap_fault_reasons[fault_reason - 0x20];
863         } else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
864                 *fault_type = DMA_REMAP;
865                 return dma_remap_fault_reasons[fault_reason];
866         } else {
867                 *fault_type = UNKNOWN;
868                 return "Unknown";
869         }
870 }
871
872 void dmar_msi_unmask(unsigned int irq)
873 {
874         struct intel_iommu *iommu = get_irq_data(irq);
875         unsigned long flag;
876
877         /* unmask it */
878         spin_lock_irqsave(&iommu->register_lock, flag);
879         writel(0, iommu->reg + DMAR_FECTL_REG);
880         /* Read a reg to force flush the post write */
881         readl(iommu->reg + DMAR_FECTL_REG);
882         spin_unlock_irqrestore(&iommu->register_lock, flag);
883 }
884
885 void dmar_msi_mask(unsigned int irq)
886 {
887         unsigned long flag;
888         struct intel_iommu *iommu = get_irq_data(irq);
889
890         /* mask it */
891         spin_lock_irqsave(&iommu->register_lock, flag);
892         writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
893         /* Read a reg to force flush the post write */
894         readl(iommu->reg + DMAR_FECTL_REG);
895         spin_unlock_irqrestore(&iommu->register_lock, flag);
896 }
897
898 void dmar_msi_write(int irq, struct msi_msg *msg)
899 {
900         struct intel_iommu *iommu = get_irq_data(irq);
901         unsigned long flag;
902
903         spin_lock_irqsave(&iommu->register_lock, flag);
904         writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
905         writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
906         writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
907         spin_unlock_irqrestore(&iommu->register_lock, flag);
908 }
909
910 void dmar_msi_read(int irq, struct msi_msg *msg)
911 {
912         struct intel_iommu *iommu = get_irq_data(irq);
913         unsigned long flag;
914
915         spin_lock_irqsave(&iommu->register_lock, flag);
916         msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
917         msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
918         msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
919         spin_unlock_irqrestore(&iommu->register_lock, flag);
920 }
921
922 static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
923                 u8 fault_reason, u16 source_id, unsigned long long addr)
924 {
925         const char *reason;
926         int fault_type;
927
928         reason = dmar_get_fault_reason(fault_reason, &fault_type);
929
930         if (fault_type == INTR_REMAP)
931                 printk(KERN_ERR "INTR-REMAP: Request device [[%02x:%02x.%d] "
932                        "fault index %llx\n"
933                         "INTR-REMAP:[fault reason %02d] %s\n",
934                         (source_id >> 8), PCI_SLOT(source_id & 0xFF),
935                         PCI_FUNC(source_id & 0xFF), addr >> 48,
936                         fault_reason, reason);
937         else
938                 printk(KERN_ERR
939                        "DMAR:[%s] Request device [%02x:%02x.%d] "
940                        "fault addr %llx \n"
941                        "DMAR:[fault reason %02d] %s\n",
942                        (type ? "DMA Read" : "DMA Write"),
943                        (source_id >> 8), PCI_SLOT(source_id & 0xFF),
944                        PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
945         return 0;
946 }
947
948 #define PRIMARY_FAULT_REG_LEN (16)
949 static irqreturn_t dmar_fault(int irq, void *dev_id)
950 {
951         struct intel_iommu *iommu = dev_id;
952         int reg, fault_index;
953         u32 fault_status;
954         unsigned long flag;
955
956         spin_lock_irqsave(&iommu->register_lock, flag);
957         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
958         if (fault_status)
959                 printk(KERN_ERR "DRHD: handling fault status reg %x\n",
960                        fault_status);
961
962         /* TBD: ignore advanced fault log currently */
963         if (!(fault_status & DMA_FSTS_PPF))
964                 goto clear_rest;
965
966         fault_index = dma_fsts_fault_record_index(fault_status);
967         reg = cap_fault_reg_offset(iommu->cap);
968         while (1) {
969                 u8 fault_reason;
970                 u16 source_id;
971                 u64 guest_addr;
972                 int type;
973                 u32 data;
974
975                 /* highest 32 bits */
976                 data = readl(iommu->reg + reg +
977                                 fault_index * PRIMARY_FAULT_REG_LEN + 12);
978                 if (!(data & DMA_FRCD_F))
979                         break;
980
981                 fault_reason = dma_frcd_fault_reason(data);
982                 type = dma_frcd_type(data);
983
984                 data = readl(iommu->reg + reg +
985                                 fault_index * PRIMARY_FAULT_REG_LEN + 8);
986                 source_id = dma_frcd_source_id(data);
987
988                 guest_addr = dmar_readq(iommu->reg + reg +
989                                 fault_index * PRIMARY_FAULT_REG_LEN);
990                 guest_addr = dma_frcd_page_addr(guest_addr);
991                 /* clear the fault */
992                 writel(DMA_FRCD_F, iommu->reg + reg +
993                         fault_index * PRIMARY_FAULT_REG_LEN + 12);
994
995                 spin_unlock_irqrestore(&iommu->register_lock, flag);
996
997                 dmar_fault_do_one(iommu, type, fault_reason,
998                                 source_id, guest_addr);
999
1000                 fault_index++;
1001                 if (fault_index > cap_num_fault_regs(iommu->cap))
1002                         fault_index = 0;
1003                 spin_lock_irqsave(&iommu->register_lock, flag);
1004         }
1005 clear_rest:
1006         /* clear all the other faults */
1007         fault_status = readl(iommu->reg + DMAR_FSTS_REG);
1008         writel(fault_status, iommu->reg + DMAR_FSTS_REG);
1009
1010         spin_unlock_irqrestore(&iommu->register_lock, flag);
1011         return IRQ_HANDLED;
1012 }
1013
1014 int dmar_set_interrupt(struct intel_iommu *iommu)
1015 {
1016         int irq, ret;
1017
1018         /*
1019          * Check if the fault interrupt is already initialized.
1020          */
1021         if (iommu->irq)
1022                 return 0;
1023
1024         irq = create_irq();
1025         if (!irq) {
1026                 printk(KERN_ERR "IOMMU: no free vectors\n");
1027                 return -EINVAL;
1028         }
1029
1030         set_irq_data(irq, iommu);
1031         iommu->irq = irq;
1032
1033         ret = arch_setup_dmar_msi(irq);
1034         if (ret) {
1035                 set_irq_data(irq, NULL);
1036                 iommu->irq = 0;
1037                 destroy_irq(irq);
1038                 return 0;
1039         }
1040
1041         /* Force fault register is cleared */
1042         dmar_fault(irq, iommu);
1043
1044         ret = request_irq(irq, dmar_fault, 0, iommu->name, iommu);
1045         if (ret)
1046                 printk(KERN_ERR "IOMMU: can't request irq\n");
1047         return ret;
1048 }
1049
1050 int __init enable_drhd_fault_handling(void)
1051 {
1052         struct dmar_drhd_unit *drhd;
1053
1054         /*
1055          * Enable fault control interrupt.
1056          */
1057         for_each_drhd_unit(drhd) {
1058                 int ret;
1059                 struct intel_iommu *iommu = drhd->iommu;
1060                 ret = dmar_set_interrupt(iommu);
1061
1062                 if (ret) {
1063                         printk(KERN_ERR "DRHD %Lx: failed to enable fault, "
1064                                " interrupt, ret %d\n",
1065                                (unsigned long long)drhd->reg_base_addr, ret);
1066                         return -1;
1067                 }
1068         }
1069
1070         return 0;
1071 }