[net-next-2.6.git] drivers/dma/ioat/dma.c
/*
 * Intel I/OAT DMA Linux driver
 * Copyright(c) 2004 - 2009 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * The full GNU General Public License is included in this distribution in
 * the file called "COPYING".
 *
 */

/*
 * This driver supports an Intel I/OAT DMA engine, which does asynchronous
 * copy operations.
 */

#include <linux/init.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/interrupt.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
#include <linux/dma-mapping.h>
#include <linux/workqueue.h>
#include <linux/i7300_idle.h>
#include "dma.h"
#include "registers.h"
#include "hw.h"

int ioat_pending_level = 4;
module_param(ioat_pending_level, int, 0644);
MODULE_PARM_DESC(ioat_pending_level,
                 "high-water mark for pushing ioat descriptors (default: 4)");

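/*
 * Descriptors queued by ->tx_submit() are only kicked to the hardware (via
 * the APPEND channel command) once a channel's pending count reaches
 * ioat_pending_level, or when a client explicitly calls
 * ->device_issue_pending().  A larger value batches more descriptors per
 * MMIO write at the cost of extra submission latency.
 */
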
/* internal functions */
static void ioat1_cleanup(struct ioat_dma_chan *ioat);
static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat);

/**
 * ioat_dma_do_interrupt - handler used for single vector interrupt mode
 * @irq: interrupt id
 * @data: interrupt data
 */
static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
{
        struct ioatdma_device *instance = data;
        struct ioat_chan_common *chan;
        unsigned long attnstatus;
        int bit;
        u8 intrctrl;

        intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);

        if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
                return IRQ_NONE;

        if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
                writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
                return IRQ_NONE;
        }

        attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
        for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
                chan = ioat_chan_by_index(instance, bit);
                tasklet_schedule(&chan->cleanup_task);
        }

        writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
        return IRQ_HANDLED;
}

/**
 * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
 * @irq: interrupt id
 * @data: interrupt data
 */
static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
{
        struct ioat_chan_common *chan = data;

        tasklet_schedule(&chan->cleanup_task);

        return IRQ_HANDLED;
}

static void ioat1_cleanup_tasklet(unsigned long data);

/* common channel initialization */
void ioat_init_channel(struct ioatdma_device *device,
                       struct ioat_chan_common *chan, int idx,
                       work_func_t work_fn, void (*tasklet)(unsigned long),
                       unsigned long tasklet_data)
{
        struct dma_device *dma = &device->common;

        chan->device = device;
        chan->reg_base = device->reg_base + (0x80 * (idx + 1));
        INIT_DELAYED_WORK(&chan->work, work_fn);
        spin_lock_init(&chan->cleanup_lock);
        chan->common.device = dma;
        list_add_tail(&chan->common.device_node, &dma->channels);
        device->idx[idx] = chan;
        tasklet_init(&chan->cleanup_task, tasklet, tasklet_data);
        /* stays disabled until channel resources are allocated */
        tasklet_disable(&chan->cleanup_task);
}

static void ioat1_reset_part2(struct work_struct *work);

/**
 * ioat1_enumerate_channels - find and initialize the device's channels
 * @device: the device to be enumerated
 */
static int ioat1_enumerate_channels(struct ioatdma_device *device)
{
        u8 xfercap_scale;
        u32 xfercap;
        int i;
        struct ioat_dma_chan *ioat;
        struct device *dev = &device->pdev->dev;
        struct dma_device *dma = &device->common;

        INIT_LIST_HEAD(&dma->channels);
        dma->chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
        xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
        xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
        dev_dbg(dev, "%s: xfercap = %d\n", __func__, xfercap);

#ifdef  CONFIG_I7300_IDLE_IOAT_CHANNEL
        if (i7300_idle_platform_probe(NULL, NULL, 1) == 0)
                dma->chancnt--;
#endif
        for (i = 0; i < dma->chancnt; i++) {
                ioat = devm_kzalloc(dev, sizeof(*ioat), GFP_KERNEL);
                if (!ioat)
                        break;

                ioat_init_channel(device, &ioat->base, i,
                                  ioat1_reset_part2,
                                  ioat1_cleanup_tasklet,
                                  (unsigned long) ioat);
                ioat->xfercap = xfercap;
                spin_lock_init(&ioat->desc_lock);
                INIT_LIST_HEAD(&ioat->free_desc);
                INIT_LIST_HEAD(&ioat->used_desc);
        }
        dma->chancnt = i;
        return i;
}

/**
 * __ioat1_dma_memcpy_issue_pending - push potentially unrecognized appended
 *                                    descriptors to hw
 * @ioat: IOAT DMA channel handle
 */
static inline void
__ioat1_dma_memcpy_issue_pending(struct ioat_dma_chan *ioat)
{
        void __iomem *reg_base = ioat->base.reg_base;

        dev_dbg(to_dev(&ioat->base), "%s: pending: %d\n",
                __func__, ioat->pending);
        ioat->pending = 0;
        writeb(IOAT_CHANCMD_APPEND, reg_base + IOAT1_CHANCMD_OFFSET);
}

static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
{
        struct ioat_dma_chan *ioat = to_ioat_chan(chan);

        if (ioat->pending > 0) {
                spin_lock_bh(&ioat->desc_lock);
                __ioat1_dma_memcpy_issue_pending(ioat);
                spin_unlock_bh(&ioat->desc_lock);
        }
}

/**
 * ioat1_reset_part2 - reinit the channel after a reset
 * @work: delayed work item embedded in the channel being reinitialized
 */
static void ioat1_reset_part2(struct work_struct *work)
{
        struct ioat_chan_common *chan;
        struct ioat_dma_chan *ioat;
        struct ioat_desc_sw *desc;
        int dmacount;
        bool start_null = false;

        chan = container_of(work, struct ioat_chan_common, work.work);
        ioat = container_of(chan, struct ioat_dma_chan, base);
        spin_lock_bh(&chan->cleanup_lock);
        spin_lock_bh(&ioat->desc_lock);

        chan->completion_virt->low = 0;
        chan->completion_virt->high = 0;
        ioat->pending = 0;

        /* count the descriptors waiting */
        dmacount = 0;
        if (ioat->used_desc.prev) {
                desc = to_ioat_desc(ioat->used_desc.prev);
                do {
                        dmacount++;
                        desc = to_ioat_desc(desc->node.next);
                } while (&desc->node != ioat->used_desc.next);
        }

        if (dmacount) {
                /*
                 * write the new starting descriptor address
                 * this puts channel engine into ARMED state
                 */
                desc = to_ioat_desc(ioat->used_desc.prev);
                writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF,
                       chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
                writel(((u64) desc->txd.phys) >> 32,
                       chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);

                writeb(IOAT_CHANCMD_START, chan->reg_base
                        + IOAT_CHANCMD_OFFSET(chan->device->version));
        } else
                start_null = true;
        spin_unlock_bh(&ioat->desc_lock);
        spin_unlock_bh(&chan->cleanup_lock);

        dev_err(to_dev(chan),
                "chan%d reset - %d descs waiting, %d total desc\n",
                chan_num(chan), dmacount, ioat->desccount);

        if (start_null)
                ioat1_dma_start_null_desc(ioat);
}

/**
 * ioat1_reset_channel - restart a channel
 * @ioat: IOAT DMA channel handle
 */
static void ioat1_reset_channel(struct ioat_dma_chan *ioat)
{
        struct ioat_chan_common *chan = &ioat->base;
        void __iomem *reg_base = chan->reg_base;
        u32 chansts, chanerr;

        if (!ioat->used_desc.prev)
                return;

        dev_dbg(to_dev(chan), "%s\n", __func__);
        chanerr = readl(reg_base + IOAT_CHANERR_OFFSET);
        chansts = (chan->completion_virt->low
                                        & IOAT_CHANSTS_DMA_TRANSFER_STATUS);
        if (chanerr) {
                dev_err(to_dev(chan),
                        "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
                        chan_num(chan), chansts, chanerr);
                writel(chanerr, reg_base + IOAT_CHANERR_OFFSET);
        }

        /*
         * whack it upside the head with a reset
         * and wait for things to settle out.
         * force the pending count to a really big negative
         * to make sure no one forces an issue_pending
         * while we're waiting.
         */

        spin_lock_bh(&ioat->desc_lock);
        ioat->pending = INT_MIN;
        writeb(IOAT_CHANCMD_RESET,
               reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
        spin_unlock_bh(&ioat->desc_lock);

        /* schedule the 2nd half instead of sleeping a long time */
        schedule_delayed_work(&chan->work, RESET_DELAY);
}

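/*
 * Note: the reset is deliberately split in two.  ioat1_reset_channel() issues
 * the RESET command and parks ->pending at INT_MIN so issue_pending is
 * suppressed while the reset is in flight, then ioat1_reset_part2() runs
 * RESET_DELAY later to re-program the chain address and restart the engine
 * once the hardware has had time to settle.
 */
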
/**
 * ioat1_chan_watchdog - watch for stuck channels
 * @work: delayed work item embedded in the ioatdma_device
 */
static void ioat1_chan_watchdog(struct work_struct *work)
{
        struct ioatdma_device *device =
                container_of(work, struct ioatdma_device, work.work);
        struct ioat_dma_chan *ioat;
        struct ioat_chan_common *chan;
        int i;

        union {
                u64 full;
                struct {
                        u32 low;
                        u32 high;
                };
        } completion_hw;
        unsigned long compl_desc_addr_hw;

        for (i = 0; i < device->common.chancnt; i++) {
                chan = ioat_chan_by_index(device, i);
                ioat = container_of(chan, struct ioat_dma_chan, base);

                if (/* have we started processing anything yet */
                    chan->last_completion
                    /* and nothing has completed since the last watchdog cycle */
                    && (chan->last_completion == chan->watchdog_completion)
                    /* and TCP has been stuck on one cookie since the last watchdog */
                    && (chan->watchdog_tcp_cookie == chan->watchdog_last_tcp_cookie)
                    && (chan->watchdog_tcp_cookie != chan->completed_cookie)
                    /* is there something in the chain to be processed? */
                    /* CB1 chain always has at least the last one processed */
                    && (ioat->used_desc.prev != ioat->used_desc.next)
                    && ioat->pending == 0) {

                        /*
                         * Check the CHANSTS register for the completed
                         * descriptor address.  If it differs from the
                         * completion writeback, is non-zero, and has changed
                         * since the last watchdog pass, the channel is still
                         * making progress and the problem is in the
                         * completion writeback, so refresh the writeback
                         * with the actual CHANSTS value.  Otherwise, try
                         * resetting the channel.
                         */

                        completion_hw.low = readl(chan->reg_base +
                                IOAT_CHANSTS_OFFSET_LOW(chan->device->version));
                        completion_hw.high = readl(chan->reg_base +
                                IOAT_CHANSTS_OFFSET_HIGH(chan->device->version));
#if (BITS_PER_LONG == 64)
                        compl_desc_addr_hw =
                                completion_hw.full
                                & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
#else
                        compl_desc_addr_hw =
                                completion_hw.low & IOAT_LOW_COMPLETION_MASK;
#endif

                        if ((compl_desc_addr_hw != 0)
                           && (compl_desc_addr_hw != chan->watchdog_completion)
                           && (compl_desc_addr_hw != chan->last_compl_desc_addr_hw)) {
                                chan->last_compl_desc_addr_hw = compl_desc_addr_hw;
                                chan->completion_virt->low = completion_hw.low;
                                chan->completion_virt->high = completion_hw.high;
                        } else {
                                ioat1_reset_channel(ioat);
                                chan->watchdog_completion = 0;
                                chan->last_compl_desc_addr_hw = 0;
                        }
                } else {
                        chan->last_compl_desc_addr_hw = 0;
                        chan->watchdog_completion = chan->last_completion;
                }

                chan->watchdog_last_tcp_cookie = chan->watchdog_tcp_cookie;
        }

        schedule_delayed_work(&device->work, WATCHDOG_DELAY);
}

static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
{
        struct dma_chan *c = tx->chan;
        struct ioat_dma_chan *ioat = to_ioat_chan(c);
        struct ioat_desc_sw *desc = tx_to_ioat_desc(tx);
        struct ioat_desc_sw *first;
        struct ioat_desc_sw *chain_tail;
        dma_cookie_t cookie;

        spin_lock_bh(&ioat->desc_lock);
        /* cookie incr and addition to used_list must be atomic */
        cookie = c->cookie;
        cookie++;
        if (cookie < 0)
                cookie = 1;
        c->cookie = cookie;
        tx->cookie = cookie;
        dev_dbg(to_dev(&ioat->base), "%s: cookie: %d\n", __func__, cookie);

        /* write address into NextDescriptor field of last desc in chain */
        first = to_ioat_desc(tx->tx_list.next);
        chain_tail = to_ioat_desc(ioat->used_desc.prev);
        /* make descriptor updates globally visible before chaining */
        wmb();
        chain_tail->hw->next = first->txd.phys;
        list_splice_tail_init(&tx->tx_list, &ioat->used_desc);
        dump_desc_dbg(ioat, chain_tail);
        dump_desc_dbg(ioat, first);

        ioat->pending += desc->tx_cnt;
        if (ioat->pending >= ioat_pending_level)
                __ioat1_dma_memcpy_issue_pending(ioat);
        spin_unlock_bh(&ioat->desc_lock);

        return cookie;
}

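/*
 * Cookie handling: the channel cookie increases monotonically and wraps back
 * to 1 (never 0 or negative), so a valid cookie always identifies the most
 * recently submitted descriptor.  ioat1_cleanup() later copies the cookie of
 * the last completed descriptor into ->completed_cookie, which is what
 * ioat1_dma_is_complete() compares against to report progress to clients.
 */
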
/**
 * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
 * @ioat: the channel supplying the memory pool for the descriptors
 * @flags: allocation flags
 */
static struct ioat_desc_sw *
ioat_dma_alloc_descriptor(struct ioat_dma_chan *ioat, gfp_t flags)
{
        struct ioat_dma_descriptor *desc;
        struct ioat_desc_sw *desc_sw;
        struct ioatdma_device *ioatdma_device;
        dma_addr_t phys;

        ioatdma_device = ioat->base.device;
        desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
        if (unlikely(!desc))
                return NULL;

        desc_sw = kzalloc(sizeof(*desc_sw), flags);
        if (unlikely(!desc_sw)) {
                pci_pool_free(ioatdma_device->dma_pool, desc, phys);
                return NULL;
        }

        memset(desc, 0, sizeof(*desc));

        dma_async_tx_descriptor_init(&desc_sw->txd, &ioat->base.common);
        desc_sw->txd.tx_submit = ioat1_tx_submit;
        desc_sw->hw = desc;
        desc_sw->txd.phys = phys;
        set_desc_id(desc_sw, -1);

        return desc_sw;
}

static int ioat_initial_desc_count = 256;
module_param(ioat_initial_desc_count, int, 0644);
MODULE_PARM_DESC(ioat_initial_desc_count,
                 "ioat1: initial descriptors per channel (default: 256)");
/**
 * ioat1_dma_alloc_chan_resources - returns the number of allocated descriptors
 * @c: the channel to be filled out
 */
static int ioat1_dma_alloc_chan_resources(struct dma_chan *c)
{
        struct ioat_dma_chan *ioat = to_ioat_chan(c);
        struct ioat_chan_common *chan = &ioat->base;
        struct ioat_desc_sw *desc;
        u16 chanctrl;
        u32 chanerr;
        int i;
        LIST_HEAD(tmp_list);

        /* have we already been set up? */
        if (!list_empty(&ioat->free_desc))
                return ioat->desccount;

        /* Setup register to interrupt and write completion status on error */
        chanctrl = IOAT_CHANCTRL_ERR_INT_EN |
                IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
                IOAT_CHANCTRL_ERR_COMPLETION_EN;
        writew(chanctrl, chan->reg_base + IOAT_CHANCTRL_OFFSET);

        chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET);
        if (chanerr) {
                dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr);
                writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET);
        }

        /* Allocate descriptors */
        for (i = 0; i < ioat_initial_desc_count; i++) {
                desc = ioat_dma_alloc_descriptor(ioat, GFP_KERNEL);
                if (!desc) {
                        dev_err(to_dev(chan), "Only %d initial descriptors\n", i);
                        break;
                }
                set_desc_id(desc, i);
                list_add_tail(&desc->node, &tmp_list);
        }
        spin_lock_bh(&ioat->desc_lock);
        ioat->desccount = i;
        list_splice(&tmp_list, &ioat->free_desc);
        spin_unlock_bh(&ioat->desc_lock);

        /* allocate a completion writeback area */
        /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
        chan->completion_virt = pci_pool_alloc(chan->device->completion_pool,
                                               GFP_KERNEL,
                                               &chan->completion_addr);
        memset(chan->completion_virt, 0,
               sizeof(*chan->completion_virt));
        writel(((u64) chan->completion_addr) & 0x00000000FFFFFFFF,
               chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
        writel(((u64) chan->completion_addr) >> 32,
               chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);

        tasklet_enable(&chan->cleanup_task);
        ioat1_dma_start_null_desc(ioat);  /* give chain to dma device */
        dev_dbg(to_dev(chan), "%s: allocated %d descriptors\n",
                __func__, ioat->desccount);
        return ioat->desccount;
}

/**
 * ioat1_dma_free_chan_resources - release all the descriptors
 * @c: the channel to be cleaned
 */
static void ioat1_dma_free_chan_resources(struct dma_chan *c)
{
        struct ioat_dma_chan *ioat = to_ioat_chan(c);
        struct ioat_chan_common *chan = &ioat->base;
        struct ioatdma_device *ioatdma_device = chan->device;
        struct ioat_desc_sw *desc, *_desc;
        int in_use_descs = 0;

        /* Before freeing channel resources first check
         * if they have been previously allocated for this channel.
         */
        if (ioat->desccount == 0)
                return;

        tasklet_disable(&chan->cleanup_task);
        ioat1_cleanup(ioat);

        /* Delay 100ms after reset to allow internal DMA logic to quiesce
         * before removing DMA descriptor resources.
         */
        writeb(IOAT_CHANCMD_RESET,
               chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version));
        mdelay(100);

        spin_lock_bh(&ioat->desc_lock);
        list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) {
                dev_dbg(to_dev(chan), "%s: freeing %d from used list\n",
                        __func__, desc_id(desc));
                dump_desc_dbg(ioat, desc);
                in_use_descs++;
                list_del(&desc->node);
                pci_pool_free(ioatdma_device->dma_pool, desc->hw,
                              desc->txd.phys);
                kfree(desc);
        }
        list_for_each_entry_safe(desc, _desc,
                                 &ioat->free_desc, node) {
                list_del(&desc->node);
                pci_pool_free(ioatdma_device->dma_pool, desc->hw,
                              desc->txd.phys);
                kfree(desc);
        }
        spin_unlock_bh(&ioat->desc_lock);

        pci_pool_free(ioatdma_device->completion_pool,
                      chan->completion_virt,
                      chan->completion_addr);

        /* one is ok since we left it on there on purpose */
        if (in_use_descs > 1)
                dev_err(to_dev(chan), "Freeing %d in use descriptors!\n",
                        in_use_descs - 1);

        chan->last_completion = chan->completion_addr = 0;
        chan->watchdog_completion = 0;
        chan->last_compl_desc_addr_hw = 0;
        chan->watchdog_tcp_cookie = chan->watchdog_last_tcp_cookie = 0;
        ioat->pending = 0;
        ioat->desccount = 0;
}

/**
 * ioat1_dma_get_next_descriptor - return the next available descriptor
 * @ioat: IOAT DMA channel handle
 *
 * Gets the next descriptor from the chain, and must be called with the
 * channel's desc_lock held.  Allocates more descriptors if the channel
 * has run out.
 */
static struct ioat_desc_sw *
ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat)
{
        struct ioat_desc_sw *new;

        if (!list_empty(&ioat->free_desc)) {
                new = to_ioat_desc(ioat->free_desc.next);
                list_del(&new->node);
        } else {
                /* try to get another desc */
                new = ioat_dma_alloc_descriptor(ioat, GFP_ATOMIC);
                if (!new) {
                        dev_err(to_dev(&ioat->base), "alloc failed\n");
                        return NULL;
                }
        }
        dev_dbg(to_dev(&ioat->base), "%s: allocated: %d\n",
                __func__, desc_id(new));
        prefetch(new->hw);
        return new;
}

static struct dma_async_tx_descriptor *
ioat1_dma_prep_memcpy(struct dma_chan *c, dma_addr_t dma_dest,
                      dma_addr_t dma_src, size_t len, unsigned long flags)
{
        struct ioat_dma_chan *ioat = to_ioat_chan(c);
        struct ioat_desc_sw *desc;
        size_t copy;
        LIST_HEAD(chain);
        dma_addr_t src = dma_src;
        dma_addr_t dest = dma_dest;
        size_t total_len = len;
        struct ioat_dma_descriptor *hw = NULL;
        int tx_cnt = 0;

        spin_lock_bh(&ioat->desc_lock);
        desc = ioat1_dma_get_next_descriptor(ioat);
        do {
                if (!desc)
                        break;

                tx_cnt++;
                copy = min_t(size_t, len, ioat->xfercap);

                hw = desc->hw;
                hw->size = copy;
                hw->ctl = 0;
                hw->src_addr = src;
                hw->dst_addr = dest;

                list_add_tail(&desc->node, &chain);

                len -= copy;
                dest += copy;
                src += copy;
                if (len) {
                        struct ioat_desc_sw *next;

                        async_tx_ack(&desc->txd);
                        next = ioat1_dma_get_next_descriptor(ioat);
                        hw->next = next ? next->txd.phys : 0;
                        dump_desc_dbg(ioat, desc);
                        desc = next;
                } else
                        hw->next = 0;
        } while (len);

        if (!desc) {
                struct ioat_chan_common *chan = &ioat->base;

                dev_err(to_dev(chan),
                        "chan%d - get_next_desc failed\n", chan_num(chan));
                list_splice(&chain, &ioat->free_desc);
                spin_unlock_bh(&ioat->desc_lock);
                return NULL;
        }
        spin_unlock_bh(&ioat->desc_lock);

        desc->txd.flags = flags;
        desc->tx_cnt = tx_cnt;
        desc->len = total_len;
        list_splice(&chain, &desc->txd.tx_list);
        hw->ctl_f.int_en = !!(flags & DMA_PREP_INTERRUPT);
        hw->ctl_f.compl_write = 1;
        dump_desc_dbg(ioat, desc);

        return &desc->txd;
}

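/*
 * Illustrative sketch (not part of the driver) of how a dmaengine client
 * drives this path; ioat_dma_self_test() below does the same thing for real.
 * The names "chan", "dst", "src", "len", "my_done" and "my_arg" are
 * placeholders:
 *
 *      struct dma_async_tx_descriptor *tx;
 *      dma_cookie_t cookie;
 *
 *      tx = chan->device->device_prep_dma_memcpy(chan, dst, src, len,
 *                                                DMA_PREP_INTERRUPT);
 *      if (!tx)
 *              return -ENOMEM;         // fall back to a CPU copy
 *      tx->callback = my_done;         // hypothetical completion hook
 *      tx->callback_param = my_arg;
 *      cookie = tx->tx_submit(tx);     // appends to the channel's chain
 *      chan->device->device_issue_pending(chan);  // kick the APPEND doorbell
 */
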
static void ioat1_cleanup_tasklet(unsigned long data)
{
        struct ioat_dma_chan *chan = (void *)data;
        ioat1_cleanup(chan);
        writew(IOAT_CHANCTRL_INT_DISABLE,
               chan->base.reg_base + IOAT_CHANCTRL_OFFSET);
}

static void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len,
                       int direction, enum dma_ctrl_flags flags, bool dst)
{
        if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) ||
            (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE)))
                pci_unmap_single(pdev, addr, len, direction);
        else
                pci_unmap_page(pdev, addr, len, direction);
}

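/*
 * Only the final descriptor of a multi-segment transfer carries a cookie and
 * reaches ioat_dma_unmap(): its hw->size covers just the last segment while
 * len is the whole transfer, so "len - hw->size" is the offset needed to get
 * back to the base addresses that were originally mapped.
 */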
void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags,
                    size_t len, struct ioat_dma_descriptor *hw)
{
        struct pci_dev *pdev = chan->device->pdev;
        size_t offset = len - hw->size;

        if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP))
                ioat_unmap(pdev, hw->dst_addr - offset, len,
                           PCI_DMA_FROMDEVICE, flags, 1);

        if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP))
                ioat_unmap(pdev, hw->src_addr - offset, len,
                           PCI_DMA_TODEVICE, flags, 0);
}

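/*
 * The hardware periodically writes a 64-bit completion status into the
 * per-channel writeback area allocated in ioat1_dma_alloc_chan_resources():
 * the upper bits hold the physical address of the last completed descriptor
 * and the low-order bits hold the DMA transfer status, which is why the
 * address is recovered below by masking with
 * IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR.
 */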
unsigned long ioat_get_current_completion(struct ioat_chan_common *chan)
{
        unsigned long phys_complete;

        /* The completion writeback can happen at any time,
           so reads by the driver need to be atomic operations.
           The descriptor physical addresses are limited to 32 bits
           when the CPU can only do a 32-bit mov. */

#if (BITS_PER_LONG == 64)
        phys_complete =
                chan->completion_virt->full
                & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
#else
        phys_complete = chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK;
#endif

        dev_dbg(to_dev(chan), "%s: phys_complete: %#llx\n", __func__,
                (unsigned long long) phys_complete);

        if ((chan->completion_virt->full
                & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
                                IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
                dev_err(to_dev(chan), "Channel halted, chanerr = %x\n",
                        readl(chan->reg_base + IOAT_CHANERR_OFFSET));

                /* TODO do something to salvage the situation */
        }

        return phys_complete;
}

/**
 * ioat1_cleanup - clean up finished descriptors
 * @ioat: ioat channel to be cleaned up
 */
static void ioat1_cleanup(struct ioat_dma_chan *ioat)
{
        struct ioat_chan_common *chan = &ioat->base;
        unsigned long phys_complete;
        struct ioat_desc_sw *desc, *_desc;
        dma_cookie_t cookie = 0;
        struct dma_async_tx_descriptor *tx;

        prefetch(chan->completion_virt);

        if (!spin_trylock_bh(&chan->cleanup_lock))
                return;

        phys_complete = ioat_get_current_completion(chan);
        if (phys_complete == chan->last_completion) {
                spin_unlock_bh(&chan->cleanup_lock);
                /*
                 * perhaps we're stuck so hard that the watchdog can't go off?
                 * try to catch it after 2 seconds
                 */
                if (time_after(jiffies,
                               chan->last_completion_time + HZ*WATCHDOG_DELAY)) {
                        ioat1_chan_watchdog(&(chan->device->work.work));
                        chan->last_completion_time = jiffies;
                }
                return;
        }
        chan->last_completion_time = jiffies;

        cookie = 0;
        if (!spin_trylock_bh(&ioat->desc_lock)) {
                spin_unlock_bh(&chan->cleanup_lock);
                return;
        }

        dev_dbg(to_dev(chan), "%s: phys_complete: %lx\n",
                 __func__, phys_complete);
        list_for_each_entry_safe(desc, _desc, &ioat->used_desc, node) {
                tx = &desc->txd;
                /*
                 * Incoming DMA requests may use multiple descriptors,
                 * due to exceeding xfercap, perhaps. If so, only the
                 * last one will have a cookie, and require unmapping.
                 */
                dump_desc_dbg(ioat, desc);
                if (tx->cookie) {
                        cookie = tx->cookie;
                        ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw);
                        if (tx->callback) {
                                tx->callback(tx->callback_param);
                                tx->callback = NULL;
                        }
                }

                if (tx->phys != phys_complete) {
                        /*
                         * a completed entry, but not the last, so clean
                         * up if the client is done with the descriptor
                         */
                        if (async_tx_test_ack(tx))
                                list_move_tail(&desc->node, &ioat->free_desc);
                        else
                                tx->cookie = 0;
                } else {
                        /*
                         * last used desc. Do not remove, so we can
                         * append from it, but don't look at it next
                         * time, either
                         */
                        tx->cookie = 0;

                        /* TODO check status bits? */
                        break;
                }
        }

        spin_unlock_bh(&ioat->desc_lock);

        chan->last_completion = phys_complete;
        if (cookie != 0)
                chan->completed_cookie = cookie;

        spin_unlock_bh(&chan->cleanup_lock);
}

static enum dma_status
ioat1_dma_is_complete(struct dma_chan *c, dma_cookie_t cookie,
                      dma_cookie_t *done, dma_cookie_t *used)
{
        struct ioat_dma_chan *ioat = to_ioat_chan(c);

        if (ioat_is_complete(c, cookie, done, used) == DMA_SUCCESS)
                return DMA_SUCCESS;

        ioat1_cleanup(ioat);

        return ioat_is_complete(c, cookie, done, used);
}

static void ioat1_dma_start_null_desc(struct ioat_dma_chan *ioat)
{
        struct ioat_chan_common *chan = &ioat->base;
        struct ioat_desc_sw *desc;
        struct ioat_dma_descriptor *hw;

        spin_lock_bh(&ioat->desc_lock);

        desc = ioat1_dma_get_next_descriptor(ioat);

        if (!desc) {
                dev_err(to_dev(chan),
                        "Unable to start null desc - get next desc failed\n");
                spin_unlock_bh(&ioat->desc_lock);
                return;
        }

        hw = desc->hw;
        hw->ctl = 0;
        hw->ctl_f.null = 1;
        hw->ctl_f.int_en = 1;
        hw->ctl_f.compl_write = 1;
        /* set size to non-zero value (channel returns error when size is 0) */
        hw->size = NULL_DESC_BUFFER_SIZE;
        hw->src_addr = 0;
        hw->dst_addr = 0;
        async_tx_ack(&desc->txd);
        hw->next = 0;
        list_add_tail(&desc->node, &ioat->used_desc);
        dump_desc_dbg(ioat, desc);

        writel(((u64) desc->txd.phys) & 0x00000000FFFFFFFF,
               chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
        writel(((u64) desc->txd.phys) >> 32,
               chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);

        writeb(IOAT_CHANCMD_START, chan->reg_base
                + IOAT_CHANCMD_OFFSET(chan->device->version));
        spin_unlock_bh(&ioat->desc_lock);
}

/*
 * Perform an I/OAT transaction to verify that the hardware works.
 */
#define IOAT_TEST_SIZE 2000

static void ioat_dma_test_callback(void *dma_async_param)
{
        struct completion *cmp = dma_async_param;

        complete(cmp);
}

/**
 * ioat_dma_self_test - Perform an I/OAT transaction to verify the HW works.
 * @device: device to be tested
 */
static int ioat_dma_self_test(struct ioatdma_device *device)
{
        int i;
        u8 *src;
        u8 *dest;
        struct dma_device *dma = &device->common;
        struct device *dev = &device->pdev->dev;
        struct dma_chan *dma_chan;
        struct dma_async_tx_descriptor *tx;
        dma_addr_t dma_dest, dma_src;
        dma_cookie_t cookie;
        int err = 0;
        struct completion cmp;
        unsigned long tmo;
        unsigned long flags;

        src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
        if (!src)
                return -ENOMEM;
        dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
        if (!dest) {
                kfree(src);
                return -ENOMEM;
        }

        /* Fill in src buffer */
        for (i = 0; i < IOAT_TEST_SIZE; i++)
                src[i] = (u8)i;

        /* Start copy, using first DMA channel */
        dma_chan = container_of(dma->channels.next, struct dma_chan,
                                device_node);
        if (dma->device_alloc_chan_resources(dma_chan) < 1) {
                dev_err(dev, "selftest cannot allocate chan resource\n");
                err = -ENODEV;
                goto out;
        }

        dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE);
        dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE);
        flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_COMPL_DEST_UNMAP_SINGLE |
                DMA_PREP_INTERRUPT;
        tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
                                                   IOAT_TEST_SIZE, flags);
        if (!tx) {
                dev_err(dev, "Self-test prep failed, disabling\n");
                err = -ENODEV;
                goto free_resources;
        }

        async_tx_ack(tx);
        init_completion(&cmp);
        tx->callback = ioat_dma_test_callback;
        tx->callback_param = &cmp;
        cookie = tx->tx_submit(tx);
        if (cookie < 0) {
                dev_err(dev, "Self-test setup failed, disabling\n");
                err = -ENODEV;
                goto free_resources;
        }
        dma->device_issue_pending(dma_chan);

        tmo = wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));

        if (tmo == 0 ||
            dma->device_is_tx_complete(dma_chan, cookie, NULL, NULL)
                                        != DMA_SUCCESS) {
                dev_err(dev, "Self-test copy timed out, disabling\n");
                err = -ENODEV;
                goto free_resources;
        }
        if (memcmp(src, dest, IOAT_TEST_SIZE)) {
                dev_err(dev, "Self-test copy failed compare, disabling\n");
                err = -ENODEV;
                goto free_resources;
        }

free_resources:
        dma->device_free_chan_resources(dma_chan);
out:
        kfree(src);
        kfree(dest);
        return err;
}

static char ioat_interrupt_style[32] = "msix";
module_param_string(ioat_interrupt_style, ioat_interrupt_style,
                    sizeof(ioat_interrupt_style), 0644);
MODULE_PARM_DESC(ioat_interrupt_style,
                 "set ioat interrupt style: msix (default), "
                 "msix-single-vector, msi, intx");

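/*
 * Example (assuming the driver is built as the "ioatdma" module; the module
 * name is not spelled out in this file):
 *
 *      modprobe ioatdma ioat_interrupt_style=msi
 *
 * or, for a built-in driver, ioatdma.ioat_interrupt_style=msi on the kernel
 * command line.
 */
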
/**
 * ioat_dma_setup_interrupts - setup interrupt handler
 * @device: ioat device
 */
static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
{
        struct ioat_chan_common *chan;
        struct pci_dev *pdev = device->pdev;
        struct device *dev = &pdev->dev;
        struct msix_entry *msix;
        int i, j, msixcnt;
        int err = -EINVAL;
        u8 intrctrl = 0;

        if (!strcmp(ioat_interrupt_style, "msix"))
                goto msix;
        if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
                goto msix_single_vector;
        if (!strcmp(ioat_interrupt_style, "msi"))
                goto msi;
        if (!strcmp(ioat_interrupt_style, "intx"))
                goto intx;
        dev_err(dev, "invalid ioat_interrupt_style %s\n", ioat_interrupt_style);
        goto err_no_irq;

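/*
 * Each label below tries one interrupt mechanism and, on failure, falls
 * through to a simpler one: msix tries per-channel vectors (dropping to
 * msix_single_vector, or straight to msi on a hard MSI-X error),
 * msix_single_vector falls back to msi, msi falls back to intx, and intx
 * gives up via err_no_irq.
 */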
msix:
        /* The number of MSI-X vectors should equal the number of channels */
        msixcnt = device->common.chancnt;
        for (i = 0; i < msixcnt; i++)
                device->msix_entries[i].entry = i;

        err = pci_enable_msix(pdev, device->msix_entries, msixcnt);
        if (err < 0)
                goto msi;
        if (err > 0)
                goto msix_single_vector;

        for (i = 0; i < msixcnt; i++) {
                msix = &device->msix_entries[i];
                chan = ioat_chan_by_index(device, i);
                err = devm_request_irq(dev, msix->vector,
                                       ioat_dma_do_interrupt_msix, 0,
                                       "ioat-msix", chan);
                if (err) {
                        for (j = 0; j < i; j++) {
                                msix = &device->msix_entries[j];
                                chan = ioat_chan_by_index(device, j);
                                devm_free_irq(dev, msix->vector, chan);
                        }
                        goto msix_single_vector;
                }
        }
        intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
        goto done;

msix_single_vector:
        msix = &device->msix_entries[0];
        msix->entry = 0;
        err = pci_enable_msix(pdev, device->msix_entries, 1);
        if (err)
                goto msi;

        err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0,
                               "ioat-msix", device);
        if (err) {
                pci_disable_msix(pdev);
                goto msi;
        }
        goto done;

msi:
        err = pci_enable_msi(pdev);
        if (err)
                goto intx;

        err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt, 0,
                               "ioat-msi", device);
        if (err) {
                pci_disable_msi(pdev);
                goto intx;
        }
        goto done;

intx:
        err = devm_request_irq(dev, pdev->irq, ioat_dma_do_interrupt,
                               IRQF_SHARED, "ioat-intx", device);
        if (err)
                goto err_no_irq;

done:
        if (device->intr_quirk)
                device->intr_quirk(device);
        intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
        writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
        return 0;

err_no_irq:
        /* Disable all interrupt generation */
        writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
        dev_err(dev, "no usable interrupts\n");
        return err;
}

static void ioat_disable_interrupts(struct ioatdma_device *device)
{
        /* Disable all interrupt generation */
        writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
}

int ioat_probe(struct ioatdma_device *device)
{
        int err = -ENODEV;
        struct dma_device *dma = &device->common;
        struct pci_dev *pdev = device->pdev;
        struct device *dev = &pdev->dev;

        /* DMA coherent memory pool for DMA descriptor allocations */
        device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
                                           sizeof(struct ioat_dma_descriptor),
                                           64, 0);
        if (!device->dma_pool) {
                err = -ENOMEM;
                goto err_dma_pool;
        }

        device->completion_pool = pci_pool_create("completion_pool", pdev,
                                                  sizeof(u64), SMP_CACHE_BYTES,
                                                  SMP_CACHE_BYTES);

        if (!device->completion_pool) {
                err = -ENOMEM;
                goto err_completion_pool;
        }

        device->enumerate_channels(device);

        dma_cap_set(DMA_MEMCPY, dma->cap_mask);
        dma->dev = &pdev->dev;

        dev_err(dev, "Intel(R) I/OAT DMA Engine found,"
                " %d channels, device version 0x%02x, driver version %s\n",
                dma->chancnt, device->version, IOAT_DMA_VERSION);

        if (!dma->chancnt) {
                dev_err(dev, "Intel(R) I/OAT DMA Engine problem found: "
                        "zero channels detected\n");
                goto err_setup_interrupts;
        }

        err = ioat_dma_setup_interrupts(device);
        if (err)
                goto err_setup_interrupts;

        err = ioat_dma_self_test(device);
        if (err)
                goto err_self_test;

        return 0;

err_self_test:
        ioat_disable_interrupts(device);
err_setup_interrupts:
        pci_pool_destroy(device->completion_pool);
err_completion_pool:
        pci_pool_destroy(device->dma_pool);
err_dma_pool:
        return err;
}

int ioat_register(struct ioatdma_device *device)
{
        int err = dma_async_device_register(&device->common);

        if (err) {
                ioat_disable_interrupts(device);
                pci_pool_destroy(device->completion_pool);
                pci_pool_destroy(device->dma_pool);
        }

        return err;
}

/* ioat1_intr_quirk - fix up dma ctrl register to enable / disable msi */
static void ioat1_intr_quirk(struct ioatdma_device *device)
{
        struct pci_dev *pdev = device->pdev;
        u32 dmactrl;

        pci_read_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
        if (pdev->msi_enabled)
                dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
        else
                dmactrl &= ~IOAT_PCI_DMACTRL_MSI_EN;
        pci_write_config_dword(pdev, IOAT_PCI_DMACTRL_OFFSET, dmactrl);
}

int ioat1_dma_probe(struct ioatdma_device *device, int dca)
{
        struct pci_dev *pdev = device->pdev;
        struct dma_device *dma;
        int err;

        device->intr_quirk = ioat1_intr_quirk;
        device->enumerate_channels = ioat1_enumerate_channels;
        dma = &device->common;
        dma->device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
        dma->device_issue_pending = ioat1_dma_memcpy_issue_pending;
        dma->device_alloc_chan_resources = ioat1_dma_alloc_chan_resources;
        dma->device_free_chan_resources = ioat1_dma_free_chan_resources;
        dma->device_is_tx_complete = ioat1_dma_is_complete;

        err = ioat_probe(device);
        if (err)
                return err;
        ioat_set_tcp_copy_break(4096);
        err = ioat_register(device);
        if (err)
                return err;
        if (dca)
                device->dca = ioat_dca_init(pdev, device->reg_base);

        INIT_DELAYED_WORK(&device->work, ioat1_chan_watchdog);
        schedule_delayed_work(&device->work, WATCHDOG_DELAY);

        return err;
}

void ioat_dma_remove(struct ioatdma_device *device)
{
        struct dma_device *dma = &device->common;

        if (device->version != IOAT_VER_3_0)
                cancel_delayed_work(&device->work);

        ioat_disable_interrupts(device);

        dma_async_device_unregister(dma);

        pci_pool_destroy(device->dma_pool);
        pci_pool_destroy(device->completion_pool);

        INIT_LIST_HEAD(&dma->channels);
}