bbs.cooldavid.org Git - net-next-2.6.git/blob - drivers/net/sfc/tx.c
sfc: Eliminate indirect lookups of queue size constants
1 /****************************************************************************
2  * Driver for Solarflare Solarstorm network controllers and boards
3  * Copyright 2005-2006 Fen Systems Ltd.
4  * Copyright 2005-2008 Solarflare Communications Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 as published
8  * by the Free Software Foundation, incorporated herein by reference.
9  */
10
11 #include <linux/pci.h>
12 #include <linux/tcp.h>
13 #include <linux/ip.h>
14 #include <linux/in.h>
15 #include <linux/if_ether.h>
16 #include <linux/highmem.h>
17 #include "net_driver.h"
18 #include "tx.h"
19 #include "efx.h"
20 #include "falcon.h"
21 #include "workarounds.h"
22
23 /*
24  * TX descriptor ring full threshold
25  *
26  * The tx_queue descriptor ring fill-level must fall below this value
27  * before we restart the netif queue
28  */
29 #define EFX_TXQ_THRESHOLD (EFX_TXQ_MASK / 2u)
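/* For example, with a hypothetical EFX_TXQ_SIZE of 1024 entries,
 * EFX_TXQ_MASK is 1023 and EFX_TXQ_THRESHOLD is 511: efx_xmit_done()
 * restarts the netif queue once completions bring the fill level
 * below 511 descriptors.
 */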
30
31 /* We want to be able to nest calls to netif_stop_queue(), since each
32  * channel can have an individual stop on the queue.
33  */
34 void efx_stop_queue(struct efx_nic *efx)
35 {
36         spin_lock_bh(&efx->netif_stop_lock);
37         EFX_TRACE(efx, "stop TX queue\n");
38
39         atomic_inc(&efx->netif_stop_count);
40         netif_stop_queue(efx->net_dev);
41
42         spin_unlock_bh(&efx->netif_stop_lock);
43 }
44
45 /* Wake netif's TX queue
46  * We want to be able to nest calls to netif_stop_queue(), since each
47  * channel can have an individual stop on the queue.
48  */
49 void efx_wake_queue(struct efx_nic *efx)
50 {
51         local_bh_disable();
52         if (atomic_dec_and_lock(&efx->netif_stop_count,
53                                 &efx->netif_stop_lock)) {
54                 EFX_TRACE(efx, "waking TX queue\n");
55                 netif_wake_queue(efx->net_dev);
56                 spin_unlock(&efx->netif_stop_lock);
57         }
58         local_bh_enable();
59 }
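/* Illustrative call sequence (not part of this file): each channel may
 * stop the queue independently, and it only wakes again once every stop
 * has been released, because netif_stop_count counts the nesting depth:
 *
 *      efx_stop_queue(efx);    count 0 -> 1, queue stopped
 *      efx_stop_queue(efx);    count 1 -> 2, still stopped
 *      efx_wake_queue(efx);    count 2 -> 1, still stopped
 *      efx_wake_queue(efx);    count 1 -> 0, netif queue woken
 */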
60
61 static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
62                                struct efx_tx_buffer *buffer)
63 {
64         if (buffer->unmap_len) {
65                 struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
66                 dma_addr_t unmap_addr = (buffer->dma_addr + buffer->len -
67                                          buffer->unmap_len);
68                 if (buffer->unmap_single)
69                         pci_unmap_single(pci_dev, unmap_addr, buffer->unmap_len,
70                                          PCI_DMA_TODEVICE);
71                 else
72                         pci_unmap_page(pci_dev, unmap_addr, buffer->unmap_len,
73                                        PCI_DMA_TODEVICE);
74                 buffer->unmap_len = 0;
75                 buffer->unmap_single = false;
76         }
77
78         if (buffer->skb) {
79                 dev_kfree_skb_any((struct sk_buff *) buffer->skb);
80                 buffer->skb = NULL;
81                 EFX_TRACE(tx_queue->efx, "TX queue %d transmission id %x "
82                   "complete\n", tx_queue->queue, tx_queue->read_count);
83         }
84 }
85
86 /**
87  * struct efx_tso_header - a DMA mapped buffer for packet headers
88  * @next: Linked list of free ones.
89  *      The list is protected by the TX queue lock.
90  * @unmap_len: Length to unmap for an oversize buffer, or 0.
91  * @dma_addr: The DMA address of the header below.
92  *
93  * This controls the memory used for a TSO header.  Use TSOH_BUFFER()
94  * to find the packet header data.  Use TSOH_SIZE() to calculate the
95  * total size required for a given packet header length.  TSO headers
96  * in the free list are exactly %TSOH_STD_SIZE bytes in size.
97  */
98 struct efx_tso_header {
99         union {
100                 struct efx_tso_header *next;
101                 size_t unmap_len;
102         };
103         dma_addr_t dma_addr;
104 };
105
106 static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
107                                struct sk_buff *skb);
108 static void efx_fini_tso(struct efx_tx_queue *tx_queue);
109 static void efx_tsoh_heap_free(struct efx_tx_queue *tx_queue,
110                                struct efx_tso_header *tsoh);
111
112 static void efx_tsoh_free(struct efx_tx_queue *tx_queue,
113                           struct efx_tx_buffer *buffer)
114 {
115         if (buffer->tsoh) {
116                 if (likely(!buffer->tsoh->unmap_len)) {
117                         buffer->tsoh->next = tx_queue->tso_headers_free;
118                         tx_queue->tso_headers_free = buffer->tsoh;
119                 } else {
120                         efx_tsoh_heap_free(tx_queue, buffer->tsoh);
121                 }
122                 buffer->tsoh = NULL;
123         }
124 }
125
126
127 /*
128  * Add a socket buffer to a TX queue
129  *
130  * This maps all fragments of a socket buffer for DMA and adds them to
131  * the TX queue.  The queue's insert pointer will be incremented by
132  * the number of fragments in the socket buffer.
133  *
134  * If any DMA mapping fails, any mapped fragments will be unmapped and
135  * the queue's insert pointer will be restored to its original value.
136  *
137  * Returns NETDEV_TX_OK or NETDEV_TX_BUSY
138  * You must hold netif_tx_lock() to call this function.
139  */
140 static netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue,
141                                          struct sk_buff *skb)
142 {
143         struct efx_nic *efx = tx_queue->efx;
144         struct pci_dev *pci_dev = efx->pci_dev;
145         struct efx_tx_buffer *buffer;
146         skb_frag_t *fragment;
147         struct page *page;
148         int page_offset;
149         unsigned int len, unmap_len = 0, fill_level, insert_ptr, misalign;
150         dma_addr_t dma_addr, unmap_addr = 0;
151         unsigned int dma_len;
152         bool unmap_single;
153         int q_space, i = 0;
154         netdev_tx_t rc = NETDEV_TX_OK;
155
156         EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);
157
158         if (skb_shinfo((struct sk_buff *)skb)->gso_size)
159                 return efx_enqueue_skb_tso(tx_queue, skb);
160
161         /* Get size of the initial fragment */
162         len = skb_headlen(skb);
163
164         /* Pad if necessary */
165         if (EFX_WORKAROUND_15592(efx) && skb->len <= 32) {
166                 EFX_BUG_ON_PARANOID(skb->data_len);
167                 len = 32 + 1;
168                 if (skb_pad(skb, len - skb->len))
169                         return NETDEV_TX_OK;
170         }
171
172         fill_level = tx_queue->insert_count - tx_queue->old_read_count;
173         q_space = EFX_TXQ_MASK - 1 - fill_level;
174
175         /* Map for DMA.  Use pci_map_single rather than pci_map_page
176          * since this is more efficient on machines with sparse
177          * memory.
178          */
179         unmap_single = true;
180         dma_addr = pci_map_single(pci_dev, skb->data, len, PCI_DMA_TODEVICE);
181
182         /* Process all fragments */
183         while (1) {
184                 if (unlikely(pci_dma_mapping_error(pci_dev, dma_addr)))
185                         goto pci_err;
186
187                 /* Store fields for marking in the per-fragment final
188                  * descriptor */
189                 unmap_len = len;
190                 unmap_addr = dma_addr;
191
192                 /* Add to TX queue, splitting across DMA boundaries */
193                 do {
194                         if (unlikely(q_space-- <= 0)) {
195                                 /* It might be that completions have
196                                  * happened since the xmit path last
197                                  * checked.  Update the xmit path's
198                                  * copy of read_count.
199                                  */
200                                 ++tx_queue->stopped;
201                                 /* This memory barrier protects the
202                                  * change of stopped from the access
203                                  * of read_count. */
204                                 smp_mb();
205                                 tx_queue->old_read_count =
206                                         *(volatile unsigned *)
207                                         &tx_queue->read_count;
208                                 fill_level = (tx_queue->insert_count
209                                               - tx_queue->old_read_count);
210                                 q_space = EFX_TXQ_MASK - 1 - fill_level;
211                                 if (unlikely(q_space-- <= 0))
212                                         goto stop;
213                                 smp_mb();
214                                 --tx_queue->stopped;
215                         }
216
217                         insert_ptr = tx_queue->insert_count & EFX_TXQ_MASK;
218                         buffer = &tx_queue->buffer[insert_ptr];
219                         efx_tsoh_free(tx_queue, buffer);
220                         EFX_BUG_ON_PARANOID(buffer->tsoh);
221                         EFX_BUG_ON_PARANOID(buffer->skb);
222                         EFX_BUG_ON_PARANOID(buffer->len);
223                         EFX_BUG_ON_PARANOID(!buffer->continuation);
224                         EFX_BUG_ON_PARANOID(buffer->unmap_len);
225
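                        /* Worked example for the splitting below, with
                         * illustrative hardware values: if tx_dma_mask
                         * were 0xfff (a 4 KB descriptor boundary) and the
                         * low bits of dma_addr were 0xe00, then
                         * dma_len = (~0xe00 & 0xfff) + 1 = 0x200, i.e. the
                         * 512 bytes up to the boundary.  If bug5391_mask
                         * were 0xf and the misalignment 8 bytes, dma_len
                         * would further be clamped to 512 - 8 = 504.
                         */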
226                         dma_len = (((~dma_addr) & efx->type->tx_dma_mask) + 1);
227                         if (likely(dma_len > len))
228                                 dma_len = len;
229
230                         misalign = (unsigned)dma_addr & efx->type->bug5391_mask;
231                         if (misalign && dma_len + misalign > 512)
232                                 dma_len = 512 - misalign;
233
234                         /* Fill out per descriptor fields */
235                         buffer->len = dma_len;
236                         buffer->dma_addr = dma_addr;
237                         len -= dma_len;
238                         dma_addr += dma_len;
239                         ++tx_queue->insert_count;
240                 } while (len);
241
242                 /* Transfer ownership of the unmapping to the final buffer */
243                 buffer->unmap_single = unmap_single;
244                 buffer->unmap_len = unmap_len;
245                 unmap_len = 0;
246
247                 /* Get address and size of next fragment */
248                 if (i >= skb_shinfo(skb)->nr_frags)
249                         break;
250                 fragment = &skb_shinfo(skb)->frags[i];
251                 len = fragment->size;
252                 page = fragment->page;
253                 page_offset = fragment->page_offset;
254                 i++;
255                 /* Map for DMA */
256                 unmap_single = false;
257                 dma_addr = pci_map_page(pci_dev, page, page_offset, len,
258                                         PCI_DMA_TODEVICE);
259         }
260
261         /* Transfer ownership of the skb to the final buffer */
262         buffer->skb = skb;
263         buffer->continuation = false;
264
265         /* Pass off to hardware */
266         falcon_push_buffers(tx_queue);
267
268         return NETDEV_TX_OK;
269
270  pci_err:
271         EFX_ERR_RL(efx, "TX queue %d could not map skb with %d bytes %d "
272                    "fragments for DMA\n", tx_queue->queue, skb->len,
273                    skb_shinfo(skb)->nr_frags + 1);
274
275         /* Mark the packet as transmitted, and free the SKB ourselves */
276         dev_kfree_skb_any((struct sk_buff *)skb);
277         goto unwind;
278
279  stop:
280         rc = NETDEV_TX_BUSY;
281
282         if (tx_queue->stopped == 1)
283                 efx_stop_queue(efx);
284
285  unwind:
286         /* Work backwards until we hit the original insert pointer value */
287         while (tx_queue->insert_count != tx_queue->write_count) {
288                 --tx_queue->insert_count;
289                 insert_ptr = tx_queue->insert_count & EFX_TXQ_MASK;
290                 buffer = &tx_queue->buffer[insert_ptr];
291                 efx_dequeue_buffer(tx_queue, buffer);
292                 buffer->len = 0;
293         }
294
295         /* Free the fragment we were mid-way through pushing */
296         if (unmap_len) {
297                 if (unmap_single)
298                         pci_unmap_single(pci_dev, unmap_addr, unmap_len,
299                                          PCI_DMA_TODEVICE);
300                 else
301                         pci_unmap_page(pci_dev, unmap_addr, unmap_len,
302                                        PCI_DMA_TODEVICE);
303         }
304
305         return rc;
306 }
307
308 /* Remove packets from the TX queue
309  *
310  * This removes packets from the TX queue, up to and including the
311  * specified index.
312  */
313 static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
314                                 unsigned int index)
315 {
316         struct efx_nic *efx = tx_queue->efx;
317         unsigned int stop_index, read_ptr;
318
319         stop_index = (index + 1) & EFX_TXQ_MASK;
320         read_ptr = tx_queue->read_count & EFX_TXQ_MASK;
321
322         while (read_ptr != stop_index) {
323                 struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
324                 if (unlikely(buffer->len == 0)) {
325                         EFX_ERR(tx_queue->efx, "TX queue %d spurious TX "
326                                 "completion id %x\n", tx_queue->queue,
327                                 read_ptr);
328                         efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
329                         return;
330                 }
331
332                 efx_dequeue_buffer(tx_queue, buffer);
333                 buffer->continuation = true;
334                 buffer->len = 0;
335
336                 ++tx_queue->read_count;
337                 read_ptr = tx_queue->read_count & EFX_TXQ_MASK;
338         }
339 }
340
341 /* Initiate a packet transmission on the specified TX queue.
342  * Note that returning anything other than NETDEV_TX_OK will cause the
343  * OS to free the skb.
344  *
345  * This function is split out from efx_hard_start_xmit to allow the
346  * loopback test to direct packets via specific TX queues.  It is
347  * therefore a non-static inline, so as not to penalise performance
348  * for non-loopback transmissions.
349  *
350  * Context: netif_tx_lock held
351  */
352 inline netdev_tx_t efx_xmit(struct efx_nic *efx,
353                            struct efx_tx_queue *tx_queue, struct sk_buff *skb)
354 {
355         /* Map fragments for DMA and add to TX queue */
356         return efx_enqueue_skb(tx_queue, skb);
357 }
358
359 /* Initiate a packet transmission.  We use one channel per CPU
360  * (sharing when we have more CPUs than channels).  On Falcon, the TX
361  * completion events will be directed back to the CPU that transmitted
362  * the packet, which should be cache-efficient.
363  *
364  * Context: non-blocking.
365  * Note that returning anything other than NETDEV_TX_OK will cause the
366  * OS to free the skb.
367  */
368 netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
369                                       struct net_device *net_dev)
370 {
371         struct efx_nic *efx = netdev_priv(net_dev);
372         struct efx_tx_queue *tx_queue;
373
374         if (unlikely(efx->port_inhibited))
375                 return NETDEV_TX_BUSY;
376
377         if (likely(skb->ip_summed == CHECKSUM_PARTIAL))
378                 tx_queue = &efx->tx_queue[EFX_TX_QUEUE_OFFLOAD_CSUM];
379         else
380                 tx_queue = &efx->tx_queue[EFX_TX_QUEUE_NO_CSUM];
381
382         return efx_xmit(efx, tx_queue, skb);
383 }
384
385 void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
386 {
387         unsigned fill_level;
388         struct efx_nic *efx = tx_queue->efx;
389
390         EFX_BUG_ON_PARANOID(index > EFX_TXQ_MASK);
391
392         efx_dequeue_buffers(tx_queue, index);
393
394         /* See if we need to restart the netif queue.  This barrier
395          * separates the update of read_count from the test of
396          * stopped. */
397         smp_mb();
398         if (unlikely(tx_queue->stopped) && likely(efx->port_enabled)) {
399                 fill_level = tx_queue->insert_count - tx_queue->read_count;
400                 if (fill_level < EFX_TXQ_THRESHOLD) {
401                         EFX_BUG_ON_PARANOID(!efx_dev_registered(efx));
402
403                         /* Do this under netif_tx_lock(), to avoid racing
404                          * with efx_xmit(). */
405                         netif_tx_lock(efx->net_dev);
406                         if (tx_queue->stopped) {
407                                 tx_queue->stopped = 0;
408                                 efx_wake_queue(efx);
409                         }
410                         netif_tx_unlock(efx->net_dev);
411                 }
412         }
413 }
414
415 int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
416 {
417         struct efx_nic *efx = tx_queue->efx;
418         unsigned int txq_size;
419         int i, rc;
420
421         EFX_LOG(efx, "creating TX queue %d\n", tx_queue->queue);
422
423         /* Allocate software ring */
424         txq_size = EFX_TXQ_SIZE * sizeof(*tx_queue->buffer);
425         tx_queue->buffer = kzalloc(txq_size, GFP_KERNEL);
426         if (!tx_queue->buffer)
427                 return -ENOMEM;
428         for (i = 0; i <= EFX_TXQ_MASK; ++i)
429                 tx_queue->buffer[i].continuation = true;
430
431         /* Allocate hardware ring */
432         rc = falcon_probe_tx(tx_queue);
433         if (rc)
434                 goto fail;
435
436         return 0;
437
438  fail:
439         kfree(tx_queue->buffer);
440         tx_queue->buffer = NULL;
441         return rc;
442 }
443
444 void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
445 {
446         EFX_LOG(tx_queue->efx, "initialising TX queue %d\n", tx_queue->queue);
447
448         tx_queue->insert_count = 0;
449         tx_queue->write_count = 0;
450         tx_queue->read_count = 0;
451         tx_queue->old_read_count = 0;
452         BUG_ON(tx_queue->stopped);
453
454         /* Set up TX descriptor ring */
455         falcon_init_tx(tx_queue);
456 }
457
458 void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
459 {
460         struct efx_tx_buffer *buffer;
461
462         if (!tx_queue->buffer)
463                 return;
464
465         /* Free any buffers left in the ring */
466         while (tx_queue->read_count != tx_queue->write_count) {
467                 buffer = &tx_queue->buffer[tx_queue->read_count & EFX_TXQ_MASK];
468                 efx_dequeue_buffer(tx_queue, buffer);
469                 buffer->continuation = true;
470                 buffer->len = 0;
471
472                 ++tx_queue->read_count;
473         }
474 }
475
476 void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
477 {
478         EFX_LOG(tx_queue->efx, "shutting down TX queue %d\n", tx_queue->queue);
479
480         /* Flush TX queue, remove descriptor ring */
481         falcon_fini_tx(tx_queue);
482
483         efx_release_tx_buffers(tx_queue);
484
485         /* Free up TSO header cache */
486         efx_fini_tso(tx_queue);
487
488         /* Release queue's stop on port, if any */
489         if (tx_queue->stopped) {
490                 tx_queue->stopped = 0;
491                 efx_wake_queue(tx_queue->efx);
492         }
493 }
494
495 void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
496 {
497         EFX_LOG(tx_queue->efx, "destroying TX queue %d\n", tx_queue->queue);
498         falcon_remove_tx(tx_queue);
499
500         kfree(tx_queue->buffer);
501         tx_queue->buffer = NULL;
502 }
503
504
505 /* Efx TCP segmentation acceleration.
506  *
507  * Why?  Because by doing it here in the driver we can go significantly
508  * faster than GSO.
509  *
510  * Requires TX checksum offload support.
511  */
512
513 /* Number of bytes inserted at the start of a TSO header buffer,
514  * similar to NET_IP_ALIGN.
515  */
516 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
517 #define TSOH_OFFSET     0
518 #else
519 #define TSOH_OFFSET     NET_IP_ALIGN
520 #endif
521
522 #define TSOH_BUFFER(tsoh)       ((u8 *)(tsoh + 1) + TSOH_OFFSET)
523
524 /* Total size of struct efx_tso_header, buffer and padding */
525 #define TSOH_SIZE(hdr_len)                                      \
526         (sizeof(struct efx_tso_header) + TSOH_OFFSET + hdr_len)
527
528 /* Size of blocks on free list.  Larger blocks must be allocated from
529  * the heap.
530  */
531 #define TSOH_STD_SIZE           128
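/* Illustrative sizing: with a typical 54-byte Ethernet + IPv4 + TCP
 * header and TSOH_OFFSET of NET_IP_ALIGN (usually 2), TSOH_SIZE(54) is
 * sizeof(struct efx_tso_header) + 2 + 54 bytes, well within the
 * 128-byte blocks on the free list; only unusually long headers take
 * the efx_tsoh_heap_alloc() path.
 */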
532
533 #define PTR_DIFF(p1, p2)  ((u8 *)(p1) - (u8 *)(p2))
534 #define ETH_HDR_LEN(skb)  (skb_network_header(skb) - (skb)->data)
535 #define SKB_TCP_OFF(skb)  PTR_DIFF(tcp_hdr(skb), (skb)->data)
536 #define SKB_IPV4_OFF(skb) PTR_DIFF(ip_hdr(skb), (skb)->data)
537
538 /**
539  * struct tso_state - TSO state for an SKB
540  * @out_len: Remaining length in current segment
541  * @seqnum: Current sequence number
542  * @ipv4_id: Current IPv4 ID, host endian
543  * @packet_space: Remaining space in current packet
544  * @dma_addr: DMA address of current position
545  * @in_len: Remaining length in current SKB fragment
546  * @unmap_len: Length of SKB fragment
547  * @unmap_addr: DMA address of SKB fragment
548  * @unmap_single: DMA single vs page mapping flag
549  * @header_len: Number of bytes of header
550  * @full_packet_size: Number of bytes to put in each outgoing segment
551  *
552  * The state used during segmentation.  It is put into this data structure
553  * just to make it easy to pass into inline functions.
554  */
555 struct tso_state {
556         /* Output position */
557         unsigned out_len;
558         unsigned seqnum;
559         unsigned ipv4_id;
560         unsigned packet_space;
561
562         /* Input position */
563         dma_addr_t dma_addr;
564         unsigned in_len;
565         unsigned unmap_len;
566         dma_addr_t unmap_addr;
567         bool unmap_single;
568
569         unsigned header_len;
570         int full_packet_size;
571 };
572
573
574 /*
575  * Verify that our various assumptions about sk_buffs and the conditions
576  * under which TSO will be attempted hold true.
577  */
578 static void efx_tso_check_safe(struct sk_buff *skb)
579 {
580         __be16 protocol = skb->protocol;
581
582         EFX_BUG_ON_PARANOID(((struct ethhdr *)skb->data)->h_proto !=
583                             protocol);
584         if (protocol == htons(ETH_P_8021Q)) {
585                 /* Find the encapsulated protocol; reset network header
586                  * and transport header based on that. */
587                 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
588                 protocol = veh->h_vlan_encapsulated_proto;
589                 skb_set_network_header(skb, sizeof(*veh));
590                 if (protocol == htons(ETH_P_IP))
591                         skb_set_transport_header(skb, sizeof(*veh) +
592                                                  4 * ip_hdr(skb)->ihl);
593         }
594
595         EFX_BUG_ON_PARANOID(protocol != htons(ETH_P_IP));
596         EFX_BUG_ON_PARANOID(ip_hdr(skb)->protocol != IPPROTO_TCP);
597         EFX_BUG_ON_PARANOID((PTR_DIFF(tcp_hdr(skb), skb->data)
598                              + (tcp_hdr(skb)->doff << 2u)) >
599                             skb_headlen(skb));
600 }
601
602
603 /*
604  * Allocate a page worth of efx_tso_header structures, and string them
605  * into the tx_queue->tso_headers_free linked list. Return 0 or -ENOMEM.
606  */
607 static int efx_tsoh_block_alloc(struct efx_tx_queue *tx_queue)
608 {
609
610         struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
611         struct efx_tso_header *tsoh;
612         dma_addr_t dma_addr;
613         u8 *base_kva, *kva;
614
615         base_kva = pci_alloc_consistent(pci_dev, PAGE_SIZE, &dma_addr);
616         if (base_kva == NULL) {
617                 EFX_ERR(tx_queue->efx, "Unable to allocate page for TSO"
618                         " headers\n");
619                 return -ENOMEM;
620         }
621
622         /* pci_alloc_consistent() allocates pages. */
623         EFX_BUG_ON_PARANOID(dma_addr & (PAGE_SIZE - 1u));
624
625         for (kva = base_kva; kva < base_kva + PAGE_SIZE; kva += TSOH_STD_SIZE) {
626                 tsoh = (struct efx_tso_header *)kva;
627                 tsoh->dma_addr = dma_addr + (TSOH_BUFFER(tsoh) - base_kva);
628                 tsoh->next = tx_queue->tso_headers_free;
629                 tx_queue->tso_headers_free = tsoh;
630         }
631
632         return 0;
633 }
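/* With 4 KB pages and TSOH_STD_SIZE of 128, each successful call above
 * strings PAGE_SIZE / TSOH_STD_SIZE = 32 standard headers onto the
 * free list.
 */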
634
635
636 /* Free up a TSO header, and all others in the same page. */
637 static void efx_tsoh_block_free(struct efx_tx_queue *tx_queue,
638                                 struct efx_tso_header *tsoh,
639                                 struct pci_dev *pci_dev)
640 {
641         struct efx_tso_header **p;
642         unsigned long base_kva;
643         dma_addr_t base_dma;
644
645         base_kva = (unsigned long)tsoh & PAGE_MASK;
646         base_dma = tsoh->dma_addr & PAGE_MASK;
647
648         p = &tx_queue->tso_headers_free;
649         while (*p != NULL) {
650                 if (((unsigned long)*p & PAGE_MASK) == base_kva)
651                         *p = (*p)->next;
652                 else
653                         p = &(*p)->next;
654         }
655
656         pci_free_consistent(pci_dev, PAGE_SIZE, (void *)base_kva, base_dma);
657 }
658
659 static struct efx_tso_header *
660 efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len)
661 {
662         struct efx_tso_header *tsoh;
663
664         tsoh = kmalloc(TSOH_SIZE(header_len), GFP_ATOMIC | GFP_DMA);
665         if (unlikely(!tsoh))
666                 return NULL;
667
668         tsoh->dma_addr = pci_map_single(tx_queue->efx->pci_dev,
669                                         TSOH_BUFFER(tsoh), header_len,
670                                         PCI_DMA_TODEVICE);
671         if (unlikely(pci_dma_mapping_error(tx_queue->efx->pci_dev,
672                                            tsoh->dma_addr))) {
673                 kfree(tsoh);
674                 return NULL;
675         }
676
677         tsoh->unmap_len = header_len;
678         return tsoh;
679 }
680
681 static void
682 efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh)
683 {
684         pci_unmap_single(tx_queue->efx->pci_dev,
685                          tsoh->dma_addr, tsoh->unmap_len,
686                          PCI_DMA_TODEVICE);
687         kfree(tsoh);
688 }
689
690 /**
691  * efx_tx_queue_insert - push descriptors onto the TX queue
692  * @tx_queue:           Efx TX queue
693  * @dma_addr:           DMA address of fragment
694  * @len:                Length of fragment
695  * @final_buffer:       The final buffer inserted into the queue
696  *
697  * Push descriptors onto the TX queue.  Return 0 on success or 1 if
698  * @tx_queue is full.
699  */
700 static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
701                                dma_addr_t dma_addr, unsigned len,
702                                struct efx_tx_buffer **final_buffer)
703 {
704         struct efx_tx_buffer *buffer;
705         struct efx_nic *efx = tx_queue->efx;
706         unsigned dma_len, fill_level, insert_ptr, misalign;
707         int q_space;
708
709         EFX_BUG_ON_PARANOID(len <= 0);
710
711         fill_level = tx_queue->insert_count - tx_queue->old_read_count;
712         /* -1 as there is no way to represent all descriptors used */
713         q_space = EFX_TXQ_MASK - 1 - fill_level;
714
715         while (1) {
716                 if (unlikely(q_space-- <= 0)) {
717                         /* It might be that completions have happened
718                          * since the xmit path last checked.  Update
719                          * the xmit path's copy of read_count.
720                          */
721                         ++tx_queue->stopped;
722                         /* This memory barrier protects the change of
723                          * stopped from the access of read_count. */
724                         smp_mb();
725                         tx_queue->old_read_count =
726                                 *(volatile unsigned *)&tx_queue->read_count;
727                         fill_level = (tx_queue->insert_count
728                                       - tx_queue->old_read_count);
729                         q_space = EFX_TXQ_MASK - 1 - fill_level;
730                         if (unlikely(q_space-- <= 0)) {
731                                 *final_buffer = NULL;
732                                 return 1;
733                         }
734                         smp_mb();
735                         --tx_queue->stopped;
736                 }
737
738                 insert_ptr = tx_queue->insert_count & EFX_TXQ_MASK;
739                 buffer = &tx_queue->buffer[insert_ptr];
740                 ++tx_queue->insert_count;
741
742                 EFX_BUG_ON_PARANOID(tx_queue->insert_count -
743                                     tx_queue->read_count >
744                                     EFX_TXQ_MASK);
745
746                 efx_tsoh_free(tx_queue, buffer);
747                 EFX_BUG_ON_PARANOID(buffer->len);
748                 EFX_BUG_ON_PARANOID(buffer->unmap_len);
749                 EFX_BUG_ON_PARANOID(buffer->skb);
750                 EFX_BUG_ON_PARANOID(!buffer->continuation);
751                 EFX_BUG_ON_PARANOID(buffer->tsoh);
752
753                 buffer->dma_addr = dma_addr;
754
755                 /* Ensure we do not cross a boundary unsupported by H/W */
756                 dma_len = (~dma_addr & efx->type->tx_dma_mask) + 1;
757
758                 misalign = (unsigned)dma_addr & efx->type->bug5391_mask;
759                 if (misalign && dma_len + misalign > 512)
760                         dma_len = 512 - misalign;
761
762                 /* If there is enough space to send then do so */
763                 if (dma_len >= len)
764                         break;
765
766                 buffer->len = dma_len; /* Don't set the other members */
767                 dma_addr += dma_len;
768                 len -= dma_len;
769         }
770
771         EFX_BUG_ON_PARANOID(!len);
772         buffer->len = len;
773         *final_buffer = buffer;
774         return 0;
775 }
776
777
778 /*
779  * Put a TSO header into the TX queue.
780  *
781  * This is special-cased because we know that it is small enough to fit in
782  * a single fragment, and we know it doesn't cross a page boundary.  It
783  * also allows us to not worry about end-of-packet etc.
784  */
785 static void efx_tso_put_header(struct efx_tx_queue *tx_queue,
786                                struct efx_tso_header *tsoh, unsigned len)
787 {
788         struct efx_tx_buffer *buffer;
789
790         buffer = &tx_queue->buffer[tx_queue->insert_count & EFX_TXQ_MASK];
791         efx_tsoh_free(tx_queue, buffer);
792         EFX_BUG_ON_PARANOID(buffer->len);
793         EFX_BUG_ON_PARANOID(buffer->unmap_len);
794         EFX_BUG_ON_PARANOID(buffer->skb);
795         EFX_BUG_ON_PARANOID(!buffer->continuation);
796         EFX_BUG_ON_PARANOID(buffer->tsoh);
797         buffer->len = len;
798         buffer->dma_addr = tsoh->dma_addr;
799         buffer->tsoh = tsoh;
800
801         ++tx_queue->insert_count;
802 }
803
804
805 /* Remove descriptors put into a tx_queue. */
806 static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
807 {
808         struct efx_tx_buffer *buffer;
809         dma_addr_t unmap_addr;
810
811         /* Work backwards until we hit the original insert pointer value */
812         while (tx_queue->insert_count != tx_queue->write_count) {
813                 --tx_queue->insert_count;
814                 buffer = &tx_queue->buffer[tx_queue->insert_count &
815                                            EFX_TXQ_MASK];
816                 efx_tsoh_free(tx_queue, buffer);
817                 EFX_BUG_ON_PARANOID(buffer->skb);
818                 buffer->len = 0;
819                 buffer->continuation = true;
820                 if (buffer->unmap_len) {
821                         unmap_addr = (buffer->dma_addr + buffer->len -
822                                       buffer->unmap_len);
823                         if (buffer->unmap_single)
824                                 pci_unmap_single(tx_queue->efx->pci_dev,
825                                                  unmap_addr, buffer->unmap_len,
826                                                  PCI_DMA_TODEVICE);
827                         else
828                                 pci_unmap_page(tx_queue->efx->pci_dev,
829                                                unmap_addr, buffer->unmap_len,
830                                                PCI_DMA_TODEVICE);
831                         buffer->unmap_len = 0;
832                 }
833         }
834 }
835
836
837 /* Parse the SKB header and initialise state. */
838 static void tso_start(struct tso_state *st, const struct sk_buff *skb)
839 {
840         /* The combined Ethernet/IP/TCP header size is the TCP header size
841          * plus the offset of the TCP header from the start of the packet.
842          */
843         st->header_len = ((tcp_hdr(skb)->doff << 2u)
844                           + PTR_DIFF(tcp_hdr(skb), skb->data));
845         st->full_packet_size = st->header_len + skb_shinfo(skb)->gso_size;
846
847         st->ipv4_id = ntohs(ip_hdr(skb)->id);
848         st->seqnum = ntohl(tcp_hdr(skb)->seq);
849
850         EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg);
851         EFX_BUG_ON_PARANOID(tcp_hdr(skb)->syn);
852         EFX_BUG_ON_PARANOID(tcp_hdr(skb)->rst);
853
854         st->packet_space = st->full_packet_size;
855         st->out_len = skb->len - st->header_len;
856         st->unmap_len = 0;
857         st->unmap_single = false;
858 }
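/* Worked example, assuming an untagged frame with no IP or TCP options:
 * header_len = 20 (TCP) + 34 (offset of the TCP header) = 54 bytes, and
 * with gso_size = 1460 each full segment is
 * full_packet_size = 54 + 1460 = 1514 bytes.
 */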
859
860 static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
861                             skb_frag_t *frag)
862 {
863         st->unmap_addr = pci_map_page(efx->pci_dev, frag->page,
864                                       frag->page_offset, frag->size,
865                                       PCI_DMA_TODEVICE);
866         if (likely(!pci_dma_mapping_error(efx->pci_dev, st->unmap_addr))) {
867                 st->unmap_single = false;
868                 st->unmap_len = frag->size;
869                 st->in_len = frag->size;
870                 st->dma_addr = st->unmap_addr;
871                 return 0;
872         }
873         return -ENOMEM;
874 }
875
876 static int tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx,
877                                  const struct sk_buff *skb)
878 {
879         int hl = st->header_len;
880         int len = skb_headlen(skb) - hl;
881
882         st->unmap_addr = pci_map_single(efx->pci_dev, skb->data + hl,
883                                         len, PCI_DMA_TODEVICE);
884         if (likely(!pci_dma_mapping_error(efx->pci_dev, st->unmap_addr))) {
885                 st->unmap_single = true;
886                 st->unmap_len = len;
887                 st->in_len = len;
888                 st->dma_addr = st->unmap_addr;
889                 return 0;
890         }
891         return -ENOMEM;
892 }
893
894
895 /**
896  * tso_fill_packet_with_fragment - form descriptors for the current fragment
897  * @tx_queue:           Efx TX queue
898  * @skb:                Socket buffer
899  * @st:                 TSO state
900  *
901  * Form descriptors for the current fragment, until we reach the end
902  * of fragment or end-of-packet.  Return 0 on success, 1 if not enough
903  * space in @tx_queue.
904  */
905 static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
906                                          const struct sk_buff *skb,
907                                          struct tso_state *st)
908 {
909         struct efx_tx_buffer *buffer;
910         int n, end_of_packet, rc;
911
912         if (st->in_len == 0)
913                 return 0;
914         if (st->packet_space == 0)
915                 return 0;
916
917         EFX_BUG_ON_PARANOID(st->in_len <= 0);
918         EFX_BUG_ON_PARANOID(st->packet_space <= 0);
919
920         n = min(st->in_len, st->packet_space);
921
922         st->packet_space -= n;
923         st->out_len -= n;
924         st->in_len -= n;
925
926         rc = efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer);
927         if (likely(rc == 0)) {
928                 if (st->out_len == 0)
929                         /* Transfer ownership of the skb */
930                         buffer->skb = skb;
931
932                 end_of_packet = st->out_len == 0 || st->packet_space == 0;
933                 buffer->continuation = !end_of_packet;
934
935                 if (st->in_len == 0) {
936                         /* Transfer ownership of the pci mapping */
937                         buffer->unmap_len = st->unmap_len;
938                         buffer->unmap_single = st->unmap_single;
939                         st->unmap_len = 0;
940                 }
941         }
942
943         st->dma_addr += n;
944         return rc;
945 }
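/* Continuing the example above: with packet_space = 1460 and a
 * 4096-byte fragment, the first call consumes n = 1460 bytes and ends
 * the packet; the next packet then continues from partway through the
 * same fragment once tso_start_new_packet() has emitted a fresh header.
 */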
946
947
948 /**
949  * tso_start_new_packet - generate a new header and prepare for the new packet
950  * @tx_queue:           Efx TX queue
951  * @skb:                Socket buffer
952  * @st:                 TSO state
953  *
954  * Generate a new header and prepare for the new packet.  Return 0 on
955  * success, or -1 if we failed to allocate a header.
956  */
957 static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
958                                 const struct sk_buff *skb,
959                                 struct tso_state *st)
960 {
961         struct efx_tso_header *tsoh;
962         struct iphdr *tsoh_iph;
963         struct tcphdr *tsoh_th;
964         unsigned ip_length;
965         u8 *header;
966
967         /* Allocate a DMA-mapped header buffer. */
968         if (likely(TSOH_SIZE(st->header_len) <= TSOH_STD_SIZE)) {
969                 if (tx_queue->tso_headers_free == NULL) {
970                         if (efx_tsoh_block_alloc(tx_queue))
971                                 return -1;
972                 }
973                 EFX_BUG_ON_PARANOID(!tx_queue->tso_headers_free);
974                 tsoh = tx_queue->tso_headers_free;
975                 tx_queue->tso_headers_free = tsoh->next;
976                 tsoh->unmap_len = 0;
977         } else {
978                 tx_queue->tso_long_headers++;
979                 tsoh = efx_tsoh_heap_alloc(tx_queue, st->header_len);
980                 if (unlikely(!tsoh))
981                         return -1;
982         }
983
984         header = TSOH_BUFFER(tsoh);
985         tsoh_th = (struct tcphdr *)(header + SKB_TCP_OFF(skb));
986         tsoh_iph = (struct iphdr *)(header + SKB_IPV4_OFF(skb));
987
988         /* Copy and update the headers. */
989         memcpy(header, skb->data, st->header_len);
990
991         tsoh_th->seq = htonl(st->seqnum);
992         st->seqnum += skb_shinfo(skb)->gso_size;
993         if (st->out_len > skb_shinfo(skb)->gso_size) {
994                 /* This packet will not finish the TSO burst. */
995                 ip_length = st->full_packet_size - ETH_HDR_LEN(skb);
996                 tsoh_th->fin = 0;
997                 tsoh_th->psh = 0;
998         } else {
999                 /* This packet will be the last in the TSO burst. */
1000                 ip_length = st->header_len - ETH_HDR_LEN(skb) + st->out_len;
1001                 tsoh_th->fin = tcp_hdr(skb)->fin;
1002                 tsoh_th->psh = tcp_hdr(skb)->psh;
1003         }
1004         tsoh_iph->tot_len = htons(ip_length);
1005
1006         /* Linux leaves suitable gaps in the IP ID space for us to fill. */
1007         tsoh_iph->id = htons(st->ipv4_id);
1008         st->ipv4_id++;
1009
1010         st->packet_space = skb_shinfo(skb)->gso_size;
1011         ++tx_queue->tso_packets;
1012
1013         /* Form a descriptor for this header. */
1014         efx_tso_put_header(tx_queue, tsoh, st->header_len);
1015
1016         return 0;
1017 }
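/* Continuing the example (54-byte headers, gso_size 1460): a non-final
 * segment gets tot_len = full_packet_size - ETH_HDR_LEN = 1514 - 14 =
 * 1500, while the final segment gets tot_len = header_len - ETH_HDR_LEN
 * + out_len, covering only the remaining payload.
 */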
1018
1019
1020 /**
1021  * efx_enqueue_skb_tso - segment and transmit a TSO socket buffer
1022  * @tx_queue:           Efx TX queue
1023  * @skb:                Socket buffer
1024  *
1025  * Context: You must hold netif_tx_lock() to call this function.
1026  *
1027  * Add socket buffer @skb to @tx_queue, performing TSO.  In all cases,
1028  * even on failure, @skb is consumed.  Return
1029  * %NETDEV_TX_OK or %NETDEV_TX_BUSY.
1030  */
1031 static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
1032                                struct sk_buff *skb)
1033 {
1034         struct efx_nic *efx = tx_queue->efx;
1035         int frag_i, rc, rc2 = NETDEV_TX_OK;
1036         struct tso_state state;
1037
1038         /* Verify TSO is safe - these checks should never fail. */
1039         efx_tso_check_safe(skb);
1040
1041         EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);
1042
1043         tso_start(&state, skb);
1044
1045         /* Assume that skb header area contains exactly the headers, and
1046          * all payload is in the frag list.
1047          */
1048         if (skb_headlen(skb) == state.header_len) {
1049                 /* Grab the first payload fragment. */
1050                 EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags < 1);
1051                 frag_i = 0;
1052                 rc = tso_get_fragment(&state, efx,
1053                                       skb_shinfo(skb)->frags + frag_i);
1054                 if (rc)
1055                         goto mem_err;
1056         } else {
1057                 rc = tso_get_head_fragment(&state, efx, skb);
1058                 if (rc)
1059                         goto mem_err;
1060                 frag_i = -1;
1061         }
1062
1063         if (tso_start_new_packet(tx_queue, skb, &state) < 0)
1064                 goto mem_err;
1065
1066         while (1) {
1067                 rc = tso_fill_packet_with_fragment(tx_queue, skb, &state);
1068                 if (unlikely(rc))
1069                         goto stop;
1070
1071                 /* Move onto the next fragment? */
1072                 if (state.in_len == 0) {
1073                         if (++frag_i >= skb_shinfo(skb)->nr_frags)
1074                                 /* End of payload reached. */
1075                                 break;
1076                         rc = tso_get_fragment(&state, efx,
1077                                               skb_shinfo(skb)->frags + frag_i);
1078                         if (rc)
1079                                 goto mem_err;
1080                 }
1081
1082                 /* Start at new packet? */
1083                 if (state.packet_space == 0 &&
1084                     tso_start_new_packet(tx_queue, skb, &state) < 0)
1085                         goto mem_err;
1086         }
1087
1088         /* Pass off to hardware */
1089         falcon_push_buffers(tx_queue);
1090
1091         tx_queue->tso_bursts++;
1092         return NETDEV_TX_OK;
1093
1094  mem_err:
1095         EFX_ERR(efx, "Out of memory for TSO headers, or PCI mapping error\n");
1096         dev_kfree_skb_any((struct sk_buff *)skb);
1097         goto unwind;
1098
1099  stop:
1100         rc2 = NETDEV_TX_BUSY;
1101
1102         /* Stop the queue if it wasn't stopped before. */
1103         if (tx_queue->stopped == 1)
1104                 efx_stop_queue(efx);
1105
1106  unwind:
1107         /* Free the DMA mapping we were in the process of writing out */
1108         if (state.unmap_len) {
1109                 if (state.unmap_single)
1110                         pci_unmap_single(efx->pci_dev, state.unmap_addr,
1111                                          state.unmap_len, PCI_DMA_TODEVICE);
1112                 else
1113                         pci_unmap_page(efx->pci_dev, state.unmap_addr,
1114                                        state.unmap_len, PCI_DMA_TODEVICE);
1115         }
1116
1117         efx_enqueue_unwind(tx_queue);
1118         return rc2;
1119 }
1120
1121
1122 /*
1123  * Free up all TSO data structures associated with tx_queue.  This
1124  * routine should be called only when the tx_queue is empty and will
1125  * no longer be used.
1126  */
1127 static void efx_fini_tso(struct efx_tx_queue *tx_queue)
1128 {
1129         unsigned i;
1130
1131         if (tx_queue->buffer) {
1132                 for (i = 0; i <= EFX_TXQ_MASK; ++i)
1133                         efx_tsoh_free(tx_queue, &tx_queue->buffer[i]);
1134         }
1135
1136         while (tx_queue->tso_headers_free != NULL)
1137                 efx_tsoh_block_free(tx_queue, tx_queue->tso_headers_free,
1138                                     tx_queue->efx->pci_dev);
1139 }