net-next-2.6.git / drivers/net/sfc/tx.c (commit: sfc: Rename falcon.h to nic.h)
1 /****************************************************************************
2  * Driver for Solarflare Solarstorm network controllers and boards
3  * Copyright 2005-2006 Fen Systems Ltd.
4  * Copyright 2005-2008 Solarflare Communications Inc.
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License version 2 as published
8  * by the Free Software Foundation, incorporated herein by reference.
9  */
10
11 #include <linux/pci.h>
12 #include <linux/tcp.h>
13 #include <linux/ip.h>
14 #include <linux/in.h>
15 #include <linux/if_ether.h>
16 #include <linux/highmem.h>
17 #include "net_driver.h"
18 #include "efx.h"
19 #include "nic.h"
20 #include "workarounds.h"
21
22 /*
23  * TX descriptor ring full threshold
24  *
25  * The tx_queue descriptor ring fill-level must fall below this value
26  * before we restart the netif queue
27  */
28 #define EFX_TXQ_THRESHOLD (EFX_TXQ_MASK / 2u)
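/* For illustration (assuming EFX_TXQ_SIZE == 1024, so EFX_TXQ_MASK == 1023):
 * EFX_TXQ_THRESHOLD evaluates to 511, i.e. the netif queue is restarted
 * once fewer than about half of the ring's descriptors remain in use.
 */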
29
30 /* We want to be able to nest calls to netif_stop_queue(), since each
31  * channel can have an individual stop on the queue.
32  */
33 void efx_stop_queue(struct efx_nic *efx)
34 {
35         spin_lock_bh(&efx->netif_stop_lock);
36         EFX_TRACE(efx, "stop TX queue\n");
37
38         atomic_inc(&efx->netif_stop_count);
39         netif_stop_queue(efx->net_dev);
40
41         spin_unlock_bh(&efx->netif_stop_lock);
42 }
43
44 /* Wake netif's TX queue
45  * We want to be able to nest calls to netif_stop_queue(), since each
46  * channel can have an individual stop on the queue.
47  */
48 void efx_wake_queue(struct efx_nic *efx)
49 {
50         local_bh_disable();
51         if (atomic_dec_and_lock(&efx->netif_stop_count,
52                                 &efx->netif_stop_lock)) {
53                 EFX_TRACE(efx, "waking TX queue\n");
54                 netif_wake_queue(efx->net_dev);
55                 spin_unlock(&efx->netif_stop_lock);
56         }
57         local_bh_enable();
58 }
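/* Illustrative usage of the two helpers above (a sketch, assuming the
 * count starts at zero while the queue is running): each efx_stop_queue()
 * increments netif_stop_count and stops the queue; efx_wake_queue()
 * decrements it and only wakes the queue when the count falls back to
 * zero, so nested stops from different channels compose safely:
 *
 *	efx_stop_queue(efx);	 count 0 -> 1, queue stopped
 *	efx_stop_queue(efx);	 count 1 -> 2
 *	efx_wake_queue(efx);	 count 2 -> 1, still stopped
 *	efx_wake_queue(efx);	 count 1 -> 0, queue woken
 */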
59
60 static void efx_dequeue_buffer(struct efx_tx_queue *tx_queue,
61                                struct efx_tx_buffer *buffer)
62 {
63         if (buffer->unmap_len) {
64                 struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
65                 dma_addr_t unmap_addr = (buffer->dma_addr + buffer->len -
66                                          buffer->unmap_len);
67                 if (buffer->unmap_single)
68                         pci_unmap_single(pci_dev, unmap_addr, buffer->unmap_len,
69                                          PCI_DMA_TODEVICE);
70                 else
71                         pci_unmap_page(pci_dev, unmap_addr, buffer->unmap_len,
72                                        PCI_DMA_TODEVICE);
73                 buffer->unmap_len = 0;
74                 buffer->unmap_single = false;
75         }
76
77         if (buffer->skb) {
78                 dev_kfree_skb_any((struct sk_buff *) buffer->skb);
79                 buffer->skb = NULL;
80                 EFX_TRACE(tx_queue->efx, "TX queue %d transmission id %x "
81                           "complete\n", tx_queue->queue, tx_queue->read_count);
82         }
83 }
84
85 /**
86  * struct efx_tso_header - a DMA mapped buffer for packet headers
87  * @next: Linked list of free ones.
88  *      The list is protected by the TX queue lock.
89  * @unmap_len: Length to unmap for an oversize buffer, or 0.
90  * @dma_addr: The DMA address of the header below.
91  *
92  * This controls the memory used for a TSO header.  Use TSOH_BUFFER()
93  * to find the packet header data.  Use TSOH_SIZE() to calculate the
94  * total size required for a given packet header length.  TSO headers
95  * in the free list are exactly %TSOH_STD_SIZE bytes in size.
96  */
97 struct efx_tso_header {
98         union {
99                 struct efx_tso_header *next;
100                 size_t unmap_len;
101         };
102         dma_addr_t dma_addr;
103 };
104
105 static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
106                                struct sk_buff *skb);
107 static void efx_fini_tso(struct efx_tx_queue *tx_queue);
108 static void efx_tsoh_heap_free(struct efx_tx_queue *tx_queue,
109                                struct efx_tso_header *tsoh);
110
111 static void efx_tsoh_free(struct efx_tx_queue *tx_queue,
112                           struct efx_tx_buffer *buffer)
113 {
114         if (buffer->tsoh) {
115                 if (likely(!buffer->tsoh->unmap_len)) {
116                         buffer->tsoh->next = tx_queue->tso_headers_free;
117                         tx_queue->tso_headers_free = buffer->tsoh;
118                 } else {
119                         efx_tsoh_heap_free(tx_queue, buffer->tsoh);
120                 }
121                 buffer->tsoh = NULL;
122         }
123 }
124
125
126 static inline unsigned
127 efx_max_tx_len(struct efx_nic *efx, dma_addr_t dma_addr)
128 {
129         /* Depending on the NIC revision, we can use descriptor
130          * lengths up to 8K or 8K-1.  However, since PCI Express
131          * devices must split read requests at 4K boundaries, there is
132          * little benefit from using descriptors that cross those
133          * boundaries and we keep things simple by not doing so.
134          */
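        /* Worked example (illustrative): if dma_addr ends in 0xf40 then
         * (~dma_addr & 0xfff) + 1 == 0xc0 == 192 bytes, exactly the
         * distance to the next 4K boundary.
         */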
135         unsigned len = (~dma_addr & 0xfff) + 1;
136
137         /* Work around hardware bug for unaligned buffers. */
138         if (EFX_WORKAROUND_5391(efx) && (dma_addr & 0xf))
139                 len = min_t(unsigned, len, 512 - (dma_addr & 0xf));
140
141         return len;
142 }
143
144 /*
145  * Add a socket buffer to a TX queue
146  *
147  * This maps all fragments of a socket buffer for DMA and adds them to
148  * the TX queue.  The queue's insert pointer will be incremented by
149  * the number of descriptors used (at least one per fragment).
150  *
151  * If any DMA mapping fails, any mapped fragments will be unmapped and
152  * the queue's insert pointer will be restored to its original value.
153  *
154  * This function is split out from efx_hard_start_xmit to allow the
155  * loopback test to direct packets via specific TX queues.
156  *
157  * Returns NETDEV_TX_OK or NETDEV_TX_BUSY
158  * You must hold netif_tx_lock() to call this function.
159  */
160 netdev_tx_t efx_enqueue_skb(struct efx_tx_queue *tx_queue, struct sk_buff *skb)
161 {
162         struct efx_nic *efx = tx_queue->efx;
163         struct pci_dev *pci_dev = efx->pci_dev;
164         struct efx_tx_buffer *buffer;
165         skb_frag_t *fragment;
166         struct page *page;
167         int page_offset;
168         unsigned int len, unmap_len = 0, fill_level, insert_ptr;
169         dma_addr_t dma_addr, unmap_addr = 0;
170         unsigned int dma_len;
171         bool unmap_single;
172         int q_space, i = 0;
173         netdev_tx_t rc = NETDEV_TX_OK;
174
175         EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);
176
177         if (skb_shinfo(skb)->gso_size)
178                 return efx_enqueue_skb_tso(tx_queue, skb);
179
180         /* Get size of the initial fragment */
181         len = skb_headlen(skb);
182
183         /* Pad if necessary */
184         if (EFX_WORKAROUND_15592(efx) && skb->len <= 32) {
185                 EFX_BUG_ON_PARANOID(skb->data_len);
186                 len = 32 + 1;
187                 if (skb_pad(skb, len - skb->len))
188                         return NETDEV_TX_OK;
189         }
190
191         fill_level = tx_queue->insert_count - tx_queue->old_read_count;
192         q_space = EFX_TXQ_MASK - 1 - fill_level;
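        /* As in efx_tx_queue_insert(): -1 as there is no way to represent
         * all descriptors used.  Illustration (assuming EFX_TXQ_MASK == 1023):
         * insert_count == 1500 and old_read_count == 600 give
         * fill_level == 900 and q_space == 122.
         */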
193
194         /* Map for DMA.  Use pci_map_single rather than pci_map_page
195          * since this is more efficient on machines with sparse
196          * memory.
197          */
198         unmap_single = true;
199         dma_addr = pci_map_single(pci_dev, skb->data, len, PCI_DMA_TODEVICE);
200
201         /* Process all fragments */
202         while (1) {
203                 if (unlikely(pci_dma_mapping_error(pci_dev, dma_addr)))
204                         goto pci_err;
205
206                 /* Store fields for marking in the per-fragment final
207                  * descriptor */
208                 unmap_len = len;
209                 unmap_addr = dma_addr;
210
211                 /* Add to TX queue, splitting across DMA boundaries */
212                 do {
213                         if (unlikely(q_space-- <= 0)) {
214                                 /* It might be that completions have
215                                  * happened since the xmit path last
216                                  * checked.  Update the xmit path's
217                                  * copy of read_count.
218                                  */
219                                 ++tx_queue->stopped;
220                                 /* This memory barrier protects the
221                                  * change of stopped from the access
222                                  * of read_count. */
223                                 smp_mb();
224                                 tx_queue->old_read_count =
225                                         *(volatile unsigned *)
226                                         &tx_queue->read_count;
227                                 fill_level = (tx_queue->insert_count
228                                               - tx_queue->old_read_count);
229                                 q_space = EFX_TXQ_MASK - 1 - fill_level;
230                                 if (unlikely(q_space-- <= 0))
231                                         goto stop;
232                                 smp_mb();
233                                 --tx_queue->stopped;
234                         }
235
236                         insert_ptr = tx_queue->insert_count & EFX_TXQ_MASK;
237                         buffer = &tx_queue->buffer[insert_ptr];
238                         efx_tsoh_free(tx_queue, buffer);
239                         EFX_BUG_ON_PARANOID(buffer->tsoh);
240                         EFX_BUG_ON_PARANOID(buffer->skb);
241                         EFX_BUG_ON_PARANOID(buffer->len);
242                         EFX_BUG_ON_PARANOID(!buffer->continuation);
243                         EFX_BUG_ON_PARANOID(buffer->unmap_len);
244
245                         dma_len = efx_max_tx_len(efx, dma_addr);
246                         if (likely(dma_len >= len))
247                                 dma_len = len;
248
249                         /* Fill out per descriptor fields */
250                         buffer->len = dma_len;
251                         buffer->dma_addr = dma_addr;
252                         len -= dma_len;
253                         dma_addr += dma_len;
254                         ++tx_queue->insert_count;
255                 } while (len);
256
257                 /* Transfer ownership of the unmapping to the final buffer */
258                 buffer->unmap_single = unmap_single;
259                 buffer->unmap_len = unmap_len;
260                 unmap_len = 0;
261
262                 /* Get address and size of next fragment */
263                 if (i >= skb_shinfo(skb)->nr_frags)
264                         break;
265                 fragment = &skb_shinfo(skb)->frags[i];
266                 len = fragment->size;
267                 page = fragment->page;
268                 page_offset = fragment->page_offset;
269                 i++;
270                 /* Map for DMA */
271                 unmap_single = false;
272                 dma_addr = pci_map_page(pci_dev, page, page_offset, len,
273                                         PCI_DMA_TODEVICE);
274         }
275
276         /* Transfer ownership of the skb to the final buffer */
277         buffer->skb = skb;
278         buffer->continuation = false;
279
280         /* Pass off to hardware */
281         efx_nic_push_buffers(tx_queue);
282
283         return NETDEV_TX_OK;
284
285  pci_err:
286         EFX_ERR_RL(efx, "TX queue %d could not map skb with %d bytes %d "
287                    "fragments for DMA\n", tx_queue->queue, skb->len,
288                    skb_shinfo(skb)->nr_frags + 1);
289
290         /* Mark the packet as transmitted, and free the SKB ourselves */
291         dev_kfree_skb_any(skb);
292         goto unwind;
293
294  stop:
295         rc = NETDEV_TX_BUSY;
296
297         if (tx_queue->stopped == 1)
298                 efx_stop_queue(efx);
299
300  unwind:
301         /* Work backwards until we hit the original insert pointer value */
302         while (tx_queue->insert_count != tx_queue->write_count) {
303                 --tx_queue->insert_count;
304                 insert_ptr = tx_queue->insert_count & EFX_TXQ_MASK;
305                 buffer = &tx_queue->buffer[insert_ptr];
306                 efx_dequeue_buffer(tx_queue, buffer);
307                 buffer->len = 0;
308         }
309
310         /* Free the fragment we were mid-way through pushing */
311         if (unmap_len) {
312                 if (unmap_single)
313                         pci_unmap_single(pci_dev, unmap_addr, unmap_len,
314                                          PCI_DMA_TODEVICE);
315                 else
316                         pci_unmap_page(pci_dev, unmap_addr, unmap_len,
317                                        PCI_DMA_TODEVICE);
318         }
319
320         return rc;
321 }
322
323 /* Remove packets from the TX queue
324  *
325  * This removes packets from the TX queue, up to and including the
326  * specified index.
327  */
328 static void efx_dequeue_buffers(struct efx_tx_queue *tx_queue,
329                                 unsigned int index)
330 {
331         struct efx_nic *efx = tx_queue->efx;
332         unsigned int stop_index, read_ptr;
333
334         stop_index = (index + 1) & EFX_TXQ_MASK;
335         read_ptr = tx_queue->read_count & EFX_TXQ_MASK;
336
337         while (read_ptr != stop_index) {
338                 struct efx_tx_buffer *buffer = &tx_queue->buffer[read_ptr];
339                 if (unlikely(buffer->len == 0)) {
340                         EFX_ERR(tx_queue->efx, "TX queue %d spurious TX "
341                                 "completion id %x\n", tx_queue->queue,
342                                 read_ptr);
343                         efx_schedule_reset(efx, RESET_TYPE_TX_SKIP);
344                         return;
345                 }
346
347                 efx_dequeue_buffer(tx_queue, buffer);
348                 buffer->continuation = true;
349                 buffer->len = 0;
350
351                 ++tx_queue->read_count;
352                 read_ptr = tx_queue->read_count & EFX_TXQ_MASK;
353         }
354 }
355
356 /* Initiate a packet transmission.  We use one channel per CPU
357  * (sharing when we have more CPUs than channels).  On Falcon, the TX
358  * completion events will be directed back to the CPU that transmitted
359  * the packet, which should be cache-efficient.
360  *
361  * Context: non-blocking.
362  * Note that returning anything other than NETDEV_TX_OK will cause the
363  * OS to free the skb.
364  */
365 netdev_tx_t efx_hard_start_xmit(struct sk_buff *skb,
366                                       struct net_device *net_dev)
367 {
368         struct efx_nic *efx = netdev_priv(net_dev);
369         struct efx_tx_queue *tx_queue;
370
371         if (unlikely(efx->port_inhibited))
372                 return NETDEV_TX_BUSY;
373
374         if (likely(skb->ip_summed == CHECKSUM_PARTIAL))
375                 tx_queue = &efx->tx_queue[EFX_TX_QUEUE_OFFLOAD_CSUM];
376         else
377                 tx_queue = &efx->tx_queue[EFX_TX_QUEUE_NO_CSUM];
378
379         return efx_enqueue_skb(tx_queue, skb);
380 }
381
382 void efx_xmit_done(struct efx_tx_queue *tx_queue, unsigned int index)
383 {
384         unsigned fill_level;
385         struct efx_nic *efx = tx_queue->efx;
386
387         EFX_BUG_ON_PARANOID(index > EFX_TXQ_MASK);
388
389         efx_dequeue_buffers(tx_queue, index);
390
391         /* See if we need to restart the netif queue.  This barrier
392          * separates the update of read_count from the test of
393          * stopped. */
394         smp_mb();
395         if (unlikely(tx_queue->stopped) && likely(efx->port_enabled)) {
396                 fill_level = tx_queue->insert_count - tx_queue->read_count;
397                 if (fill_level < EFX_TXQ_THRESHOLD) {
398                         EFX_BUG_ON_PARANOID(!efx_dev_registered(efx));
399
400                         /* Do this under netif_tx_lock(), to avoid racing
401                          * with efx_enqueue_skb(). */
402                         netif_tx_lock(efx->net_dev);
403                         if (tx_queue->stopped) {
404                                 tx_queue->stopped = 0;
405                                 efx_wake_queue(efx);
406                         }
407                         netif_tx_unlock(efx->net_dev);
408                 }
409         }
410 }
411
412 int efx_probe_tx_queue(struct efx_tx_queue *tx_queue)
413 {
414         struct efx_nic *efx = tx_queue->efx;
415         unsigned int txq_size;
416         int i, rc;
417
418         EFX_LOG(efx, "creating TX queue %d\n", tx_queue->queue);
419
420         /* Allocate software ring */
421         txq_size = EFX_TXQ_SIZE * sizeof(*tx_queue->buffer);
422         tx_queue->buffer = kzalloc(txq_size, GFP_KERNEL);
423         if (!tx_queue->buffer)
424                 return -ENOMEM;
425         for (i = 0; i <= EFX_TXQ_MASK; ++i)
426                 tx_queue->buffer[i].continuation = true;
427
428         /* Allocate hardware ring */
429         rc = efx_nic_probe_tx(tx_queue);
430         if (rc)
431                 goto fail;
432
433         return 0;
434
435  fail:
436         kfree(tx_queue->buffer);
437         tx_queue->buffer = NULL;
438         return rc;
439 }
440
441 void efx_init_tx_queue(struct efx_tx_queue *tx_queue)
442 {
443         EFX_LOG(tx_queue->efx, "initialising TX queue %d\n", tx_queue->queue);
444
445         tx_queue->insert_count = 0;
446         tx_queue->write_count = 0;
447         tx_queue->read_count = 0;
448         tx_queue->old_read_count = 0;
449         BUG_ON(tx_queue->stopped);
450
451         /* Set up TX descriptor ring */
452         efx_nic_init_tx(tx_queue);
453 }
454
455 void efx_release_tx_buffers(struct efx_tx_queue *tx_queue)
456 {
457         struct efx_tx_buffer *buffer;
458
459         if (!tx_queue->buffer)
460                 return;
461
462         /* Free any buffers left in the ring */
463         while (tx_queue->read_count != tx_queue->write_count) {
464                 buffer = &tx_queue->buffer[tx_queue->read_count & EFX_TXQ_MASK];
465                 efx_dequeue_buffer(tx_queue, buffer);
466                 buffer->continuation = true;
467                 buffer->len = 0;
468
469                 ++tx_queue->read_count;
470         }
471 }
472
473 void efx_fini_tx_queue(struct efx_tx_queue *tx_queue)
474 {
475         EFX_LOG(tx_queue->efx, "shutting down TX queue %d\n", tx_queue->queue);
476
477         /* Flush TX queue, remove descriptor ring */
478         efx_nic_fini_tx(tx_queue);
479
480         efx_release_tx_buffers(tx_queue);
481
482         /* Free up TSO header cache */
483         efx_fini_tso(tx_queue);
484
485         /* Release queue's stop on port, if any */
486         if (tx_queue->stopped) {
487                 tx_queue->stopped = 0;
488                 efx_wake_queue(tx_queue->efx);
489         }
490 }
491
492 void efx_remove_tx_queue(struct efx_tx_queue *tx_queue)
493 {
494         EFX_LOG(tx_queue->efx, "destroying TX queue %d\n", tx_queue->queue);
495         efx_nic_remove_tx(tx_queue);
496
497         kfree(tx_queue->buffer);
498         tx_queue->buffer = NULL;
499 }
500
501
502 /* Efx TCP segmentation acceleration.
503  *
504  * Why?  Because by doing it here in the driver we can go significantly
505  * faster than GSO.
506  *
507  * Requires TX checksum offload support.
508  */
509
510 /* Number of bytes inserted at the start of a TSO header buffer,
511  * similar to NET_IP_ALIGN.
512  */
513 #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
514 #define TSOH_OFFSET     0
515 #else
516 #define TSOH_OFFSET     NET_IP_ALIGN
517 #endif
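/* (As with NET_IP_ALIGN on the receive path: offsetting the 14-byte
 * Ethernet header by 2 bytes leaves the IP and TCP headers naturally
 * aligned on architectures without efficient unaligned access.)
 */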
518
519 #define TSOH_BUFFER(tsoh)       ((u8 *)(tsoh + 1) + TSOH_OFFSET)
520
521 /* Total size of struct efx_tso_header, buffer and padding */
522 #define TSOH_SIZE(hdr_len)                                      \
523         (sizeof(struct efx_tso_header) + TSOH_OFFSET + hdr_len)
524
525 /* Size of blocks on free list.  Larger blocks must be allocated from
526  * the heap.
527  */
528 #define TSOH_STD_SIZE           128
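/* Illustrative sizing (assuming a 64-bit build, so
 * sizeof(struct efx_tso_header) == 16, with TSOH_OFFSET == 2):
 * TSOH_SIZE(hdr_len) == 18 + hdr_len, so headers of up to 110 bytes fit
 * in a standard free-list block; longer headers fall back to
 * efx_tsoh_heap_alloc().
 */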
529
530 #define PTR_DIFF(p1, p2)  ((u8 *)(p1) - (u8 *)(p2))
531 #define ETH_HDR_LEN(skb)  (skb_network_header(skb) - (skb)->data)
532 #define SKB_TCP_OFF(skb)  PTR_DIFF(tcp_hdr(skb), (skb)->data)
533 #define SKB_IPV4_OFF(skb) PTR_DIFF(ip_hdr(skb), (skb)->data)
534
535 /**
536  * struct tso_state - TSO state for an SKB
537  * @out_len: Remaining length in current segment
538  * @seqnum: Current sequence number
539  * @ipv4_id: Current IPv4 ID, host endian
540  * @packet_space: Remaining space in current packet
541  * @dma_addr: DMA address of current position
542  * @in_len: Remaining length in current SKB fragment
543  * @unmap_len: Length of SKB fragment
544  * @unmap_addr: DMA address of SKB fragment
545  * @unmap_single: DMA single vs page mapping flag
546  * @header_len: Number of bytes of header
547  * @full_packet_size: Number of bytes to put in each outgoing segment
548  *
549  * The state used during segmentation.  It is put into this data structure
550  * just to make it easy to pass into inline functions.
551  */
552 struct tso_state {
553         /* Output position */
554         unsigned out_len;
555         unsigned seqnum;
556         unsigned ipv4_id;
557         unsigned packet_space;
558
559         /* Input position */
560         dma_addr_t dma_addr;
561         unsigned in_len;
562         unsigned unmap_len;
563         dma_addr_t unmap_addr;
564         bool unmap_single;
565
566         unsigned header_len;
567         int full_packet_size;
568 };
569
570
571 /*
572  * Verify that our various assumptions about sk_buffs and the conditions
573  * under which TSO will be attempted hold true.
574  */
575 static void efx_tso_check_safe(struct sk_buff *skb)
576 {
577         __be16 protocol = skb->protocol;
578
579         EFX_BUG_ON_PARANOID(((struct ethhdr *)skb->data)->h_proto !=
580                             protocol);
581         if (protocol == htons(ETH_P_8021Q)) {
582                 /* Find the encapsulated protocol; reset network header
583                  * and transport header based on that. */
584                 struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
585                 protocol = veh->h_vlan_encapsulated_proto;
586                 skb_set_network_header(skb, sizeof(*veh));
587                 if (protocol == htons(ETH_P_IP))
588                         skb_set_transport_header(skb, sizeof(*veh) +
589                                                  4 * ip_hdr(skb)->ihl);
590         }
591
592         EFX_BUG_ON_PARANOID(protocol != htons(ETH_P_IP));
593         EFX_BUG_ON_PARANOID(ip_hdr(skb)->protocol != IPPROTO_TCP);
594         EFX_BUG_ON_PARANOID((PTR_DIFF(tcp_hdr(skb), skb->data)
595                              + (tcp_hdr(skb)->doff << 2u)) >
596                             skb_headlen(skb));
597 }
598
599
600 /*
601  * Allocate a page's worth of efx_tso_header structures, and string them
602  * into the tx_queue->tso_headers_free linked list. Return 0 or -ENOMEM.
603  */
604 static int efx_tsoh_block_alloc(struct efx_tx_queue *tx_queue)
605 {
606
607         struct pci_dev *pci_dev = tx_queue->efx->pci_dev;
608         struct efx_tso_header *tsoh;
609         dma_addr_t dma_addr;
610         u8 *base_kva, *kva;
611
612         base_kva = pci_alloc_consistent(pci_dev, PAGE_SIZE, &dma_addr);
613         if (base_kva == NULL) {
614                 EFX_ERR(tx_queue->efx, "Unable to allocate page for TSO"
615                         " headers\n");
616                 return -ENOMEM;
617         }
618
619         /* pci_alloc_consistent() allocates pages. */
620         EFX_BUG_ON_PARANOID(dma_addr & (PAGE_SIZE - 1u));
621
622         for (kva = base_kva; kva < base_kva + PAGE_SIZE; kva += TSOH_STD_SIZE) {
623                 tsoh = (struct efx_tso_header *)kva;
624                 tsoh->dma_addr = dma_addr + (TSOH_BUFFER(tsoh) - base_kva);
625                 tsoh->next = tx_queue->tso_headers_free;
626                 tx_queue->tso_headers_free = tsoh;
627         }
628
629         return 0;
630 }
631
632
633 /* Free up a TSO header, and all others in the same page. */
634 static void efx_tsoh_block_free(struct efx_tx_queue *tx_queue,
635                                 struct efx_tso_header *tsoh,
636                                 struct pci_dev *pci_dev)
637 {
638         struct efx_tso_header **p;
639         unsigned long base_kva;
640         dma_addr_t base_dma;
641
642         base_kva = (unsigned long)tsoh & PAGE_MASK;
643         base_dma = tsoh->dma_addr & PAGE_MASK;
644
645         p = &tx_queue->tso_headers_free;
646         while (*p != NULL) {
647                 if (((unsigned long)*p & PAGE_MASK) == base_kva)
648                         *p = (*p)->next;
649                 else
650                         p = &(*p)->next;
651         }
652
653         pci_free_consistent(pci_dev, PAGE_SIZE, (void *)base_kva, base_dma);
654 }
655
656 static struct efx_tso_header *
657 efx_tsoh_heap_alloc(struct efx_tx_queue *tx_queue, size_t header_len)
658 {
659         struct efx_tso_header *tsoh;
660
661         tsoh = kmalloc(TSOH_SIZE(header_len), GFP_ATOMIC | GFP_DMA);
662         if (unlikely(!tsoh))
663                 return NULL;
664
665         tsoh->dma_addr = pci_map_single(tx_queue->efx->pci_dev,
666                                         TSOH_BUFFER(tsoh), header_len,
667                                         PCI_DMA_TODEVICE);
668         if (unlikely(pci_dma_mapping_error(tx_queue->efx->pci_dev,
669                                            tsoh->dma_addr))) {
670                 kfree(tsoh);
671                 return NULL;
672         }
673
674         tsoh->unmap_len = header_len;
675         return tsoh;
676 }
677
678 static void
679 efx_tsoh_heap_free(struct efx_tx_queue *tx_queue, struct efx_tso_header *tsoh)
680 {
681         pci_unmap_single(tx_queue->efx->pci_dev,
682                          tsoh->dma_addr, tsoh->unmap_len,
683                          PCI_DMA_TODEVICE);
684         kfree(tsoh);
685 }
686
687 /**
688  * efx_tx_queue_insert - push descriptors onto the TX queue
689  * @tx_queue:           Efx TX queue
690  * @dma_addr:           DMA address of fragment
691  * @len:                Length of fragment
692  * @final_buffer:       The final buffer inserted into the queue
693  *
694  * Push descriptors onto the TX queue.  Return 0 on success or 1 if
695  * @tx_queue is full.
696  */
697 static int efx_tx_queue_insert(struct efx_tx_queue *tx_queue,
698                                dma_addr_t dma_addr, unsigned len,
699                                struct efx_tx_buffer **final_buffer)
700 {
701         struct efx_tx_buffer *buffer;
702         struct efx_nic *efx = tx_queue->efx;
703         unsigned dma_len, fill_level, insert_ptr;
704         int q_space;
705
706         EFX_BUG_ON_PARANOID(len <= 0);
707
708         fill_level = tx_queue->insert_count - tx_queue->old_read_count;
709         /* -1 as there is no way to represent all descriptors used */
710         q_space = EFX_TXQ_MASK - 1 - fill_level;
711
712         while (1) {
713                 if (unlikely(q_space-- <= 0)) {
714                         /* It might be that completions have happened
715                          * since the xmit path last checked.  Update
716                          * the xmit path's copy of read_count.
717                          */
718                         ++tx_queue->stopped;
719                         /* This memory barrier protects the change of
720                          * stopped from the access of read_count. */
721                         smp_mb();
722                         tx_queue->old_read_count =
723                                 *(volatile unsigned *)&tx_queue->read_count;
724                         fill_level = (tx_queue->insert_count
725                                       - tx_queue->old_read_count);
726                         q_space = EFX_TXQ_MASK - 1 - fill_level;
727                         if (unlikely(q_space-- <= 0)) {
728                                 *final_buffer = NULL;
729                                 return 1;
730                         }
731                         smp_mb();
732                         --tx_queue->stopped;
733                 }
734
735                 insert_ptr = tx_queue->insert_count & EFX_TXQ_MASK;
736                 buffer = &tx_queue->buffer[insert_ptr];
737                 ++tx_queue->insert_count;
738
739                 EFX_BUG_ON_PARANOID(tx_queue->insert_count -
740                                     tx_queue->read_count >
741                                     EFX_TXQ_MASK);
742
743                 efx_tsoh_free(tx_queue, buffer);
744                 EFX_BUG_ON_PARANOID(buffer->len);
745                 EFX_BUG_ON_PARANOID(buffer->unmap_len);
746                 EFX_BUG_ON_PARANOID(buffer->skb);
747                 EFX_BUG_ON_PARANOID(!buffer->continuation);
748                 EFX_BUG_ON_PARANOID(buffer->tsoh);
749
750                 buffer->dma_addr = dma_addr;
751
752                 dma_len = efx_max_tx_len(efx, dma_addr);
753
754                 /* If there is enough space to send then do so */
755                 if (dma_len >= len)
756                         break;
757
758                 buffer->len = dma_len; /* Don't set the other members */
759                 dma_addr += dma_len;
760                 len -= dma_len;
761         }
762
763         EFX_BUG_ON_PARANOID(!len);
764         buffer->len = len;
765         *final_buffer = buffer;
766         return 0;
767 }
768
769
770 /*
771  * Put a TSO header into the TX queue.
772  *
773  * This is special-cased because we know that it is small enough to fit in
774  * a single fragment, and we know it doesn't cross a page boundary.  It
775  * also allows us to not worry about end-of-packet etc.
776  */
777 static void efx_tso_put_header(struct efx_tx_queue *tx_queue,
778                                struct efx_tso_header *tsoh, unsigned len)
779 {
780         struct efx_tx_buffer *buffer;
781
782         buffer = &tx_queue->buffer[tx_queue->insert_count & EFX_TXQ_MASK];
783         efx_tsoh_free(tx_queue, buffer);
784         EFX_BUG_ON_PARANOID(buffer->len);
785         EFX_BUG_ON_PARANOID(buffer->unmap_len);
786         EFX_BUG_ON_PARANOID(buffer->skb);
787         EFX_BUG_ON_PARANOID(!buffer->continuation);
788         EFX_BUG_ON_PARANOID(buffer->tsoh);
789         buffer->len = len;
790         buffer->dma_addr = tsoh->dma_addr;
791         buffer->tsoh = tsoh;
792
793         ++tx_queue->insert_count;
794 }
795
796
797 /* Remove descriptors put into a tx_queue. */
798 static void efx_enqueue_unwind(struct efx_tx_queue *tx_queue)
799 {
800         struct efx_tx_buffer *buffer;
801         dma_addr_t unmap_addr;
802
803         /* Work backwards until we hit the original insert pointer value */
804         while (tx_queue->insert_count != tx_queue->write_count) {
805                 --tx_queue->insert_count;
806                 buffer = &tx_queue->buffer[tx_queue->insert_count &
807                                            EFX_TXQ_MASK];
808                 efx_tsoh_free(tx_queue, buffer);
809                 EFX_BUG_ON_PARANOID(buffer->skb);
810                 buffer->len = 0;
811                 buffer->continuation = true;
812                 if (buffer->unmap_len) {
813                         unmap_addr = (buffer->dma_addr + buffer->len -
814                                       buffer->unmap_len);
815                         if (buffer->unmap_single)
816                                 pci_unmap_single(tx_queue->efx->pci_dev,
817                                                  unmap_addr, buffer->unmap_len,
818                                                  PCI_DMA_TODEVICE);
819                         else
820                                 pci_unmap_page(tx_queue->efx->pci_dev,
821                                                unmap_addr, buffer->unmap_len,
822                                                PCI_DMA_TODEVICE);
823                         buffer->unmap_len = 0;
824                 }
825         }
826 }
827
828
829 /* Parse the SKB header and initialise state. */
830 static void tso_start(struct tso_state *st, const struct sk_buff *skb)
831 {
832         /* The combined size of the Ethernet/IP/TCP headers is the TCP header
833          * size plus the offset of the TCP header from the start of the packet.
834          */
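        /* For example, an untagged Ethernet + IPv4 + TCP frame with no
         * options gives header_len == 14 + 20 + 20 == 54; with a gso_size
         * of 1460, each full output segment is 54 + 1460 == 1514 bytes.
         */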
835         st->header_len = ((tcp_hdr(skb)->doff << 2u)
836                           + PTR_DIFF(tcp_hdr(skb), skb->data));
837         st->full_packet_size = st->header_len + skb_shinfo(skb)->gso_size;
838
839         st->ipv4_id = ntohs(ip_hdr(skb)->id);
840         st->seqnum = ntohl(tcp_hdr(skb)->seq);
841
842         EFX_BUG_ON_PARANOID(tcp_hdr(skb)->urg);
843         EFX_BUG_ON_PARANOID(tcp_hdr(skb)->syn);
844         EFX_BUG_ON_PARANOID(tcp_hdr(skb)->rst);
845
846         st->packet_space = st->full_packet_size;
847         st->out_len = skb->len - st->header_len;
848         st->unmap_len = 0;
849         st->unmap_single = false;
850 }
851
852 static int tso_get_fragment(struct tso_state *st, struct efx_nic *efx,
853                             skb_frag_t *frag)
854 {
855         st->unmap_addr = pci_map_page(efx->pci_dev, frag->page,
856                                       frag->page_offset, frag->size,
857                                       PCI_DMA_TODEVICE);
858         if (likely(!pci_dma_mapping_error(efx->pci_dev, st->unmap_addr))) {
859                 st->unmap_single = false;
860                 st->unmap_len = frag->size;
861                 st->in_len = frag->size;
862                 st->dma_addr = st->unmap_addr;
863                 return 0;
864         }
865         return -ENOMEM;
866 }
867
868 static int tso_get_head_fragment(struct tso_state *st, struct efx_nic *efx,
869                                  const struct sk_buff *skb)
870 {
871         int hl = st->header_len;
872         int len = skb_headlen(skb) - hl;
873
874         st->unmap_addr = pci_map_single(efx->pci_dev, skb->data + hl,
875                                         len, PCI_DMA_TODEVICE);
876         if (likely(!pci_dma_mapping_error(efx->pci_dev, st->unmap_addr))) {
877                 st->unmap_single = true;
878                 st->unmap_len = len;
879                 st->in_len = len;
880                 st->dma_addr = st->unmap_addr;
881                 return 0;
882         }
883         return -ENOMEM;
884 }
885
886
887 /**
888  * tso_fill_packet_with_fragment - form descriptors for the current fragment
889  * @tx_queue:           Efx TX queue
890  * @skb:                Socket buffer
891  * @st:                 TSO state
892  *
893  * Form descriptors for the current fragment, until we reach the end
894  * of the fragment or the end of the packet.  Return 0 on success, 1 if
895  * there is not enough space in @tx_queue.
896  */
897 static int tso_fill_packet_with_fragment(struct efx_tx_queue *tx_queue,
898                                          const struct sk_buff *skb,
899                                          struct tso_state *st)
900 {
901         struct efx_tx_buffer *buffer;
902         int n, end_of_packet, rc;
903
904         if (st->in_len == 0)
905                 return 0;
906         if (st->packet_space == 0)
907                 return 0;
908
909         EFX_BUG_ON_PARANOID(st->in_len <= 0);
910         EFX_BUG_ON_PARANOID(st->packet_space <= 0);
911
912         n = min(st->in_len, st->packet_space);
913
914         st->packet_space -= n;
915         st->out_len -= n;
916         st->in_len -= n;
917
918         rc = efx_tx_queue_insert(tx_queue, st->dma_addr, n, &buffer);
919         if (likely(rc == 0)) {
920                 if (st->out_len == 0)
921                         /* Transfer ownership of the skb */
922                         buffer->skb = skb;
923
924                 end_of_packet = st->out_len == 0 || st->packet_space == 0;
925                 buffer->continuation = !end_of_packet;
926
927                 if (st->in_len == 0) {
928                         /* Transfer ownership of the pci mapping */
929                         buffer->unmap_len = st->unmap_len;
930                         buffer->unmap_single = st->unmap_single;
931                         st->unmap_len = 0;
932                 }
933         }
934
935         st->dma_addr += n;
936         return rc;
937 }
938
939
940 /**
941  * tso_start_new_packet - generate a new header and prepare for the new packet
942  * @tx_queue:           Efx TX queue
943  * @skb:                Socket buffer
944  * @st:                 TSO state
945  *
946  * Generate a new header and prepare for the new packet.  Return 0 on
947  * success, or -1 if we failed to allocate a header.
948  */
949 static int tso_start_new_packet(struct efx_tx_queue *tx_queue,
950                                 const struct sk_buff *skb,
951                                 struct tso_state *st)
952 {
953         struct efx_tso_header *tsoh;
954         struct iphdr *tsoh_iph;
955         struct tcphdr *tsoh_th;
956         unsigned ip_length;
957         u8 *header;
958
959         /* Allocate a DMA-mapped header buffer. */
960         if (likely(TSOH_SIZE(st->header_len) <= TSOH_STD_SIZE)) {
961                 if (tx_queue->tso_headers_free == NULL) {
962                         if (efx_tsoh_block_alloc(tx_queue))
963                                 return -1;
964                 }
965                 EFX_BUG_ON_PARANOID(!tx_queue->tso_headers_free);
966                 tsoh = tx_queue->tso_headers_free;
967                 tx_queue->tso_headers_free = tsoh->next;
968                 tsoh->unmap_len = 0;
969         } else {
970                 tx_queue->tso_long_headers++;
971                 tsoh = efx_tsoh_heap_alloc(tx_queue, st->header_len);
972                 if (unlikely(!tsoh))
973                         return -1;
974         }
975
976         header = TSOH_BUFFER(tsoh);
977         tsoh_th = (struct tcphdr *)(header + SKB_TCP_OFF(skb));
978         tsoh_iph = (struct iphdr *)(header + SKB_IPV4_OFF(skb));
979
980         /* Copy and update the headers. */
981         memcpy(header, skb->data, st->header_len);
982
983         tsoh_th->seq = htonl(st->seqnum);
984         st->seqnum += skb_shinfo(skb)->gso_size;
985         if (st->out_len > skb_shinfo(skb)->gso_size) {
986                 /* This packet will not finish the TSO burst. */
987                 ip_length = st->full_packet_size - ETH_HDR_LEN(skb);
988                 tsoh_th->fin = 0;
989                 tsoh_th->psh = 0;
990         } else {
991                 /* This packet will be the last in the TSO burst. */
992                 ip_length = st->header_len - ETH_HDR_LEN(skb) + st->out_len;
993                 tsoh_th->fin = tcp_hdr(skb)->fin;
994                 tsoh_th->psh = tcp_hdr(skb)->psh;
995         }
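        /* Illustration: with a 14-byte Ethernet header and a 1514-byte
         * full_packet_size, a non-final segment has ip_length == 1500,
         * i.e. a full-size IPv4 datagram.
         */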
996         tsoh_iph->tot_len = htons(ip_length);
997
998         /* Linux leaves suitable gaps in the IP ID space for us to fill. */
999         tsoh_iph->id = htons(st->ipv4_id);
1000         st->ipv4_id++;
1001
1002         st->packet_space = skb_shinfo(skb)->gso_size;
1003         ++tx_queue->tso_packets;
1004
1005         /* Form a descriptor for this header. */
1006         efx_tso_put_header(tx_queue, tsoh, st->header_len);
1007
1008         return 0;
1009 }
1010
1011
1012 /**
1013  * efx_enqueue_skb_tso - segment and transmit a TSO socket buffer
1014  * @tx_queue:           Efx TX queue
1015  * @skb:                Socket buffer
1016  *
1017  * Context: You must hold netif_tx_lock() to call this function.
1018  *
1019  * Add socket buffer @skb to @tx_queue, performing TSO, or return a non-zero
1020  * value if @skb was not enqueued.  In all cases @skb is consumed.  Return
1021  * %NETDEV_TX_OK or %NETDEV_TX_BUSY.
1022  */
1023 static int efx_enqueue_skb_tso(struct efx_tx_queue *tx_queue,
1024                                struct sk_buff *skb)
1025 {
1026         struct efx_nic *efx = tx_queue->efx;
1027         int frag_i, rc, rc2 = NETDEV_TX_OK;
1028         struct tso_state state;
1029
1030         /* Verify TSO is safe - these checks should never fail. */
1031         efx_tso_check_safe(skb);
1032
1033         EFX_BUG_ON_PARANOID(tx_queue->write_count != tx_queue->insert_count);
1034
1035         tso_start(&state, skb);
1036
1037         /* Assume that skb header area contains exactly the headers, and
1038          * all payload is in the frag list.
1039          */
1040         if (skb_headlen(skb) == state.header_len) {
1041                 /* Grab the first payload fragment. */
1042                 EFX_BUG_ON_PARANOID(skb_shinfo(skb)->nr_frags < 1);
1043                 frag_i = 0;
1044                 rc = tso_get_fragment(&state, efx,
1045                                       skb_shinfo(skb)->frags + frag_i);
1046                 if (rc)
1047                         goto mem_err;
1048         } else {
1049                 rc = tso_get_head_fragment(&state, efx, skb);
1050                 if (rc)
1051                         goto mem_err;
1052                 frag_i = -1;
1053         }
1054
1055         if (tso_start_new_packet(tx_queue, skb, &state) < 0)
1056                 goto mem_err;
1057
1058         while (1) {
1059                 rc = tso_fill_packet_with_fragment(tx_queue, skb, &state);
1060                 if (unlikely(rc))
1061                         goto stop;
1062
1063                 /* Move onto the next fragment? */
1064                 if (state.in_len == 0) {
1065                         if (++frag_i >= skb_shinfo(skb)->nr_frags)
1066                                 /* End of payload reached. */
1067                                 break;
1068                         rc = tso_get_fragment(&state, efx,
1069                                               skb_shinfo(skb)->frags + frag_i);
1070                         if (rc)
1071                                 goto mem_err;
1072                 }
1073
1074                 /* Start at new packet? */
1075                 if (state.packet_space == 0 &&
1076                     tso_start_new_packet(tx_queue, skb, &state) < 0)
1077                         goto mem_err;
1078         }
1079
1080         /* Pass off to hardware */
1081         efx_nic_push_buffers(tx_queue);
1082
1083         tx_queue->tso_bursts++;
1084         return NETDEV_TX_OK;
1085
1086  mem_err:
1087         EFX_ERR(efx, "Out of memory for TSO headers, or PCI mapping error\n");
1088         dev_kfree_skb_any(skb);
1089         goto unwind;
1090
1091  stop:
1092         rc2 = NETDEV_TX_BUSY;
1093
1094         /* Stop the queue if it wasn't stopped before. */
1095         if (tx_queue->stopped == 1)
1096                 efx_stop_queue(efx);
1097
1098  unwind:
1099         /* Free the DMA mapping we were in the process of writing out */
1100         if (state.unmap_len) {
1101                 if (state.unmap_single)
1102                         pci_unmap_single(efx->pci_dev, state.unmap_addr,
1103                                          state.unmap_len, PCI_DMA_TODEVICE);
1104                 else
1105                         pci_unmap_page(efx->pci_dev, state.unmap_addr,
1106                                        state.unmap_len, PCI_DMA_TODEVICE);
1107         }
1108
1109         efx_enqueue_unwind(tx_queue);
1110         return rc2;
1111 }
1112
1113
1114 /*
1115  * Free up all TSO data structures associated with tx_queue.  This
1116  * routine should be called only once the tx_queue is both empty and
1117  * will no longer be used.
1118  */
1119 static void efx_fini_tso(struct efx_tx_queue *tx_queue)
1120 {
1121         unsigned i;
1122
1123         if (tx_queue->buffer) {
1124                 for (i = 0; i <= EFX_TXQ_MASK; ++i)
1125                         efx_tsoh_free(tx_queue, &tx_queue->buffer[i]);
1126         }
1127
1128         while (tx_queue->tso_headers_free != NULL)
1129                 efx_tsoh_block_free(tx_queue, tx_queue->tso_headers_free,
1130                                     tx_queue->efx->pci_dev);
1131 }