/****************************************************************************
 * Driver for Solarflare Solarstorm network controllers and boards
 * Copyright 2005-2006 Fen Systems Ltd.
 * Copyright 2005-2009 Solarflare Communications Inc.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation, incorporated herein by reference.
 */

#include <linux/socket.h>
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/udp.h>
#include <net/ip.h>
#include <net/checksum.h>
#include "net_driver.h"
#include "efx.h"
#include "nic.h"
#include "selftest.h"
#include "workarounds.h"

/* Number of RX descriptors pushed at once. */
#define EFX_RX_BATCH 8

/* Size of buffer allocated for skb header area. */
#define EFX_SKB_HEADERS 64u

/*
 * rx_alloc_method - RX buffer allocation method
 *
 * This driver supports two methods for allocating and using RX buffers:
 * each RX buffer may be backed by an skb or by an order-n page.
 *
 * When LRO is in use, the second method has a lower overhead, since we
 * don't have to allocate then free skbs on reassembled frames.
 *
 * Values:
 *   - RX_ALLOC_METHOD_AUTO = 0
 *   - RX_ALLOC_METHOD_SKB  = 1
 *   - RX_ALLOC_METHOD_PAGE = 2
 *
 * The heuristic for %RX_ALLOC_METHOD_AUTO is a simple hysteresis count
 * controlled by the parameters below; a worked example follows the
 * constant definitions.
 *
 * - Pushing and popping descriptors are separated by the rx_queue size,
 *   so the watermarks should be ~rxd_size.
 * - The performance win from using page-based allocation for LRO is
 *   smaller than the performance hit of using page-based allocation for
 *   non-LRO traffic, so the watermarks should reflect this.
 *
 * Each channel maintains a single variable, updated as packets are
 * received:
 *
 *   rx_alloc_level += (lro_performed ? RX_ALLOC_FACTOR_LRO :
 *                      RX_ALLOC_FACTOR_SKB)
 *
 * Per NAPI poll interval, we constrain rx_alloc_level to 0..MAX (which
 * limits the hysteresis), and update the allocation strategy:
 *
 *   rx_alloc_method = (rx_alloc_level > RX_ALLOC_LEVEL_LRO ?
 *                      RX_ALLOC_METHOD_PAGE : RX_ALLOC_METHOD_SKB)
 */
static int rx_alloc_method = RX_ALLOC_METHOD_AUTO;

#define RX_ALLOC_LEVEL_LRO 0x2000
#define RX_ALLOC_LEVEL_MAX 0x3000
#define RX_ALLOC_FACTOR_LRO 1
#define RX_ALLOC_FACTOR_SKB (-2)
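
/* Worked example (illustrative figures, not from the original comments):
 * starting from rx_alloc_level == 0, roughly RX_ALLOC_LEVEL_LRO /
 * RX_ALLOC_FACTOR_LRO = 0x2000 = 8192 packets merged by LRO/GRO are
 * needed before the level crosses the threshold and the channel switches
 * to page-based allocation, while every non-merged packet pulls the level
 * back towards skb allocation twice as fast (RX_ALLOC_FACTOR_SKB == -2).
 */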

/* This is the percentage fill level below which new RX descriptors
 * will be added to the RX descriptor ring.
 */
static unsigned int rx_refill_threshold = 90;

/* This is the percentage fill level to which an RX queue will be refilled
 * when the "RX refill threshold" is reached.
 */
static unsigned int rx_refill_limit = 95;
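
/* Worked example (illustrative, using the default values above): with a
 * maximum fill level of N descriptors, a refill is triggered once the
 * number of outstanding buffers drops below 0.90 * N, and the refill then
 * tops the queue back up to 0.95 * N; see efx_init_rx_queue() for the
 * actual calculation.
 */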

/*
 * RX maximum head room required.
 *
 * This must be at least 1 to prevent overflow and at least 2 to allow
 * pipelined receives.
 */
#define EFX_RXD_HEAD_ROOM 2

static inline unsigned int efx_rx_buf_offset(struct efx_rx_buffer *buf)
{
	/* Offset is always within one page, so we don't need to consider
	 * the page order.
	 */
	return (__force unsigned long) buf->data & (PAGE_SIZE - 1);
}

static inline unsigned int efx_rx_buf_size(struct efx_nic *efx)
{
	return PAGE_SIZE << efx->rx_buffer_order;
}

/**
 * efx_init_rx_buffer_skb - create new RX buffer using skb-based allocation
 *
 * @rx_queue:		Efx RX queue
 * @rx_buf:		RX buffer structure to populate
 *
 * This allocates memory for a new receive buffer, maps it for DMA,
 * and populates a struct efx_rx_buffer with the relevant
 * information.  Return a negative error code or 0 on success.
 */
static int efx_init_rx_buffer_skb(struct efx_rx_queue *rx_queue,
				  struct efx_rx_buffer *rx_buf)
{
	struct efx_nic *efx = rx_queue->efx;
	struct net_device *net_dev = efx->net_dev;
	int skb_len = efx->rx_buffer_len;

	rx_buf->skb = netdev_alloc_skb(net_dev, skb_len);
	if (unlikely(!rx_buf->skb))
		return -ENOMEM;

	/* Adjust the SKB for padding and checksum */
	skb_reserve(rx_buf->skb, NET_IP_ALIGN);
	rx_buf->len = skb_len - NET_IP_ALIGN;
	rx_buf->data = (char *)rx_buf->skb->data;
	rx_buf->skb->ip_summed = CHECKSUM_UNNECESSARY;

	rx_buf->dma_addr = pci_map_single(efx->pci_dev,
					  rx_buf->data, rx_buf->len,
					  PCI_DMA_FROMDEVICE);

	if (unlikely(pci_dma_mapping_error(efx->pci_dev, rx_buf->dma_addr))) {
		dev_kfree_skb_any(rx_buf->skb);
		rx_buf->skb = NULL;
		return -EIO;
	}

	return 0;
}

/**
 * efx_init_rx_buffer_page - create new RX buffer using page-based allocation
 *
 * @rx_queue:		Efx RX queue
 * @rx_buf:		RX buffer structure to populate
 *
 * This allocates memory for a new receive buffer, maps it for DMA,
 * and populates a struct efx_rx_buffer with the relevant
 * information.  Return a negative error code or 0 on success.
 */
static int efx_init_rx_buffer_page(struct efx_rx_queue *rx_queue,
				   struct efx_rx_buffer *rx_buf)
{
	struct efx_nic *efx = rx_queue->efx;
	int bytes, space, offset;

	bytes = efx->rx_buffer_len - EFX_PAGE_IP_ALIGN;

	/* If there is space left in the previously allocated page,
	 * then use it.  Otherwise allocate a new one. */
	rx_buf->page = rx_queue->buf_page;
	if (rx_buf->page == NULL) {
		dma_addr_t dma_addr;

		rx_buf->page = alloc_pages(__GFP_COLD | __GFP_COMP | GFP_ATOMIC,
					   efx->rx_buffer_order);
		if (unlikely(rx_buf->page == NULL))
			return -ENOMEM;

		dma_addr = pci_map_page(efx->pci_dev, rx_buf->page,
					0, efx_rx_buf_size(efx),
					PCI_DMA_FROMDEVICE);

		if (unlikely(pci_dma_mapping_error(efx->pci_dev, dma_addr))) {
			__free_pages(rx_buf->page, efx->rx_buffer_order);
			rx_buf->page = NULL;
			return -EIO;
		}

		rx_queue->buf_page = rx_buf->page;
		rx_queue->buf_dma_addr = dma_addr;
		rx_queue->buf_data = (page_address(rx_buf->page) +
				      EFX_PAGE_IP_ALIGN);
	}

	rx_buf->len = bytes;
	rx_buf->data = rx_queue->buf_data;
	offset = efx_rx_buf_offset(rx_buf);
	rx_buf->dma_addr = rx_queue->buf_dma_addr + offset;

	/* Try to pack multiple buffers per page */
	if (efx->rx_buffer_order == 0) {
		/* The next buffer starts on the next 512 byte boundary */
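		/* Illustrative example (figures assumed, not taken from the
		 * driver): a 1728-byte buffer rounds up to 2048 bytes, so
		 * two such buffers fit in a 4096-byte page before a new
		 * page has to be allocated. */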
		rx_queue->buf_data += ((bytes + 0x1ff) & ~0x1ff);
		offset += ((bytes + 0x1ff) & ~0x1ff);

		space = efx_rx_buf_size(efx) - offset;
		if (space >= bytes) {
			/* Refs dropped on kernel releasing each skb */
			get_page(rx_queue->buf_page);
			goto out;
		}
	}

	/* This is the final RX buffer for this page, so mark it for
	 * unmapping */
	rx_queue->buf_page = NULL;
	rx_buf->unmap_addr = rx_queue->buf_dma_addr;

 out:
	return 0;
}

/* This allocates memory for a new receive buffer, maps it for DMA,
 * and populates a struct efx_rx_buffer with the relevant
 * information.
 */
static int efx_init_rx_buffer(struct efx_rx_queue *rx_queue,
			      struct efx_rx_buffer *new_rx_buf)
{
	int rc = 0;

	if (rx_queue->channel->rx_alloc_push_pages) {
		new_rx_buf->skb = NULL;
		rc = efx_init_rx_buffer_page(rx_queue, new_rx_buf);
		rx_queue->alloc_page_count++;
	} else {
		new_rx_buf->page = NULL;
		rc = efx_init_rx_buffer_skb(rx_queue, new_rx_buf);
		rx_queue->alloc_skb_count++;
	}

	if (unlikely(rc < 0))
		EFX_LOG_RL(rx_queue->efx, "%s RXQ[%d] =%d\n", __func__,
			   rx_queue->queue, rc);
	return rc;
}

static void efx_unmap_rx_buffer(struct efx_nic *efx,
				struct efx_rx_buffer *rx_buf)
{
	if (rx_buf->page) {
		EFX_BUG_ON_PARANOID(rx_buf->skb);
		if (rx_buf->unmap_addr) {
			pci_unmap_page(efx->pci_dev, rx_buf->unmap_addr,
				       efx_rx_buf_size(efx),
				       PCI_DMA_FROMDEVICE);
			rx_buf->unmap_addr = 0;
		}
	} else if (likely(rx_buf->skb)) {
		pci_unmap_single(efx->pci_dev, rx_buf->dma_addr,
				 rx_buf->len, PCI_DMA_FROMDEVICE);
	}
}

static void efx_free_rx_buffer(struct efx_nic *efx,
			       struct efx_rx_buffer *rx_buf)
{
	if (rx_buf->page) {
		__free_pages(rx_buf->page, efx->rx_buffer_order);
		rx_buf->page = NULL;
	} else if (likely(rx_buf->skb)) {
		dev_kfree_skb_any(rx_buf->skb);
		rx_buf->skb = NULL;
	}
}

static void efx_fini_rx_buffer(struct efx_rx_queue *rx_queue,
			       struct efx_rx_buffer *rx_buf)
{
	efx_unmap_rx_buffer(rx_queue->efx, rx_buf);
	efx_free_rx_buffer(rx_queue->efx, rx_buf);
}

/**
 * __efx_fast_push_rx_descriptors - push new RX descriptors quickly
 * @rx_queue: RX descriptor queue
 * @retry: Recheck the fill level
 *
 * This will aim to fill the RX descriptor queue up to
 * @rx_queue->fast_fill_limit.  If there is insufficient atomic
 * memory to do so, the caller should retry.
 */
static int __efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue,
					  int retry)
{
	struct efx_rx_buffer *rx_buf;
	unsigned fill_level, index;
	int i, space, rc = 0;

	/* Calculate current fill level.  Do this outside the lock,
	 * because most of the time we'll end up not wanting to do the
	 * fill anyway.
	 */
	fill_level = (rx_queue->added_count - rx_queue->removed_count);
	EFX_BUG_ON_PARANOID(fill_level > EFX_RXQ_SIZE);

	/* Don't fill if we don't need to */
	if (fill_level >= rx_queue->fast_fill_trigger)
		return 0;

	/* Record minimum fill level */
	if (unlikely(fill_level < rx_queue->min_fill)) {
		if (fill_level)
			rx_queue->min_fill = fill_level;
	}

	/* Acquire RX add lock.  If this lock is contended, then a fast
	 * fill must already be in progress (e.g. in the refill
	 * tasklet), so we don't need to do anything
	 */
	if (!spin_trylock_bh(&rx_queue->add_lock))
		return -1;

 retry:
	/* Recalculate current fill level now that we have the lock */
	fill_level = (rx_queue->added_count - rx_queue->removed_count);
	EFX_BUG_ON_PARANOID(fill_level > EFX_RXQ_SIZE);
	space = rx_queue->fast_fill_limit - fill_level;
	if (space < EFX_RX_BATCH)
		goto out_unlock;

	EFX_TRACE(rx_queue->efx, "RX queue %d fast-filling descriptor ring from"
		  " level %d to level %d using %s allocation\n",
		  rx_queue->queue, fill_level, rx_queue->fast_fill_limit,
		  rx_queue->channel->rx_alloc_push_pages ? "page" : "skb");

	do {
		for (i = 0; i < EFX_RX_BATCH; ++i) {
			index = rx_queue->added_count & EFX_RXQ_MASK;
			rx_buf = efx_rx_buffer(rx_queue, index);
			rc = efx_init_rx_buffer(rx_queue, rx_buf);
			if (unlikely(rc))
				goto out;
			++rx_queue->added_count;
		}
	} while ((space -= EFX_RX_BATCH) >= EFX_RX_BATCH);

	EFX_TRACE(rx_queue->efx, "RX queue %d fast-filled descriptor ring "
		  "to level %d\n", rx_queue->queue,
		  rx_queue->added_count - rx_queue->removed_count);

 out:
	/* Send write pointer to card. */
	efx_nic_notify_rx_desc(rx_queue);

	/* If the fast fill is running from inside the refill tasklet, then
	 * for SMP systems it may be running on a different CPU to
	 * RX event processing, which means that the fill level may now be
	 * out of date. */
	if (unlikely(retry && (rc == 0)))
		goto retry;

 out_unlock:
	spin_unlock_bh(&rx_queue->add_lock);

	return rc;
}

/**
 * efx_fast_push_rx_descriptors - push new RX descriptors quickly
 * @rx_queue: RX descriptor queue
 *
 * This will aim to fill the RX descriptor queue up to
 * @rx_queue->fast_fill_limit.  If there is insufficient memory to do so,
 * it will schedule a work item to immediately continue the fast fill.
 */
void efx_fast_push_rx_descriptors(struct efx_rx_queue *rx_queue)
{
	int rc;

	rc = __efx_fast_push_rx_descriptors(rx_queue, 0);
	if (unlikely(rc)) {
		/* Schedule the work item to run immediately.  The hope is
		 * that work is immediately pending to free some memory
		 * (e.g. an RX event or TX completion)
		 */
		efx_schedule_slow_fill(rx_queue, 0);
	}
}

void efx_rx_work(struct work_struct *data)
{
	struct efx_rx_queue *rx_queue;
	int rc;

	rx_queue = container_of(data, struct efx_rx_queue, work.work);

	if (unlikely(!rx_queue->channel->enabled))
		return;

	EFX_TRACE(rx_queue->efx, "RX queue %d worker thread executing on CPU "
		  "%d\n", rx_queue->queue, raw_smp_processor_id());

	++rx_queue->slow_fill_count;
	/* Push new RX descriptors, allowing at least 1 jiffy for
	 * the kernel to free some more memory. */
	rc = __efx_fast_push_rx_descriptors(rx_queue, 1);
	if (rc)
		efx_schedule_slow_fill(rx_queue, 1);
}

static void efx_rx_packet__check_len(struct efx_rx_queue *rx_queue,
				     struct efx_rx_buffer *rx_buf,
				     int len, bool *discard,
				     bool *leak_packet)
{
	struct efx_nic *efx = rx_queue->efx;
	unsigned max_len = rx_buf->len - efx->type->rx_buffer_padding;

	if (likely(len <= max_len))
		return;

	/* The packet must be discarded, but this is only a fatal error
	 * if the caller indicated it was
	 */
	*discard = true;

	if ((len > rx_buf->len) && EFX_WORKAROUND_8071(efx)) {
		EFX_ERR_RL(efx, " RX queue %d seriously overlength "
			   "RX event (0x%x > 0x%x+0x%x). Leaking\n",
			   rx_queue->queue, len, max_len,
			   efx->type->rx_buffer_padding);
		/* If this buffer was skb-allocated, then the meta
		 * data at the end of the skb will be trashed.  So
		 * we have no choice but to leak the fragment.
		 */
		*leak_packet = (rx_buf->skb != NULL);
		efx_schedule_reset(efx, RESET_TYPE_RX_RECOVERY);
	} else {
		EFX_ERR_RL(efx, " RX queue %d overlength RX event "
			   "(0x%x > 0x%x)\n", rx_queue->queue, len, max_len);
	}

	rx_queue->channel->n_rx_overlength++;
}

/* Pass a received packet up through the generic LRO stack
 *
 * Handles driverlink veto, and passes the fragment up via
 * the appropriate LRO method
 */
static void efx_rx_packet_lro(struct efx_channel *channel,
			      struct efx_rx_buffer *rx_buf,
			      bool checksummed)
{
	struct napi_struct *napi = &channel->napi_str;
	gro_result_t gro_result;

	/* Pass the skb/page into the LRO engine */
	if (rx_buf->page) {
		struct page *page = rx_buf->page;
		struct sk_buff *skb;

		EFX_BUG_ON_PARANOID(rx_buf->skb);
		rx_buf->page = NULL;

		skb = napi_get_frags(napi);
		if (!skb) {
			put_page(page);
			return;
		}

		skb_shinfo(skb)->frags[0].page = page;
		skb_shinfo(skb)->frags[0].page_offset =
			efx_rx_buf_offset(rx_buf);
		skb_shinfo(skb)->frags[0].size = rx_buf->len;
		skb_shinfo(skb)->nr_frags = 1;

		skb->len = rx_buf->len;
		skb->data_len = rx_buf->len;
		skb->truesize += rx_buf->len;
		skb->ip_summed =
			checksummed ? CHECKSUM_UNNECESSARY : CHECKSUM_NONE;

		skb_record_rx_queue(skb, channel->channel);

		gro_result = napi_gro_frags(napi);
	} else {
		struct sk_buff *skb = rx_buf->skb;

		EFX_BUG_ON_PARANOID(!skb);
		EFX_BUG_ON_PARANOID(!checksummed);
		rx_buf->skb = NULL;

		gro_result = napi_gro_receive(napi, skb);
	}

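	/* GRO_NORMAL means the stack passed the packet up without merging
	 * it, so skb allocation would have been the cheaper choice here;
	 * any other result apart from GRO_DROP counts in favour of
	 * page-based allocation (see rx_alloc_method above). */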
	if (gro_result == GRO_NORMAL) {
		channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB;
	} else if (gro_result != GRO_DROP) {
		channel->rx_alloc_level += RX_ALLOC_FACTOR_LRO;
		channel->irq_mod_score += 2;
	}
}

void efx_rx_packet(struct efx_rx_queue *rx_queue, unsigned int index,
		   unsigned int len, bool checksummed, bool discard)
{
	struct efx_nic *efx = rx_queue->efx;
	struct efx_rx_buffer *rx_buf;
	bool leak_packet = false;

	rx_buf = efx_rx_buffer(rx_queue, index);
	EFX_BUG_ON_PARANOID(!rx_buf->data);
	EFX_BUG_ON_PARANOID(rx_buf->skb && rx_buf->page);
	EFX_BUG_ON_PARANOID(!(rx_buf->skb || rx_buf->page));

	/* This allows the refill path to post another buffer.
	 * EFX_RXD_HEAD_ROOM ensures that the slot we are using
	 * isn't overwritten yet.
	 */
	rx_queue->removed_count++;

	/* Validate the length encoded in the event vs the descriptor pushed */
	efx_rx_packet__check_len(rx_queue, rx_buf, len,
				 &discard, &leak_packet);

	EFX_TRACE(efx, "RX queue %d received id %x at %llx+%x %s%s\n",
		  rx_queue->queue, index,
		  (unsigned long long)rx_buf->dma_addr, len,
		  (checksummed ? " [SUMMED]" : ""),
		  (discard ? " [DISCARD]" : ""));

	/* Discard packet, if instructed to do so */
	if (unlikely(discard)) {
		if (unlikely(leak_packet))
			rx_queue->channel->n_skbuff_leaks++;
		else
			/* We haven't called efx_unmap_rx_buffer yet,
			 * so fini the entire rx_buffer here */
			efx_fini_rx_buffer(rx_queue, rx_buf);
		return;
	}

	/* Release card resources - assumes all RX buffers consumed in-order
	 * per RX queue
	 */
	efx_unmap_rx_buffer(efx, rx_buf);

	/* Prefetch nice and early so data will (hopefully) be in cache by
	 * the time we look at it.
	 */
	prefetch(rx_buf->data);

	/* Pipeline receives so that we give time for packet headers to be
	 * prefetched into cache.
	 */
	rx_buf->len = len;
	if (rx_queue->channel->rx_pkt)
		__efx_rx_packet(rx_queue->channel,
				rx_queue->channel->rx_pkt,
				rx_queue->channel->rx_pkt_csummed);
	rx_queue->channel->rx_pkt = rx_buf;
	rx_queue->channel->rx_pkt_csummed = checksummed;
}

/* Handle a received packet.  Second half: Touches packet payload. */
void __efx_rx_packet(struct efx_channel *channel,
		     struct efx_rx_buffer *rx_buf, bool checksummed)
{
	struct efx_nic *efx = channel->efx;
	struct sk_buff *skb;

	/* If we're in loopback test, then pass the packet directly to the
	 * loopback layer, and free the rx_buf here
	 */
	if (unlikely(efx->loopback_selftest)) {
		efx_loopback_rx_packet(efx, rx_buf->data, rx_buf->len);
		efx_free_rx_buffer(efx, rx_buf);
		return;
	}

	if (rx_buf->skb) {
		prefetch(skb_shinfo(rx_buf->skb));

		skb_put(rx_buf->skb, rx_buf->len);

		/* Move past the ethernet header.  rx_buf->data still points
		 * at the ethernet header */
		rx_buf->skb->protocol = eth_type_trans(rx_buf->skb,
						       efx->net_dev);

		skb_record_rx_queue(rx_buf->skb, channel->channel);
	}

	if (likely(checksummed || rx_buf->page)) {
		efx_rx_packet_lro(channel, rx_buf, checksummed);
		return;
	}

	/* We now own the SKB */
	skb = rx_buf->skb;
	rx_buf->skb = NULL;
	EFX_BUG_ON_PARANOID(!skb);

	/* Set the SKB flags */
	skb->ip_summed = CHECKSUM_NONE;

	/* Pass the packet up */
	netif_receive_skb(skb);

	/* Update allocation strategy method */
	channel->rx_alloc_level += RX_ALLOC_FACTOR_SKB;
}

void efx_rx_strategy(struct efx_channel *channel)
{
	enum efx_rx_alloc_method method = rx_alloc_method;

	/* Only makes sense to use page based allocation if LRO is enabled */
	if (!(channel->efx->net_dev->features & NETIF_F_GRO)) {
		method = RX_ALLOC_METHOD_SKB;
	} else if (method == RX_ALLOC_METHOD_AUTO) {
		/* Constrain the rx_alloc_level */
		if (channel->rx_alloc_level < 0)
			channel->rx_alloc_level = 0;
		else if (channel->rx_alloc_level > RX_ALLOC_LEVEL_MAX)
			channel->rx_alloc_level = RX_ALLOC_LEVEL_MAX;

		/* Decide on the allocation method */
		method = ((channel->rx_alloc_level > RX_ALLOC_LEVEL_LRO) ?
			  RX_ALLOC_METHOD_PAGE : RX_ALLOC_METHOD_SKB);
	}

	/* Push the option */
	channel->rx_alloc_push_pages = (method == RX_ALLOC_METHOD_PAGE);
}

int efx_probe_rx_queue(struct efx_rx_queue *rx_queue)
{
	struct efx_nic *efx = rx_queue->efx;
	unsigned int rxq_size;
	int rc;

	EFX_LOG(efx, "creating RX queue %d\n", rx_queue->queue);

	/* Allocate RX buffers */
	rxq_size = EFX_RXQ_SIZE * sizeof(*rx_queue->buffer);
	rx_queue->buffer = kzalloc(rxq_size, GFP_KERNEL);
	if (!rx_queue->buffer)
		return -ENOMEM;

	rc = efx_nic_probe_rx(rx_queue);
	if (rc) {
		kfree(rx_queue->buffer);
		rx_queue->buffer = NULL;
	}
	return rc;
}

void efx_init_rx_queue(struct efx_rx_queue *rx_queue)
{
	unsigned int max_fill, trigger, limit;

	EFX_LOG(rx_queue->efx, "initialising RX queue %d\n", rx_queue->queue);

	/* Initialise ptr fields */
	rx_queue->added_count = 0;
	rx_queue->notified_count = 0;
	rx_queue->removed_count = 0;
	rx_queue->min_fill = -1U;
	rx_queue->min_overfill = -1U;

	/* Initialise limit fields */
	max_fill = EFX_RXQ_SIZE - EFX_RXD_HEAD_ROOM;
	trigger = max_fill * min(rx_refill_threshold, 100U) / 100U;
	limit = max_fill * min(rx_refill_limit, 100U) / 100U;

	rx_queue->max_fill = max_fill;
	rx_queue->fast_fill_trigger = trigger;
	rx_queue->fast_fill_limit = limit;

	/* Set up RX descriptor ring */
	efx_nic_init_rx(rx_queue);
}

void efx_fini_rx_queue(struct efx_rx_queue *rx_queue)
{
	int i;
	struct efx_rx_buffer *rx_buf;

	EFX_LOG(rx_queue->efx, "shutting down RX queue %d\n", rx_queue->queue);

	efx_nic_fini_rx(rx_queue);

	/* Release RX buffers.  NB: start at index 0, not the current HW ptr */
	if (rx_queue->buffer) {
		for (i = 0; i <= EFX_RXQ_MASK; i++) {
			rx_buf = efx_rx_buffer(rx_queue, i);
			efx_fini_rx_buffer(rx_queue, rx_buf);
		}
	}

	/* For a page that is part-way through splitting into RX buffers */
	if (rx_queue->buf_page != NULL) {
		pci_unmap_page(rx_queue->efx->pci_dev, rx_queue->buf_dma_addr,
			       efx_rx_buf_size(rx_queue->efx),
			       PCI_DMA_FROMDEVICE);
		__free_pages(rx_queue->buf_page,
			     rx_queue->efx->rx_buffer_order);
		rx_queue->buf_page = NULL;
	}
}

void efx_remove_rx_queue(struct efx_rx_queue *rx_queue)
{
	EFX_LOG(rx_queue->efx, "destroying RX queue %d\n", rx_queue->queue);

	efx_nic_remove_rx(rx_queue);

	kfree(rx_queue->buffer);
	rx_queue->buffer = NULL;
}

module_param(rx_alloc_method, int, 0644);
MODULE_PARM_DESC(rx_alloc_method, "Allocation method used for RX buffers");

module_param(rx_refill_threshold, uint, 0444);
MODULE_PARM_DESC(rx_refill_threshold,
		 "RX descriptor ring fast/slow fill threshold (%)");