[net-next-2.6.git] / drivers/net/igb/igb_main.c (blob 958305e92d679e3354d366c7d2a3dcaa91b422ab)
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
47 #ifdef CONFIG_IGB_DCA
48 #include <linux/dca.h>
49 #endif
50 #include "igb.h"
51
52 #define DRV_VERSION "1.3.16-k2"
53 char igb_driver_name[] = "igb";
54 char igb_driver_version[] = DRV_VERSION;
55 static const char igb_driver_string[] =
56                                 "Intel(R) Gigabit Ethernet Network Driver";
57 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
58
59 static const struct e1000_info *igb_info_tbl[] = {
60         [board_82575] = &e1000_82575_info,
61 };
62
63 static struct pci_device_id igb_pci_tbl[] = {
64         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
74         /* required last entry */
75         {0, }
76 };
77
78 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
79
80 void igb_reset(struct igb_adapter *);
81 static int igb_setup_all_tx_resources(struct igb_adapter *);
82 static int igb_setup_all_rx_resources(struct igb_adapter *);
83 static void igb_free_all_tx_resources(struct igb_adapter *);
84 static void igb_free_all_rx_resources(struct igb_adapter *);
85 static void igb_setup_mrqc(struct igb_adapter *);
86 void igb_update_stats(struct igb_adapter *);
87 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
88 static void __devexit igb_remove(struct pci_dev *pdev);
89 static int igb_sw_init(struct igb_adapter *);
90 static int igb_open(struct net_device *);
91 static int igb_close(struct net_device *);
92 static void igb_configure_tx(struct igb_adapter *);
93 static void igb_configure_rx(struct igb_adapter *);
94 static void igb_clean_all_tx_rings(struct igb_adapter *);
95 static void igb_clean_all_rx_rings(struct igb_adapter *);
96 static void igb_clean_tx_ring(struct igb_ring *);
97 static void igb_clean_rx_ring(struct igb_ring *);
98 static void igb_set_rx_mode(struct net_device *);
99 static void igb_update_phy_info(unsigned long);
100 static void igb_watchdog(unsigned long);
101 static void igb_watchdog_task(struct work_struct *);
102 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
103 static struct net_device_stats *igb_get_stats(struct net_device *);
104 static int igb_change_mtu(struct net_device *, int);
105 static int igb_set_mac(struct net_device *, void *);
106 static void igb_set_uta(struct igb_adapter *adapter);
107 static irqreturn_t igb_intr(int irq, void *);
108 static irqreturn_t igb_intr_msi(int irq, void *);
109 static irqreturn_t igb_msix_other(int irq, void *);
110 static irqreturn_t igb_msix_ring(int irq, void *);
111 #ifdef CONFIG_IGB_DCA
112 static void igb_update_dca(struct igb_q_vector *);
113 static void igb_setup_dca(struct igb_adapter *);
114 #endif /* CONFIG_IGB_DCA */
115 static bool igb_clean_tx_irq(struct igb_q_vector *);
116 static int igb_poll(struct napi_struct *, int);
117 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
118 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
119 static void igb_tx_timeout(struct net_device *);
120 static void igb_reset_task(struct work_struct *);
121 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
122 static void igb_vlan_rx_add_vid(struct net_device *, u16);
123 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
124 static void igb_restore_vlan(struct igb_adapter *);
125 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
126 static void igb_ping_all_vfs(struct igb_adapter *);
127 static void igb_msg_task(struct igb_adapter *);
128 static void igb_vmm_control(struct igb_adapter *);
129 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
130 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
131
132 #ifdef CONFIG_PM
133 static int igb_suspend(struct pci_dev *, pm_message_t);
134 static int igb_resume(struct pci_dev *);
135 #endif
136 static void igb_shutdown(struct pci_dev *);
137 #ifdef CONFIG_IGB_DCA
138 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
139 static struct notifier_block dca_notifier = {
140         .notifier_call  = igb_notify_dca,
141         .next           = NULL,
142         .priority       = 0
143 };
144 #endif
145 #ifdef CONFIG_NET_POLL_CONTROLLER
146 /* for netdump / net console */
147 static void igb_netpoll(struct net_device *);
148 #endif
149 #ifdef CONFIG_PCI_IOV
150 static unsigned int max_vfs = 0;
151 module_param(max_vfs, uint, 0);
152 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
153                  "per physical function");
154 #endif /* CONFIG_PCI_IOV */
155
156 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
157                      pci_channel_state_t);
158 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
159 static void igb_io_resume(struct pci_dev *);
160
161 static struct pci_error_handlers igb_err_handler = {
162         .error_detected = igb_io_error_detected,
163         .slot_reset = igb_io_slot_reset,
164         .resume = igb_io_resume,
165 };
166
167
168 static struct pci_driver igb_driver = {
169         .name     = igb_driver_name,
170         .id_table = igb_pci_tbl,
171         .probe    = igb_probe,
172         .remove   = __devexit_p(igb_remove),
173 #ifdef CONFIG_PM
174         /* Power Management Hooks */
175         .suspend  = igb_suspend,
176         .resume   = igb_resume,
177 #endif
178         .shutdown = igb_shutdown,
179         .err_handler = &igb_err_handler
180 };
181
182 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
183 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
184 MODULE_LICENSE("GPL");
185 MODULE_VERSION(DRV_VERSION);
186
187 /**
188  * igb_read_clock - read raw cycle counter (to be used by time counter)
189  */
190 static cycle_t igb_read_clock(const struct cyclecounter *tc)
191 {
192         struct igb_adapter *adapter =
193                 container_of(tc, struct igb_adapter, cycles);
194         struct e1000_hw *hw = &adapter->hw;
195         u64 stamp = 0;
196         int shift = 0;
197
198         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
199         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
200         return stamp;
201 }
202
203 #ifdef DEBUG
204 /**
205  * igb_get_hw_dev_name - return device name string
206  * used by hardware layer to print debugging information
207  **/
208 char *igb_get_hw_dev_name(struct e1000_hw *hw)
209 {
210         struct igb_adapter *adapter = hw->back;
211         return adapter->netdev->name;
212 }
213
214 /**
215  * igb_get_time_str - format current NIC and system time as string
216  */
217 static char *igb_get_time_str(struct igb_adapter *adapter,
218                               char buffer[160])
219 {
220         cycle_t hw = adapter->cycles.read(&adapter->cycles);
221         struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
222         struct timespec sys;
223         struct timespec delta;
224         getnstimeofday(&sys);
225
226         delta = timespec_sub(nic, sys);
227
228         sprintf(buffer,
229                 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
230                 hw,
231                 (long)nic.tv_sec, nic.tv_nsec,
232                 (long)sys.tv_sec, sys.tv_nsec,
233                 (long)delta.tv_sec, delta.tv_nsec);
234
235         return buffer;
236 }
237 #endif
238
239 /**
240  * igb_init_module - Driver Registration Routine
241  *
242  * igb_init_module is the first routine called when the driver is
243  * loaded. All it does is register with the PCI subsystem.
244  **/
245 static int __init igb_init_module(void)
246 {
247         int ret;
248         printk(KERN_INFO "%s - version %s\n",
249                igb_driver_string, igb_driver_version);
250
251         printk(KERN_INFO "%s\n", igb_copyright);
252
253 #ifdef CONFIG_IGB_DCA
254         dca_register_notify(&dca_notifier);
255 #endif
256         ret = pci_register_driver(&igb_driver);
257         return ret;
258 }
259
260 module_init(igb_init_module);
261
262 /**
263  * igb_exit_module - Driver Exit Cleanup Routine
264  *
265  * igb_exit_module is called just before the driver is removed
266  * from memory.
267  **/
268 static void __exit igb_exit_module(void)
269 {
270 #ifdef CONFIG_IGB_DCA
271         dca_unregister_notify(&dca_notifier);
272 #endif
273         pci_unregister_driver(&igb_driver);
274 }
275
276 module_exit(igb_exit_module);
277
278 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
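/* Worked example of the mapping above: Q_IDX_82576(0) = 0, (1) = 8, (2) = 1,
 * (3) = 9, (4) = 2, (5) = 10, ... so successive queue indices alternate
 * between the low half (0-7) and high half (8-15) of the register space,
 * matching the "VF 0 gets queues 0 and 8, VF 1 queues 1 and 9" layout
 * described in igb_cache_ring_register() below.
 */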
279 /**
280  * igb_cache_ring_register - Descriptor ring to register mapping
281  * @adapter: board private structure to initialize
282  *
283  * Once we know the feature-set enabled for the device, we'll cache
284  * the register offset the descriptor ring is assigned to.
285  **/
286 static void igb_cache_ring_register(struct igb_adapter *adapter)
287 {
288         int i = 0, j = 0;
289         u32 rbase_offset = adapter->vfs_allocated_count;
290
291         switch (adapter->hw.mac.type) {
292         case e1000_82576:
293                 /* The queues are allocated for virtualization such that VF 0
294                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
295                  * In order to avoid collision we start at the first free queue
296                  * and continue consuming queues in the same sequence
297                  */
298                 if (adapter->vfs_allocated_count) {
299                         for (; i < adapter->rss_queues; i++)
300                                 adapter->rx_ring[i].reg_idx = rbase_offset +
301                                                               Q_IDX_82576(i);
302                         for (; j < adapter->rss_queues; j++)
303                                 adapter->tx_ring[j].reg_idx = rbase_offset +
304                                                               Q_IDX_82576(j);
305                 }
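                /* fall through - any remaining queues (and the non-VF case)
                 * get the flat 1:1 register mapping below */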
306         case e1000_82575:
307         default:
308                 for (; i < adapter->num_rx_queues; i++)
309                         adapter->rx_ring[i].reg_idx = rbase_offset + i;
310                 for (; j < adapter->num_tx_queues; j++)
311                         adapter->tx_ring[j].reg_idx = rbase_offset + j;
312                 break;
313         }
314 }
315
316 static void igb_free_queues(struct igb_adapter *adapter)
317 {
318         kfree(adapter->tx_ring);
319         kfree(adapter->rx_ring);
320
321         adapter->tx_ring = NULL;
322         adapter->rx_ring = NULL;
323
324         adapter->num_rx_queues = 0;
325         adapter->num_tx_queues = 0;
326 }
327
328 /**
329  * igb_alloc_queues - Allocate memory for all rings
330  * @adapter: board private structure to initialize
331  *
332  * We allocate one ring per queue at run-time since we don't know the
333  * number of queues at compile-time.
334  **/
335 static int igb_alloc_queues(struct igb_adapter *adapter)
336 {
337         int i;
338
339         adapter->tx_ring = kcalloc(adapter->num_tx_queues,
340                                    sizeof(struct igb_ring), GFP_KERNEL);
341         if (!adapter->tx_ring)
342                 goto err;
343
344         adapter->rx_ring = kcalloc(adapter->num_rx_queues,
345                                    sizeof(struct igb_ring), GFP_KERNEL);
346         if (!adapter->rx_ring)
347                 goto err;
348
349         for (i = 0; i < adapter->num_tx_queues; i++) {
350                 struct igb_ring *ring = &(adapter->tx_ring[i]);
351                 ring->count = adapter->tx_ring_count;
352                 ring->queue_index = i;
353                 ring->pdev = adapter->pdev;
354                 ring->netdev = adapter->netdev;
355                 /* For 82575, context index must be unique per ring. */
356                 if (adapter->hw.mac.type == e1000_82575)
357                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
358         }
359
360         for (i = 0; i < adapter->num_rx_queues; i++) {
361                 struct igb_ring *ring = &(adapter->rx_ring[i]);
362                 ring->count = adapter->rx_ring_count;
363                 ring->queue_index = i;
364                 ring->pdev = adapter->pdev;
365                 ring->netdev = adapter->netdev;
366                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
367                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
368                 /* set flag indicating ring supports SCTP checksum offload */
369                 if (adapter->hw.mac.type >= e1000_82576)
370                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
371         }
372
373         igb_cache_ring_register(adapter);
374
375         return 0;
376
377 err:
378         igb_free_queues(adapter);
379
380         return -ENOMEM;
381 }
382
383 #define IGB_N0_QUEUE -1
384 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
385 {
386         u32 msixbm = 0;
387         struct igb_adapter *adapter = q_vector->adapter;
388         struct e1000_hw *hw = &adapter->hw;
389         u32 ivar, index;
390         int rx_queue = IGB_N0_QUEUE;
391         int tx_queue = IGB_N0_QUEUE;
392
393         if (q_vector->rx_ring)
394                 rx_queue = q_vector->rx_ring->reg_idx;
395         if (q_vector->tx_ring)
396                 tx_queue = q_vector->tx_ring->reg_idx;
397
398         switch (hw->mac.type) {
399         case e1000_82575:
400                 /* The 82575 assigns vectors using a bitmask, which matches the
401                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
402                    or more queues to a vector, we write the appropriate bits
403                    into the MSIXBM register for that vector. */
404                 if (rx_queue > IGB_N0_QUEUE)
405                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
406                 if (tx_queue > IGB_N0_QUEUE)
407                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
408                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
409                 q_vector->eims_value = msixbm;
410                 break;
411         case e1000_82576:
412                 /* 82576 uses a table-based method for assigning vectors.
413                    Each queue has a single entry in the table to which we write
414                    a vector number along with a "valid" bit.  Sadly, the layout
415                    of the table is somewhat counterintuitive. */
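                /* IVAR0 layout implied by the code below (one entry per
                 * "queue & 0x7"): byte 0 = Rx queues 0-7, byte 1 = Tx queues
                 * 0-7, byte 2 = Rx queues 8-15, byte 3 = Tx queues 8-15. */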
416                 if (rx_queue > IGB_N0_QUEUE) {
417                         index = (rx_queue & 0x7);
418                         ivar = array_rd32(E1000_IVAR0, index);
419                         if (rx_queue < 8) {
420                                 /* vector goes into low byte of register */
421                                 ivar = ivar & 0xFFFFFF00;
422                                 ivar |= msix_vector | E1000_IVAR_VALID;
423                         } else {
424                                 /* vector goes into third byte of register */
425                                 ivar = ivar & 0xFF00FFFF;
426                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
427                         }
428                         array_wr32(E1000_IVAR0, index, ivar);
429                 }
430                 if (tx_queue > IGB_N0_QUEUE) {
431                         index = (tx_queue & 0x7);
432                         ivar = array_rd32(E1000_IVAR0, index);
433                         if (tx_queue < 8) {
434                                 /* vector goes into second byte of register */
435                                 ivar = ivar & 0xFFFF00FF;
436                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
437                         } else {
438                                 /* vector goes into high byte of register */
439                                 ivar = ivar & 0x00FFFFFF;
440                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
441                         }
442                         array_wr32(E1000_IVAR0, index, ivar);
443                 }
444                 q_vector->eims_value = 1 << msix_vector;
445                 break;
446         default:
447                 BUG();
448                 break;
449         }
450 }
451
452 /**
453  * igb_configure_msix - Configure MSI-X hardware
454  *
455  * igb_configure_msix sets up the hardware to properly
456  * generate MSI-X interrupts.
457  **/
458 static void igb_configure_msix(struct igb_adapter *adapter)
459 {
460         u32 tmp;
461         int i, vector = 0;
462         struct e1000_hw *hw = &adapter->hw;
463
464         adapter->eims_enable_mask = 0;
465
466         /* set vector for other causes, i.e. link changes */
467         switch (hw->mac.type) {
468         case e1000_82575:
469                 tmp = rd32(E1000_CTRL_EXT);
470                 /* enable MSI-X PBA support */
471                 tmp |= E1000_CTRL_EXT_PBA_CLR;
472
473                 /* Auto-Mask interrupts upon ICR read. */
474                 tmp |= E1000_CTRL_EXT_EIAME;
475                 tmp |= E1000_CTRL_EXT_IRCA;
476
477                 wr32(E1000_CTRL_EXT, tmp);
478
479                 /* enable msix_other interrupt */
480                 array_wr32(E1000_MSIXBM(0), vector++,
481                                       E1000_EIMS_OTHER);
482                 adapter->eims_other = E1000_EIMS_OTHER;
483
484                 break;
485
486         case e1000_82576:
487                 /* Turn on MSI-X capability first, or our settings
488                  * won't stick.  And it will take days to debug. */
489                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
490                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
491                                 E1000_GPIE_NSICR);
492
493                 /* enable msix_other interrupt */
494                 adapter->eims_other = 1 << vector;
495                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
496
497                 wr32(E1000_IVAR_MISC, tmp);
498                 break;
499         default:
500                 /* do nothing, since nothing else supports MSI-X */
501                 break;
502         } /* switch (hw->mac.type) */
503
504         adapter->eims_enable_mask |= adapter->eims_other;
505
506         for (i = 0; i < adapter->num_q_vectors; i++) {
507                 struct igb_q_vector *q_vector = adapter->q_vector[i];
508                 igb_assign_vector(q_vector, vector++);
509                 adapter->eims_enable_mask |= q_vector->eims_value;
510         }
511
512         wrfl();
513 }
514
515 /**
516  * igb_request_msix - Initialize MSI-X interrupts
517  *
518  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
519  * kernel.
520  **/
521 static int igb_request_msix(struct igb_adapter *adapter)
522 {
523         struct net_device *netdev = adapter->netdev;
524         struct e1000_hw *hw = &adapter->hw;
525         int i, err = 0, vector = 0;
526
527         err = request_irq(adapter->msix_entries[vector].vector,
528                           igb_msix_other, 0, netdev->name, adapter);
529         if (err)
530                 goto out;
531         vector++;
532
533         for (i = 0; i < adapter->num_q_vectors; i++) {
534                 struct igb_q_vector *q_vector = adapter->q_vector[i];
535
536                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
537
538                 if (q_vector->rx_ring && q_vector->tx_ring)
539                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
540                                 q_vector->rx_ring->queue_index);
541                 else if (q_vector->tx_ring)
542                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
543                                 q_vector->tx_ring->queue_index);
544                 else if (q_vector->rx_ring)
545                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
546                                 q_vector->rx_ring->queue_index);
547                 else
548                         sprintf(q_vector->name, "%s-unused", netdev->name);
549
550                 err = request_irq(adapter->msix_entries[vector].vector,
551                                   igb_msix_ring, 0, q_vector->name,
552                                   q_vector);
553                 if (err)
554                         goto out;
555                 vector++;
556         }
557
558         igb_configure_msix(adapter);
559         return 0;
560 out:
561         return err;
562 }
563
564 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
565 {
566         if (adapter->msix_entries) {
567                 pci_disable_msix(adapter->pdev);
568                 kfree(adapter->msix_entries);
569                 adapter->msix_entries = NULL;
570         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
571                 pci_disable_msi(adapter->pdev);
572         }
573 }
574
575 /**
576  * igb_free_q_vectors - Free memory allocated for interrupt vectors
577  * @adapter: board private structure to initialize
578  *
579  * This function frees the memory allocated to the q_vectors.  In addition if
580  * NAPI is enabled it will delete any references to the NAPI struct prior
581  * to freeing the q_vector.
582  **/
583 static void igb_free_q_vectors(struct igb_adapter *adapter)
584 {
585         int v_idx;
586
587         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
588                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
589                 adapter->q_vector[v_idx] = NULL;
590                 netif_napi_del(&q_vector->napi);
591                 kfree(q_vector);
592         }
593         adapter->num_q_vectors = 0;
594 }
595
596 /**
597  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
598  *
599  * This function resets the device so that it has 0 rx queues, tx queues, and
600  * MSI-X interrupts allocated.
601  */
602 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
603 {
604         igb_free_queues(adapter);
605         igb_free_q_vectors(adapter);
606         igb_reset_interrupt_capability(adapter);
607 }
608
609 /**
610  * igb_set_interrupt_capability - set MSI or MSI-X if supported
611  *
612  * Attempt to configure interrupts using the best available
613  * capabilities of the hardware and kernel.
614  **/
615 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
616 {
617         int err;
618         int numvecs, i;
619
620         /* Number of supported queues. */
621         adapter->num_rx_queues = adapter->rss_queues;
622         adapter->num_tx_queues = adapter->rss_queues;
623
624         /* start with one vector for every rx queue */
625         numvecs = adapter->num_rx_queues;
626
627         /* if tx handler is separate, add 1 for every tx queue */
628         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
629                 numvecs += adapter->num_tx_queues;
630
631         /* store the number of vectors reserved for queues */
632         adapter->num_q_vectors = numvecs;
633
634         /* add 1 vector for link status interrupts */
635         numvecs++;
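        /* e.g. four RSS queues with queue pairing disabled gives
         * 4 Rx + 4 Tx + 1 other = 9 MSI-X vectors requested below */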
636         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
637                                         GFP_KERNEL);
638         if (!adapter->msix_entries)
639                 goto msi_only;
640
641         for (i = 0; i < numvecs; i++)
642                 adapter->msix_entries[i].entry = i;
643
644         err = pci_enable_msix(adapter->pdev,
645                               adapter->msix_entries,
646                               numvecs);
647         if (err == 0)
648                 goto out;
649
650         igb_reset_interrupt_capability(adapter);
651
652         /* If we can't do MSI-X, try MSI */
653 msi_only:
654 #ifdef CONFIG_PCI_IOV
655         /* disable SR-IOV for non MSI-X configurations */
656         if (adapter->vf_data) {
657                 struct e1000_hw *hw = &adapter->hw;
658                 /* disable iov and allow time for transactions to clear */
659                 pci_disable_sriov(adapter->pdev);
660                 msleep(500);
661
662                 kfree(adapter->vf_data);
663                 adapter->vf_data = NULL;
664                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
665                 msleep(100);
666                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
667         }
668 #endif
669         adapter->vfs_allocated_count = 0;
670         adapter->rss_queues = 1;
671         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
672         adapter->num_rx_queues = 1;
673         adapter->num_tx_queues = 1;
674         adapter->num_q_vectors = 1;
675         if (!pci_enable_msi(adapter->pdev))
676                 adapter->flags |= IGB_FLAG_HAS_MSI;
677 out:
678         /* Notify the stack of the (possibly) reduced Tx Queue count. */
679         adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
680         return;
681 }
682
683 /**
684  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
685  * @adapter: board private structure to initialize
686  *
687  * We allocate one q_vector per queue interrupt.  If allocation fails we
688  * return -ENOMEM.
689  **/
690 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
691 {
692         struct igb_q_vector *q_vector;
693         struct e1000_hw *hw = &adapter->hw;
694         int v_idx;
695
696         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
697                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
698                 if (!q_vector)
699                         goto err_out;
700                 q_vector->adapter = adapter;
701                 q_vector->itr_shift = (hw->mac.type == e1000_82575) ? 16 : 0;
702                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
703                 q_vector->itr_val = IGB_START_ITR;
704                 q_vector->set_itr = 1;
705                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
706                 adapter->q_vector[v_idx] = q_vector;
707         }
708         return 0;
709
710 err_out:
711         while (v_idx) {
712                 v_idx--;
713                 q_vector = adapter->q_vector[v_idx];
714                 netif_napi_del(&q_vector->napi);
715                 kfree(q_vector);
716                 adapter->q_vector[v_idx] = NULL;
717         }
718         return -ENOMEM;
719 }
720
721 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
722                                       int ring_idx, int v_idx)
723 {
724         struct igb_q_vector *q_vector;
725
726         q_vector = adapter->q_vector[v_idx];
727         q_vector->rx_ring = &adapter->rx_ring[ring_idx];
728         q_vector->rx_ring->q_vector = q_vector;
729         q_vector->itr_val = adapter->rx_itr_setting;
730         if (q_vector->itr_val && q_vector->itr_val <= 3)
731                 q_vector->itr_val = IGB_START_ITR;
732 }
733
734 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
735                                       int ring_idx, int v_idx)
736 {
737         struct igb_q_vector *q_vector;
738
739         q_vector = adapter->q_vector[v_idx];
740         q_vector->tx_ring = &adapter->tx_ring[ring_idx];
741         q_vector->tx_ring->q_vector = q_vector;
742         q_vector->itr_val = adapter->tx_itr_setting;
743         if (q_vector->itr_val && q_vector->itr_val <= 3)
744                 q_vector->itr_val = IGB_START_ITR;
745 }
746
747 /**
748  * igb_map_ring_to_vector - maps allocated queues to vectors
749  *
750  * This function maps the recently allocated queues to vectors.
751  **/
752 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
753 {
754         int i;
755         int v_idx = 0;
756
757         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
758             (adapter->num_q_vectors < adapter->num_tx_queues))
759                 return -ENOMEM;
760
761         if (adapter->num_q_vectors >=
762             (adapter->num_rx_queues + adapter->num_tx_queues)) {
763                 for (i = 0; i < adapter->num_rx_queues; i++)
764                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
765                 for (i = 0; i < adapter->num_tx_queues; i++)
766                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
767         } else {
768                 for (i = 0; i < adapter->num_rx_queues; i++) {
769                         if (i < adapter->num_tx_queues)
770                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
771                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
772                 }
773                 for (; i < adapter->num_tx_queues; i++)
774                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
775         }
776         return 0;
777 }
778
779 /**
780  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
781  *
782  * This function initializes the interrupts and allocates all of the queues.
783  **/
784 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
785 {
786         struct pci_dev *pdev = adapter->pdev;
787         int err;
788
789         igb_set_interrupt_capability(adapter);
790
791         err = igb_alloc_q_vectors(adapter);
792         if (err) {
793                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
794                 goto err_alloc_q_vectors;
795         }
796
797         err = igb_alloc_queues(adapter);
798         if (err) {
799                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
800                 goto err_alloc_queues;
801         }
802
803         err = igb_map_ring_to_vector(adapter);
804         if (err) {
805                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
806                 goto err_map_queues;
807         }
808
809
810         return 0;
811 err_map_queues:
812         igb_free_queues(adapter);
813 err_alloc_queues:
814         igb_free_q_vectors(adapter);
815 err_alloc_q_vectors:
816         igb_reset_interrupt_capability(adapter);
817         return err;
818 }
819
820 /**
821  * igb_request_irq - initialize interrupts
822  *
823  * Attempts to configure interrupts using the best available
824  * capabilities of the hardware and kernel.
825  **/
826 static int igb_request_irq(struct igb_adapter *adapter)
827 {
828         struct net_device *netdev = adapter->netdev;
829         struct pci_dev *pdev = adapter->pdev;
830         struct e1000_hw *hw = &adapter->hw;
831         int err = 0;
832
833         if (adapter->msix_entries) {
834                 err = igb_request_msix(adapter);
835                 if (!err)
836                         goto request_done;
837                 /* fall back to MSI */
838                 igb_clear_interrupt_scheme(adapter);
839                 if (!pci_enable_msi(adapter->pdev))
840                         adapter->flags |= IGB_FLAG_HAS_MSI;
841                 igb_free_all_tx_resources(adapter);
842                 igb_free_all_rx_resources(adapter);
843                 adapter->num_tx_queues = 1;
844                 adapter->num_rx_queues = 1;
845                 adapter->num_q_vectors = 1;
846                 err = igb_alloc_q_vectors(adapter);
847                 if (err) {
848                         dev_err(&pdev->dev,
849                                 "Unable to allocate memory for vectors\n");
850                         goto request_done;
851                 }
852                 err = igb_alloc_queues(adapter);
853                 if (err) {
854                         dev_err(&pdev->dev,
855                                 "Unable to allocate memory for queues\n");
856                         igb_free_q_vectors(adapter);
857                         goto request_done;
858                 }
859                 igb_setup_all_tx_resources(adapter);
860                 igb_setup_all_rx_resources(adapter);
861         } else {
862                 switch (hw->mac.type) {
863                 case e1000_82575:
864                         wr32(E1000_MSIXBM(0),
865                              (E1000_EICR_RX_QUEUE0 |
866                               E1000_EICR_TX_QUEUE0 |
867                               E1000_EIMS_OTHER));
868                         break;
869                 case e1000_82576:
870                         wr32(E1000_IVAR0, E1000_IVAR_VALID);
871                         break;
872                 default:
873                         break;
874                 }
875         }
876
877         if (adapter->flags & IGB_FLAG_HAS_MSI) {
878                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
879                                   netdev->name, adapter);
880                 if (!err)
881                         goto request_done;
882
883                 /* fall back to legacy interrupts */
884                 igb_reset_interrupt_capability(adapter);
885                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
886         }
887
888         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
889                           netdev->name, adapter);
890
891         if (err)
892                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
893                         err);
894
895 request_done:
896         return err;
897 }
898
899 static void igb_free_irq(struct igb_adapter *adapter)
900 {
901         if (adapter->msix_entries) {
902                 int vector = 0, i;
903
904                 free_irq(adapter->msix_entries[vector++].vector, adapter);
905
906                 for (i = 0; i < adapter->num_q_vectors; i++) {
907                         struct igb_q_vector *q_vector = adapter->q_vector[i];
908                         free_irq(adapter->msix_entries[vector++].vector,
909                                  q_vector);
910                 }
911         } else {
912                 free_irq(adapter->pdev->irq, adapter);
913         }
914 }
915
916 /**
917  * igb_irq_disable - Mask off interrupt generation on the NIC
918  * @adapter: board private structure
919  **/
920 static void igb_irq_disable(struct igb_adapter *adapter)
921 {
922         struct e1000_hw *hw = &adapter->hw;
923
924         /*
925          * we need to be careful when disabling interrupts.  The VFs are also
926          * mapped into these registers and so clearing the bits can cause
927          * issues on the VF drivers so we only need to clear what we set
928          */
929         if (adapter->msix_entries) {
930                 u32 regval = rd32(E1000_EIAM);
931                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
932                 wr32(E1000_EIMC, adapter->eims_enable_mask);
933                 regval = rd32(E1000_EIAC);
934                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
935         }
936
937         wr32(E1000_IAM, 0);
938         wr32(E1000_IMC, ~0);
939         wrfl();
940         synchronize_irq(adapter->pdev->irq);
941 }
942
943 /**
944  * igb_irq_enable - Enable default interrupt generation settings
945  * @adapter: board private structure
946  **/
947 static void igb_irq_enable(struct igb_adapter *adapter)
948 {
949         struct e1000_hw *hw = &adapter->hw;
950
951         if (adapter->msix_entries) {
952                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
953                 u32 regval = rd32(E1000_EIAC);
954                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
955                 regval = rd32(E1000_EIAM);
956                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
957                 wr32(E1000_EIMS, adapter->eims_enable_mask);
958                 if (adapter->vfs_allocated_count) {
959                         wr32(E1000_MBVFIMR, 0xFF);
960                         ims |= E1000_IMS_VMMB;
961                 }
962                 wr32(E1000_IMS, ims);
963         } else {
964                 wr32(E1000_IMS, IMS_ENABLE_MASK);
965                 wr32(E1000_IAM, IMS_ENABLE_MASK);
966         }
967 }
968
969 static void igb_update_mng_vlan(struct igb_adapter *adapter)
970 {
971         struct e1000_hw *hw = &adapter->hw;
972         u16 vid = adapter->hw.mng_cookie.vlan_id;
973         u16 old_vid = adapter->mng_vlan_id;
974
975         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
976                 /* add VID to filter table */
977                 igb_vfta_set(hw, vid, true);
978                 adapter->mng_vlan_id = vid;
979         } else {
980                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
981         }
982
983         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
984             (vid != old_vid) &&
985             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
986                 /* remove VID from filter table */
987                 igb_vfta_set(hw, old_vid, false);
988         }
989 }
990
991 /**
992  * igb_release_hw_control - release control of the h/w to f/w
993  * @adapter: address of board private structure
994  *
995  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
996  * For ASF and Pass Through versions of f/w this means that the
997  * driver is no longer loaded.
998  *
999  **/
1000 static void igb_release_hw_control(struct igb_adapter *adapter)
1001 {
1002         struct e1000_hw *hw = &adapter->hw;
1003         u32 ctrl_ext;
1004
1005         /* Let firmware take over control of h/w */
1006         ctrl_ext = rd32(E1000_CTRL_EXT);
1007         wr32(E1000_CTRL_EXT,
1008                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1009 }
1010
1011 /**
1012  * igb_get_hw_control - get control of the h/w from f/w
1013  * @adapter: address of board private structure
1014  *
1015  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1016  * For ASF and Pass Through versions of f/w this means that
1017  * the driver is loaded.
1018  *
1019  **/
1020 static void igb_get_hw_control(struct igb_adapter *adapter)
1021 {
1022         struct e1000_hw *hw = &adapter->hw;
1023         u32 ctrl_ext;
1024
1025         /* Let firmware know the driver has taken over */
1026         ctrl_ext = rd32(E1000_CTRL_EXT);
1027         wr32(E1000_CTRL_EXT,
1028                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1029 }
1030
1031 /**
1032  * igb_configure - configure the hardware for RX and TX
1033  * @adapter: private board structure
1034  **/
1035 static void igb_configure(struct igb_adapter *adapter)
1036 {
1037         struct net_device *netdev = adapter->netdev;
1038         int i;
1039
1040         igb_get_hw_control(adapter);
1041         igb_set_rx_mode(netdev);
1042
1043         igb_restore_vlan(adapter);
1044
1045         igb_setup_tctl(adapter);
1046         igb_setup_mrqc(adapter);
1047         igb_setup_rctl(adapter);
1048
1049         igb_configure_tx(adapter);
1050         igb_configure_rx(adapter);
1051
1052         igb_rx_fifo_flush_82575(&adapter->hw);
1053
1054         /* call igb_desc_unused which always leaves
1055          * at least 1 descriptor unused to make sure
1056          * next_to_use != next_to_clean */
1057         for (i = 0; i < adapter->num_rx_queues; i++) {
1058                 struct igb_ring *ring = &adapter->rx_ring[i];
1059                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1060         }
1061
1062
1063         adapter->tx_queue_len = netdev->tx_queue_len;
1064 }
1065
1066
1067 /**
1068  * igb_up - Open the interface and prepare it to handle traffic
1069  * @adapter: board private structure
1070  **/
1071 int igb_up(struct igb_adapter *adapter)
1072 {
1073         struct e1000_hw *hw = &adapter->hw;
1074         int i;
1075
1076         /* hardware has been reset, we need to reload some things */
1077         igb_configure(adapter);
1078
1079         clear_bit(__IGB_DOWN, &adapter->state);
1080
1081         for (i = 0; i < adapter->num_q_vectors; i++) {
1082                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1083                 napi_enable(&q_vector->napi);
1084         }
1085         if (adapter->msix_entries)
1086                 igb_configure_msix(adapter);
1087
1088         /* Clear any pending interrupts. */
1089         rd32(E1000_ICR);
1090         igb_irq_enable(adapter);
1091
1092         /* notify VFs that reset has been completed */
1093         if (adapter->vfs_allocated_count) {
1094                 u32 reg_data = rd32(E1000_CTRL_EXT);
1095                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1096                 wr32(E1000_CTRL_EXT, reg_data);
1097         }
1098
1099         netif_tx_start_all_queues(adapter->netdev);
1100
1101         /* start the watchdog. */
1102         hw->mac.get_link_status = 1;
1103         schedule_work(&adapter->watchdog_task);
1104
1105         return 0;
1106 }
1107
1108 void igb_down(struct igb_adapter *adapter)
1109 {
1110         struct net_device *netdev = adapter->netdev;
1111         struct e1000_hw *hw = &adapter->hw;
1112         u32 tctl, rctl;
1113         int i;
1114
1115         /* signal that we're down so the interrupt handler does not
1116          * reschedule our watchdog timer */
1117         set_bit(__IGB_DOWN, &adapter->state);
1118
1119         /* disable receives in the hardware */
1120         rctl = rd32(E1000_RCTL);
1121         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1122         /* flush and sleep below */
1123
1124         netif_tx_stop_all_queues(netdev);
1125
1126         /* disable transmits in the hardware */
1127         tctl = rd32(E1000_TCTL);
1128         tctl &= ~E1000_TCTL_EN;
1129         wr32(E1000_TCTL, tctl);
1130         /* flush both disables and wait for them to finish */
1131         wrfl();
1132         msleep(10);
1133
1134         for (i = 0; i < adapter->num_q_vectors; i++) {
1135                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1136                 napi_disable(&q_vector->napi);
1137         }
1138
1139         igb_irq_disable(adapter);
1140
1141         del_timer_sync(&adapter->watchdog_timer);
1142         del_timer_sync(&adapter->phy_info_timer);
1143
1144         netdev->tx_queue_len = adapter->tx_queue_len;
1145         netif_carrier_off(netdev);
1146
1147         /* record the stats before reset*/
1148         igb_update_stats(adapter);
1149
1150         adapter->link_speed = 0;
1151         adapter->link_duplex = 0;
1152
1153         if (!pci_channel_offline(adapter->pdev))
1154                 igb_reset(adapter);
1155         igb_clean_all_tx_rings(adapter);
1156         igb_clean_all_rx_rings(adapter);
1157 #ifdef CONFIG_IGB_DCA
1158
1159         /* since we reset the hardware, DCA settings were cleared */
1160         igb_setup_dca(adapter);
1161 #endif
1162 }
1163
1164 void igb_reinit_locked(struct igb_adapter *adapter)
1165 {
1166         WARN_ON(in_interrupt());
1167         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1168                 msleep(1);
1169         igb_down(adapter);
1170         igb_up(adapter);
1171         clear_bit(__IGB_RESETTING, &adapter->state);
1172 }
1173
1174 void igb_reset(struct igb_adapter *adapter)
1175 {
1176         struct pci_dev *pdev = adapter->pdev;
1177         struct e1000_hw *hw = &adapter->hw;
1178         struct e1000_mac_info *mac = &hw->mac;
1179         struct e1000_fc_info *fc = &hw->fc;
1180         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1181         u16 hwm;
1182
1183         /* Repartition PBA for greater than 9k MTU.
1184          * CTRL.RST must be set for the change to take effect.
1185          */
1186         switch (mac->type) {
1187         case e1000_82576:
1188                 pba = rd32(E1000_RXPBS);
1189                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1190                 break;
1191         case e1000_82575:
1192         default:
1193                 pba = E1000_PBA_34K;
1194                 break;
1195         }
1196
1197         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1198             (mac->type < e1000_82576)) {
1199                 /* adjust PBA for jumbo frames */
1200                 wr32(E1000_PBA, pba);
1201
1202                 /* To maintain wire speed transmits, the Tx FIFO should be
1203                  * large enough to accommodate two full transmit packets,
1204                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1205                  * the Rx FIFO should be large enough to accommodate at least
1206                  * one full receive packet and is similarly rounded up and
1207                  * expressed in KB. */
1208                 pba = rd32(E1000_PBA);
1209                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1210                 tx_space = pba >> 16;
1211                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1212                 pba &= 0xffff;
1213                 /* the Tx FIFO also stores 16 bytes of information about the Tx
1214                  * packet, but doesn't include the Ethernet FCS because hardware appends it */
1215                 min_tx_space = (adapter->max_frame_size +
1216                                 sizeof(union e1000_adv_tx_desc) -
1217                                 ETH_FCS_LEN) * 2;
1218                 min_tx_space = ALIGN(min_tx_space, 1024);
1219                 min_tx_space >>= 10;
1220                 /* software strips receive CRC, so leave room for it */
1221                 min_rx_space = adapter->max_frame_size;
1222                 min_rx_space = ALIGN(min_rx_space, 1024);
1223                 min_rx_space >>= 10;
1224
1225                 /* If current Tx allocation is less than the min Tx FIFO size,
1226                  * and the min Tx FIFO size is less than the current Rx FIFO
1227                  * allocation, take space away from current Rx allocation */
1228                 if (tx_space < min_tx_space &&
1229                     ((min_tx_space - tx_space) < pba)) {
1230                         pba = pba - (min_tx_space - tx_space);
1231
1232                         /* if short on rx space, rx wins and must trump tx
1233                          * adjustment */
1234                         if (pba < min_rx_space)
1235                                 pba = min_rx_space;
1236                 }
1237                 wr32(E1000_PBA, pba);
1238         }
1239
1240         /* flow control settings */
1241         /* The high water mark must be low enough to fit one full frame
1242          * (or the size used for early receive) above it in the Rx FIFO.
1243          * Set it to the lower of:
1244          * - 90% of the Rx FIFO size, or
1245          * - the full Rx FIFO size minus two full frames */
1246         hwm = min(((pba << 10) * 9 / 10),
1247                         ((pba << 10) - 2 * adapter->max_frame_size));
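        /* illustrative numbers (assuming the 82575 default 34 KB PBA and a
         * 1522-byte max frame): hwm = min(31334, 31772) = 31334 bytes */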
1248
1249         if (mac->type < e1000_82576) {
1250                 fc->high_water = hwm & 0xFFF8;  /* 8-byte granularity */
1251                 fc->low_water = fc->high_water - 8;
1252         } else {
1253                 fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1254                 fc->low_water = fc->high_water - 16;
1255         }
1256         fc->pause_time = 0xFFFF;
1257         fc->send_xon = 1;
1258         fc->current_mode = fc->requested_mode;
1259
1260         /* reset VF state and disable receives and transmits for all VFs */
1261         if (adapter->vfs_allocated_count) {
1262                 int i;
1263                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1264                         adapter->vf_data[i].flags = 0;
1265
1266                 /* ping all the active vfs to let them know we are going down */
1267                 igb_ping_all_vfs(adapter);
1268
1269                 /* disable transmits and receives */
1270                 wr32(E1000_VFRE, 0);
1271                 wr32(E1000_VFTE, 0);
1272         }
1273
1274         /* Allow time for pending master requests to run */
1275         hw->mac.ops.reset_hw(hw);
1276         wr32(E1000_WUC, 0);
1277
1278         if (hw->mac.ops.init_hw(hw))
1279                 dev_err(&pdev->dev, "Hardware Error\n");
1280
1281         igb_update_mng_vlan(adapter);
1282
1283         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1284         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1285
1286         igb_reset_adaptive(hw);
1287         igb_get_phy_info(hw);
1288 }
1289
1290 static const struct net_device_ops igb_netdev_ops = {
1291         .ndo_open               = igb_open,
1292         .ndo_stop               = igb_close,
1293         .ndo_start_xmit         = igb_xmit_frame_adv,
1294         .ndo_get_stats          = igb_get_stats,
1295         .ndo_set_rx_mode        = igb_set_rx_mode,
1296         .ndo_set_multicast_list = igb_set_rx_mode,
1297         .ndo_set_mac_address    = igb_set_mac,
1298         .ndo_change_mtu         = igb_change_mtu,
1299         .ndo_do_ioctl           = igb_ioctl,
1300         .ndo_tx_timeout         = igb_tx_timeout,
1301         .ndo_validate_addr      = eth_validate_addr,
1302         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1303         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1304         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1305 #ifdef CONFIG_NET_POLL_CONTROLLER
1306         .ndo_poll_controller    = igb_netpoll,
1307 #endif
1308 };
1309
1310 /**
1311  * igb_probe - Device Initialization Routine
1312  * @pdev: PCI device information struct
1313  * @ent: entry in igb_pci_tbl
1314  *
1315  * Returns 0 on success, negative on failure
1316  *
1317  * igb_probe initializes an adapter identified by a pci_dev structure.
1318  * The OS initialization, configuring of the adapter private structure,
1319  * and a hardware reset occur.
1320  **/
1321 static int __devinit igb_probe(struct pci_dev *pdev,
1322                                const struct pci_device_id *ent)
1323 {
1324         struct net_device *netdev;
1325         struct igb_adapter *adapter;
1326         struct e1000_hw *hw;
1327         u16 eeprom_data = 0;
1328         static int global_quad_port_a; /* global quad port a indication */
1329         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1330         unsigned long mmio_start, mmio_len;
1331         int err, pci_using_dac;
1332         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1333         u32 part_num;
1334
1335         err = pci_enable_device_mem(pdev);
1336         if (err)
1337                 return err;
1338
1339         pci_using_dac = 0;
1340         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1341         if (!err) {
1342                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1343                 if (!err)
1344                         pci_using_dac = 1;
1345         } else {
1346                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1347                 if (err) {
1348                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1349                         if (err) {
1350                                 dev_err(&pdev->dev, "No usable DMA "
1351                                         "configuration, aborting\n");
1352                                 goto err_dma;
1353                         }
1354                 }
1355         }
1356
1357         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1358                                            IORESOURCE_MEM),
1359                                            igb_driver_name);
1360         if (err)
1361                 goto err_pci_reg;
1362
1363         pci_enable_pcie_error_reporting(pdev);
1364
1365         pci_set_master(pdev);
1366         pci_save_state(pdev);
1367
1368         err = -ENOMEM;
1369         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1370                                    IGB_ABS_MAX_TX_QUEUES);
1371         if (!netdev)
1372                 goto err_alloc_etherdev;
1373
1374         SET_NETDEV_DEV(netdev, &pdev->dev);
1375
1376         pci_set_drvdata(pdev, netdev);
1377         adapter = netdev_priv(netdev);
1378         adapter->netdev = netdev;
1379         adapter->pdev = pdev;
1380         hw = &adapter->hw;
1381         hw->back = adapter;
1382         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1383
1384         mmio_start = pci_resource_start(pdev, 0);
1385         mmio_len = pci_resource_len(pdev, 0);
1386
1387         err = -EIO;
1388         hw->hw_addr = ioremap(mmio_start, mmio_len);
1389         if (!hw->hw_addr)
1390                 goto err_ioremap;
1391
1392         netdev->netdev_ops = &igb_netdev_ops;
1393         igb_set_ethtool_ops(netdev);
1394         netdev->watchdog_timeo = 5 * HZ;
1395
1396         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1397
1398         netdev->mem_start = mmio_start;
1399         netdev->mem_end = mmio_start + mmio_len;
1400
1401         /* PCI config space info */
1402         hw->vendor_id = pdev->vendor;
1403         hw->device_id = pdev->device;
1404         hw->revision_id = pdev->revision;
1405         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1406         hw->subsystem_device_id = pdev->subsystem_device;
1407
1408         /* Copy the default MAC, PHY and NVM function pointers */
1409         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1410         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1411         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1412         /* Initialize skew-specific constants */
1413         err = ei->get_invariants(hw);
1414         if (err)
1415                 goto err_sw_init;
1416
1417         /* setup the private structure */
1418         err = igb_sw_init(adapter);
1419         if (err)
1420                 goto err_sw_init;
1421
1422         igb_get_bus_info_pcie(hw);
1423
1424         hw->phy.autoneg_wait_to_complete = false;
1425         hw->mac.adaptive_ifs = true;
1426
1427         /* Copper options */
1428         if (hw->phy.media_type == e1000_media_type_copper) {
1429                 hw->phy.mdix = AUTO_ALL_MODES;
1430                 hw->phy.disable_polarity_correction = false;
1431                 hw->phy.ms_type = e1000_ms_hw_default;
1432         }
1433
1434         if (igb_check_reset_block(hw))
1435                 dev_info(&pdev->dev,
1436                         "PHY reset is blocked due to SOL/IDER session.\n");
1437
1438         netdev->features = NETIF_F_SG |
1439                            NETIF_F_IP_CSUM |
1440                            NETIF_F_HW_VLAN_TX |
1441                            NETIF_F_HW_VLAN_RX |
1442                            NETIF_F_HW_VLAN_FILTER;
1443
1444         netdev->features |= NETIF_F_IPV6_CSUM;
1445         netdev->features |= NETIF_F_TSO;
1446         netdev->features |= NETIF_F_TSO6;
1447         netdev->features |= NETIF_F_GRO;
1448
1449         netdev->vlan_features |= NETIF_F_TSO;
1450         netdev->vlan_features |= NETIF_F_TSO6;
1451         netdev->vlan_features |= NETIF_F_IP_CSUM;
1452         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1453         netdev->vlan_features |= NETIF_F_SG;
1454
1455         if (pci_using_dac)
1456                 netdev->features |= NETIF_F_HIGHDMA;
1457
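        /* SCTP CRC32c checksum offload is only present on 82576 and later
         * parts, hence the MAC type check below */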
1458         if (hw->mac.type >= e1000_82576)
1459                 netdev->features |= NETIF_F_SCTP_CSUM;
1460
1461         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1462
1463         /* before reading the NVM, reset the controller to put the device in a
1464          * known good starting state */
1465         hw->mac.ops.reset_hw(hw);
1466
1467         /* make sure the NVM is good */
1468         if (igb_validate_nvm_checksum(hw) < 0) {
1469                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1470                 err = -EIO;
1471                 goto err_eeprom;
1472         }
1473
1474         /* copy the MAC address out of the NVM */
1475         if (hw->mac.ops.read_mac_addr(hw))
1476                 dev_err(&pdev->dev, "NVM Read Error\n");
1477
1478         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1479         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1480
1481         if (!is_valid_ether_addr(netdev->perm_addr)) {
1482                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1483                 err = -EIO;
1484                 goto err_eeprom;
1485         }
1486
1487         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1488                     (unsigned long) adapter);
1489         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1490                     (unsigned long) adapter);
1491
1492         INIT_WORK(&adapter->reset_task, igb_reset_task);
1493         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1494
1495         /* Initialize link properties that are user-changeable */
1496         adapter->fc_autoneg = true;
1497         hw->mac.autoneg = true;
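        /* 0x2f advertises 10/100 half and full duplex plus 1000 full duplex;
         * 1000 half duplex is not advertised */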
1498         hw->phy.autoneg_advertised = 0x2f;
1499
1500         hw->fc.requested_mode = e1000_fc_default;
1501         hw->fc.current_mode = e1000_fc_default;
1502
1503         igb_validate_mdi_setting(hw);
1504
1505         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
1506          * enable the ACPI Magic Packet filter
1507          */
1508
1509         if (hw->bus.func == 0)
1510                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1511         else if (hw->bus.func == 1)
1512                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1513
1514         if (eeprom_data & eeprom_apme_mask)
1515                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1516
1517         /* now that we have the eeprom settings, apply the special cases where
1518          * the eeprom may be wrong or the board simply won't support wake on
1519          * lan on a particular port */
1520         switch (pdev->device) {
1521         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1522                 adapter->eeprom_wol = 0;
1523                 break;
1524         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1525         case E1000_DEV_ID_82576_FIBER:
1526         case E1000_DEV_ID_82576_SERDES:
1527                 /* Wake events only supported on port A for dual fiber
1528                  * regardless of eeprom setting */
1529                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1530                         adapter->eeprom_wol = 0;
1531                 break;
1532         case E1000_DEV_ID_82576_QUAD_COPPER:
1533                 /* if quad port adapter, disable WoL on all but port A */
1534                 if (global_quad_port_a != 0)
1535                         adapter->eeprom_wol = 0;
1536                 else
1537                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1538                 /* Reset for multiple quad port adapters */
1539                 if (++global_quad_port_a == 4)
1540                         global_quad_port_a = 0;
1541                 break;
1542         }
1543
1544         /* initialize the wol settings based on the eeprom settings */
1545         adapter->wol = adapter->eeprom_wol;
1546         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1547
1548         /* reset the hardware with the new settings */
1549         igb_reset(adapter);
1550
1551         /* let the f/w know that the h/w is now under the control of the
1552          * driver. */
1553         igb_get_hw_control(adapter);
1554
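        /* register_netdev() below resolves the "%d" to the first free ethN
         * index */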
1555         strcpy(netdev->name, "eth%d");
1556         err = register_netdev(netdev);
1557         if (err)
1558                 goto err_register;
1559
1560         /* carrier off reporting is important to ethtool even BEFORE open */
1561         netif_carrier_off(netdev);
1562
1563 #ifdef CONFIG_IGB_DCA
1564         if (dca_add_requester(&pdev->dev) == 0) {
1565                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1566                 dev_info(&pdev->dev, "DCA enabled\n");
1567                 igb_setup_dca(adapter);
1568         }
1569
1570 #endif
1571         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1572         /* print bus type/speed/width info */
1573         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1574                  netdev->name,
1575                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1576                                                             "unknown"),
1577                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1578                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1579                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1580                    "unknown"),
1581                  netdev->dev_addr);
1582
1583         igb_read_part_num(hw, &part_num);
1584         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1585                 (part_num >> 8), (part_num & 0xff));
1586
1587         dev_info(&pdev->dev,
1588                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1589                 adapter->msix_entries ? "MSI-X" :
1590                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1591                 adapter->num_rx_queues, adapter->num_tx_queues);
1592
1593         return 0;
1594
1595 err_register:
1596         igb_release_hw_control(adapter);
1597 err_eeprom:
1598         if (!igb_check_reset_block(hw))
1599                 igb_reset_phy(hw);
1600
1601         if (hw->flash_address)
1602                 iounmap(hw->flash_address);
1603 err_sw_init:
1604         igb_clear_interrupt_scheme(adapter);
1605         iounmap(hw->hw_addr);
1606 err_ioremap:
1607         free_netdev(netdev);
1608 err_alloc_etherdev:
1609         pci_release_selected_regions(pdev,
1610                                      pci_select_bars(pdev, IORESOURCE_MEM));
1611 err_pci_reg:
1612 err_dma:
1613         pci_disable_device(pdev);
1614         return err;
1615 }
1616
1617 /**
1618  * igb_remove - Device Removal Routine
1619  * @pdev: PCI device information struct
1620  *
1621  * igb_remove is called by the PCI subsystem to alert the driver
1622  * that it should release a PCI device.  This could be caused by a
1623  * Hot-Plug event, or because the driver is going to be removed from
1624  * memory.
1625  **/
1626 static void __devexit igb_remove(struct pci_dev *pdev)
1627 {
1628         struct net_device *netdev = pci_get_drvdata(pdev);
1629         struct igb_adapter *adapter = netdev_priv(netdev);
1630         struct e1000_hw *hw = &adapter->hw;
1631
1632         /* flush_scheduled_work() may reschedule our watchdog task, so
1633          * explicitly disable watchdog tasks from being rescheduled  */
1634         set_bit(__IGB_DOWN, &adapter->state);
1635         del_timer_sync(&adapter->watchdog_timer);
1636         del_timer_sync(&adapter->phy_info_timer);
1637
1638         flush_scheduled_work();
1639
1640 #ifdef CONFIG_IGB_DCA
1641         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1642                 dev_info(&pdev->dev, "DCA disabled\n");
1643                 dca_remove_requester(&pdev->dev);
1644                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1645                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1646         }
1647 #endif
1648
1649         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
1650          * would have already happened in close and is redundant. */
1651         igb_release_hw_control(adapter);
1652
1653         unregister_netdev(netdev);
1654
1655         if (!igb_check_reset_block(hw))
1656                 igb_reset_phy(hw);
1657
1658         igb_clear_interrupt_scheme(adapter);
1659
1660 #ifdef CONFIG_PCI_IOV
1661         /* reclaim resources allocated to VFs */
1662         if (adapter->vf_data) {
1663                 /* disable iov and allow time for transactions to clear */
1664                 pci_disable_sriov(pdev);
1665                 msleep(500);
1666
1667                 kfree(adapter->vf_data);
1668                 adapter->vf_data = NULL;
1669                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1670                 msleep(100);
1671                 dev_info(&pdev->dev, "IOV Disabled\n");
1672         }
1673 #endif
1674
1675         iounmap(hw->hw_addr);
1676         if (hw->flash_address)
1677                 iounmap(hw->flash_address);
1678         pci_release_selected_regions(pdev,
1679                                      pci_select_bars(pdev, IORESOURCE_MEM));
1680
1681         free_netdev(netdev);
1682
1683         pci_disable_pcie_error_reporting(pdev);
1684
1685         pci_disable_device(pdev);
1686 }
1687
1688 /**
1689  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
1690  * @adapter: board private structure to initialize
1691  *
1692  * This function initializes the vf specific data storage and then attempts to
1693  * allocate the VFs.  The reason for ordering it this way is because it is much
1694  * more expensive time-wise to disable SR-IOV than it is to allocate and free
1695  * the memory for the VFs.
1696  **/
1697 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
1698 {
1699 #ifdef CONFIG_PCI_IOV
1700         struct pci_dev *pdev = adapter->pdev;
1701
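        /* the 82576 provides eight virtualization pools and one of them stays
         * with the PF, which is presumably why the VF count is capped at 7 */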
1702         if (adapter->vfs_allocated_count > 7)
1703                 adapter->vfs_allocated_count = 7;
1704
1705         if (adapter->vfs_allocated_count) {
1706                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1707                                            sizeof(struct vf_data_storage),
1708                                            GFP_KERNEL);
1709                 /* if allocation failed then we do not support SR-IOV */
1710                 if (!adapter->vf_data) {
1711                         adapter->vfs_allocated_count = 0;
1712                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
1713                                 "Data Storage\n");
1714                 }
1715         }
1716
1717         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
1718                 kfree(adapter->vf_data);
1719                 adapter->vf_data = NULL;
1720 #endif /* CONFIG_PCI_IOV */
1721                 adapter->vfs_allocated_count = 0;
1722 #ifdef CONFIG_PCI_IOV
1723         } else {
1724                 unsigned char mac_addr[ETH_ALEN];
1725                 int i;
1726                 dev_info(&pdev->dev, "%d vfs allocated\n",
1727                          adapter->vfs_allocated_count);
1728                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
1729                         random_ether_addr(mac_addr);
1730                         igb_set_vf_mac(adapter, i, mac_addr);
1731                 }
1732         }
1733 #endif /* CONFIG_PCI_IOV */
1734 }
1735
1736
1737 /**
1738  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
1739  * @adapter: board private structure to initialize
1740  *
1741  * igb_init_hw_timer initializes the function pointer and values for the hw
1742  * timer found in hardware.
1743  **/
1744 static void igb_init_hw_timer(struct igb_adapter *adapter)
1745 {
1746         struct e1000_hw *hw = &adapter->hw;
1747
1748         switch (hw->mac.type) {
1749         case e1000_82576:
1750                 /*
1751                  * Initialize hardware timer: we keep it running just in
1752                  * case some program needs it later on.
1753                  */
1754                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1755                 adapter->cycles.read = igb_read_clock;
1756                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1757                 adapter->cycles.mult = 1;
1758                 /**
1759                  * Scale the NIC clock cycle by a large factor so that
1760                  * relatively small clock corrections can be added or
1761                  * subtracted at each clock tick. The drawbacks of a large
1762                  * factor are a) that the clock register overflows more quickly
1763                  * (not such a big deal) and b) that the increment per tick has
1764                  * to fit into 24 bits.  As a result we need to use a shift of
1765                  * 19 so we can fit a value of 16 into the TIMINCA register.
1766                  */
1767                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1768                 wr32(E1000_TIMINCA,
1769                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
1770                                 (16 << IGB_82576_TSYNC_SHIFT));
1771
1772                 /* Set registers so that rollover occurs soon to test this. */
1773                 wr32(E1000_SYSTIML, 0x00000000);
1774                 wr32(E1000_SYSTIMH, 0xFF800000);
1775                 wrfl();
1776
1777                 timecounter_init(&adapter->clock,
1778                                  &adapter->cycles,
1779                                  ktime_to_ns(ktime_get_real()));
1780                 /*
1781                  * Synchronize our NIC clock against system wall clock. NIC
1782                  * time stamp reading requires ~3us per sample, each sample
1783                  * was pretty stable even under load => only require 10
1784                  * samples for each offset comparison.
1785                  */
1786                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1787                 adapter->compare.source = &adapter->clock;
1788                 adapter->compare.target = ktime_get_real;
1789                 adapter->compare.num_samples = 10;
1790                 timecompare_update(&adapter->compare, 0);
1791                 break;
1792         case e1000_82575:
1793                 /* 82575 does not support timesync */
1794         default:
1795                 break;
1796         }
1797
1798 }
1799
1800 /**
1801  * igb_sw_init - Initialize general software structures (struct igb_adapter)
1802  * @adapter: board private structure to initialize
1803  *
1804  * igb_sw_init initializes the Adapter private data structure.
1805  * Fields are initialized based on PCI device information and
1806  * OS network device settings (MTU size).
1807  **/
1808 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1809 {
1810         struct e1000_hw *hw = &adapter->hw;
1811         struct net_device *netdev = adapter->netdev;
1812         struct pci_dev *pdev = adapter->pdev;
1813
1814         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1815
1816         adapter->tx_ring_count = IGB_DEFAULT_TXD;
1817         adapter->rx_ring_count = IGB_DEFAULT_RXD;
1818         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1819         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1820
1821         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1822         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1823
1824 #ifdef CONFIG_PCI_IOV
1825         if (hw->mac.type == e1000_82576)
1826                 adapter->vfs_allocated_count = max_vfs;
1827
1828 #endif /* CONFIG_PCI_IOV */
1829         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
1830
1831         /*
1832          * if rss_queues > 4 or vfs are going to be allocated with rss_queues
1833          * then we should combine the queues into a queue pair in order to
1834          * conserve interrupts due to limited supply
1835          */
1836         if ((adapter->rss_queues > 4) ||
1837             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
1838                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1839
1840         /* This call may decrease the number of queues */
1841         if (igb_init_interrupt_scheme(adapter)) {
1842                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1843                 return -ENOMEM;
1844         }
1845
1846         igb_init_hw_timer(adapter);
1847         igb_probe_vfs(adapter);
1848
1849         /* Explicitly disable IRQ since the NIC can be in any state. */
1850         igb_irq_disable(adapter);
1851
1852         set_bit(__IGB_DOWN, &adapter->state);
1853         return 0;
1854 }
1855
1856 /**
1857  * igb_open - Called when a network interface is made active
1858  * @netdev: network interface device structure
1859  *
1860  * Returns 0 on success, negative value on failure
1861  *
1862  * The open entry point is called when a network interface is made
1863  * active by the system (IFF_UP).  At this point all resources needed
1864  * for transmit and receive operations are allocated, the interrupt
1865  * handler is registered with the OS, the watchdog timer is started,
1866  * and the stack is notified that the interface is ready.
1867  **/
1868 static int igb_open(struct net_device *netdev)
1869 {
1870         struct igb_adapter *adapter = netdev_priv(netdev);
1871         struct e1000_hw *hw = &adapter->hw;
1872         int err;
1873         int i;
1874
1875         /* disallow open during test */
1876         if (test_bit(__IGB_TESTING, &adapter->state))
1877                 return -EBUSY;
1878
1879         netif_carrier_off(netdev);
1880
1881         /* allocate transmit descriptors */
1882         err = igb_setup_all_tx_resources(adapter);
1883         if (err)
1884                 goto err_setup_tx;
1885
1886         /* allocate receive descriptors */
1887         err = igb_setup_all_rx_resources(adapter);
1888         if (err)
1889                 goto err_setup_rx;
1890
1891         /* e1000_power_up_phy(adapter); */
1892
1893         /* before we allocate an interrupt, we must be ready to handle it.
1894          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
1895          * as soon as we call pci_request_irq, so we have to set up our
1896          * clean_rx handler before we do so.  */
1897         igb_configure(adapter);
1898
1899         err = igb_request_irq(adapter);
1900         if (err)
1901                 goto err_req_irq;
1902
1903         /* From here on the code is the same as igb_up() */
1904         clear_bit(__IGB_DOWN, &adapter->state);
1905
1906         for (i = 0; i < adapter->num_q_vectors; i++) {
1907                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1908                 napi_enable(&q_vector->napi);
1909         }
1910
1911         /* Clear any pending interrupts. */
1912         rd32(E1000_ICR);
1913
1914         igb_irq_enable(adapter);
1915
1916         /* notify VFs that reset has been completed */
1917         if (adapter->vfs_allocated_count) {
1918                 u32 reg_data = rd32(E1000_CTRL_EXT);
1919                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1920                 wr32(E1000_CTRL_EXT, reg_data);
1921         }
1922
1923         netif_tx_start_all_queues(netdev);
1924
1925         /* start the watchdog. */
1926         hw->mac.get_link_status = 1;
1927         schedule_work(&adapter->watchdog_task);
1928
1929         return 0;
1930
1931 err_req_irq:
1932         igb_release_hw_control(adapter);
1933         /* e1000_power_down_phy(adapter); */
1934         igb_free_all_rx_resources(adapter);
1935 err_setup_rx:
1936         igb_free_all_tx_resources(adapter);
1937 err_setup_tx:
1938         igb_reset(adapter);
1939
1940         return err;
1941 }
1942
1943 /**
1944  * igb_close - Disables a network interface
1945  * @netdev: network interface device structure
1946  *
1947  * Returns 0, this is not allowed to fail
1948  *
1949  * The close entry point is called when an interface is de-activated
1950  * by the OS.  The hardware is still under the driver's control, but
1951  * needs to be disabled.  A global MAC reset is issued to stop the
1952  * hardware, and all transmit and receive resources are freed.
1953  **/
1954 static int igb_close(struct net_device *netdev)
1955 {
1956         struct igb_adapter *adapter = netdev_priv(netdev);
1957
1958         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
1959         igb_down(adapter);
1960
1961         igb_free_irq(adapter);
1962
1963         igb_free_all_tx_resources(adapter);
1964         igb_free_all_rx_resources(adapter);
1965
1966         return 0;
1967 }
1968
1969 /**
1970  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
1971  * @tx_ring: tx descriptor ring (for a specific queue) to setup
1972  *
1973  * Return 0 on success, negative on failure
1974  **/
1975 int igb_setup_tx_resources(struct igb_ring *tx_ring)
1976 {
1977         struct pci_dev *pdev = tx_ring->pdev;
1978         int size;
1979
1980         size = sizeof(struct igb_buffer) * tx_ring->count;
1981         tx_ring->buffer_info = vmalloc(size);
1982         if (!tx_ring->buffer_info)
1983                 goto err;
1984         memset(tx_ring->buffer_info, 0, size);
1985
1986         /* round up to nearest 4K */
1987         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
1988         tx_ring->size = ALIGN(tx_ring->size, 4096);
1989
1990         tx_ring->desc = pci_alloc_consistent(pdev,
1991                                              tx_ring->size,
1992                                              &tx_ring->dma);
1993
1994         if (!tx_ring->desc)
1995                 goto err;
1996
1997         tx_ring->next_to_use = 0;
1998         tx_ring->next_to_clean = 0;
1999         return 0;
2000
2001 err:
2002         vfree(tx_ring->buffer_info);
2003         dev_err(&pdev->dev,
2004                 "Unable to allocate memory for the transmit descriptor ring\n");
2005         return -ENOMEM;
2006 }
2007
2008 /**
2009  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2010  *                                (Descriptors) for all queues
2011  * @adapter: board private structure
2012  *
2013  * Return 0 on success, negative on failure
2014  **/
2015 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2016 {
2017         struct pci_dev *pdev = adapter->pdev;
2018         int i, err = 0;
2019
2020         for (i = 0; i < adapter->num_tx_queues; i++) {
2021                 err = igb_setup_tx_resources(&adapter->tx_ring[i]);
2022                 if (err) {
2023                         dev_err(&pdev->dev,
2024                                 "Allocation for Tx Queue %u failed\n", i);
2025                         for (i--; i >= 0; i--)
2026                                 igb_free_tx_resources(&adapter->tx_ring[i]);
2027                         break;
2028                 }
2029         }
2030
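        /* map every possible tx queue index onto an allocated ring in
         * round-robin fashion so lookups never hit an unused slot */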
2031         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2032                 int r_idx = i % adapter->num_tx_queues;
2033                 adapter->multi_tx_table[i] = &adapter->tx_ring[r_idx];
2034         }
2035         return err;
2036 }
2037
2038 /**
2039  * igb_setup_tctl - configure the transmit control registers
2040  * @adapter: Board private structure
2041  **/
2042 void igb_setup_tctl(struct igb_adapter *adapter)
2043 {
2044         struct e1000_hw *hw = &adapter->hw;
2045         u32 tctl;
2046
2047         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2048         wr32(E1000_TXDCTL(0), 0);
2049
2050         /* Program the Transmit Control Register */
2051         tctl = rd32(E1000_TCTL);
2052         tctl &= ~E1000_TCTL_CT;
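        /* pad short packets (PSP), retransmit on late collision (RTLC) and
         * program the collision threshold */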
2053         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2054                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2055
2056         igb_config_collision_dist(hw);
2057
2058         /* Enable transmits */
2059         tctl |= E1000_TCTL_EN;
2060
2061         wr32(E1000_TCTL, tctl);
2062 }
2063
2064 /**
2065  * igb_configure_tx_ring - Configure transmit ring after Reset
2066  * @adapter: board private structure
2067  * @ring: tx ring to configure
2068  *
2069  * Configure a transmit ring after a reset.
2070  **/
2071 void igb_configure_tx_ring(struct igb_adapter *adapter,
2072                            struct igb_ring *ring)
2073 {
2074         struct e1000_hw *hw = &adapter->hw;
2075         u32 txdctl;
2076         u64 tdba = ring->dma;
2077         int reg_idx = ring->reg_idx;
2078
2079         /* disable the queue */
2080         txdctl = rd32(E1000_TXDCTL(reg_idx));
2081         wr32(E1000_TXDCTL(reg_idx),
2082                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2083         wrfl();
2084         mdelay(10);
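        /* the flush and short delay above presumably give the queue disable
         * time to take effect before the ring registers are rewritten */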
2085
2086         wr32(E1000_TDLEN(reg_idx),
2087                         ring->count * sizeof(union e1000_adv_tx_desc));
2088         wr32(E1000_TDBAL(reg_idx),
2089                         tdba & 0x00000000ffffffffULL);
2090         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2091
2092         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2093         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2094         writel(0, ring->head);
2095         writel(0, ring->tail);
2096
2097         txdctl |= IGB_TX_PTHRESH;
2098         txdctl |= IGB_TX_HTHRESH << 8;
2099         txdctl |= IGB_TX_WTHRESH << 16;
2100
2101         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2102         wr32(E1000_TXDCTL(reg_idx), txdctl);
2103 }
2104
2105 /**
2106  * igb_configure_tx - Configure transmit Unit after Reset
2107  * @adapter: board private structure
2108  *
2109  * Configure the Tx unit of the MAC after a reset.
2110  **/
2111 static void igb_configure_tx(struct igb_adapter *adapter)
2112 {
2113         int i;
2114
2115         for (i = 0; i < adapter->num_tx_queues; i++)
2116                 igb_configure_tx_ring(adapter, &adapter->tx_ring[i]);
2117 }
2118
2119 /**
2120  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2121  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2122  *
2123  * Returns 0 on success, negative on failure
2124  **/
2125 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2126 {
2127         struct pci_dev *pdev = rx_ring->pdev;
2128         int size, desc_len;
2129
2130         size = sizeof(struct igb_buffer) * rx_ring->count;
2131         rx_ring->buffer_info = vmalloc(size);
2132         if (!rx_ring->buffer_info)
2133                 goto err;
2134         memset(rx_ring->buffer_info, 0, size);
2135
2136         desc_len = sizeof(union e1000_adv_rx_desc);
2137
2138         /* Round up to nearest 4K */
2139         rx_ring->size = rx_ring->count * desc_len;
2140         rx_ring->size = ALIGN(rx_ring->size, 4096);
2141
2142         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2143                                              &rx_ring->dma);
2144
2145         if (!rx_ring->desc)
2146                 goto err;
2147
2148         rx_ring->next_to_clean = 0;
2149         rx_ring->next_to_use = 0;
2150
2151         return 0;
2152
2153 err:
2154         vfree(rx_ring->buffer_info);
2155         rx_ring->buffer_info = NULL;
2156         dev_err(&pdev->dev, "Unable to allocate memory for "
2157                 "the receive descriptor ring\n");
2158         return -ENOMEM;
2159 }
2160
2161 /**
2162  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2163  *                                (Descriptors) for all queues
2164  * @adapter: board private structure
2165  *
2166  * Return 0 on success, negative on failure
2167  **/
2168 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2169 {
2170         struct pci_dev *pdev = adapter->pdev;
2171         int i, err = 0;
2172
2173         for (i = 0; i < adapter->num_rx_queues; i++) {
2174                 err = igb_setup_rx_resources(&adapter->rx_ring[i]);
2175                 if (err) {
2176                         dev_err(&pdev->dev,
2177                                 "Allocation for Rx Queue %u failed\n", i);
2178                         for (i--; i >= 0; i--)
2179                                 igb_free_rx_resources(&adapter->rx_ring[i]);
2180                         break;
2181                 }
2182         }
2183
2184         return err;
2185 }
2186
2187 /**
2188  * igb_setup_mrqc - configure the multiple receive queue control registers
2189  * @adapter: Board private structure
2190  **/
2191 static void igb_setup_mrqc(struct igb_adapter *adapter)
2192 {
2193         struct e1000_hw *hw = &adapter->hw;
2194         u32 mrqc, rxcsum;
2195         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2196         union e1000_reta {
2197                 u32 dword;
2198                 u8  bytes[4];
2199         } reta;
2200         static const u8 rsshash[40] = {
2201                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2202                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2203                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2204                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2205
2206         /* Fill out hash function seeds */
2207         for (j = 0; j < 10; j++) {
2208                 u32 rsskey = rsshash[(j * 4)];
2209                 rsskey |= rsshash[(j * 4) + 1] << 8;
2210                 rsskey |= rsshash[(j * 4) + 2] << 16;
2211                 rsskey |= rsshash[(j * 4) + 3] << 24;
2212                 array_wr32(E1000_RSSRK(0), j, rsskey);
2213         }
2214
2215         num_rx_queues = adapter->rss_queues;
2216
2217         if (adapter->vfs_allocated_count) {
2218                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2219                 switch (hw->mac.type) {
2220                 case e1000_82576:
2221                         shift = 3;
2222                         num_rx_queues = 2;
2223                         break;
2224                 case e1000_82575:
2225                         shift = 2;
2226                         shift2 = 6;
2227                 default:
2228                         break;
2229                 }
2230         } else {
2231                 if (hw->mac.type == e1000_82575)
2232                         shift = 6;
2233         }
2234
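        /* fill the 128-entry redirection table: each byte maps a hash result
         * to a receive queue, written one dword (four entries) at a time */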
2235         for (j = 0; j < (32 * 4); j++) {
2236                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2237                 if (shift2)
2238                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2239                 if ((j & 3) == 3)
2240                         wr32(E1000_RETA(j >> 2), reta.dword);
2241         }
2242
2243         /*
2244          * Disable raw packet checksumming so that RSS hash is placed in
2245          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2246          * offloads as they are enabled by default
2247          */
2248         rxcsum = rd32(E1000_RXCSUM);
2249         rxcsum |= E1000_RXCSUM_PCSD;
2250
2251         if (adapter->hw.mac.type >= e1000_82576)
2252                 /* Enable Receive Checksum Offload for SCTP */
2253                 rxcsum |= E1000_RXCSUM_CRCOFL;
2254
2255         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2256         wr32(E1000_RXCSUM, rxcsum);
2257
2258         /* If VMDq is enabled then we set the appropriate mode for that, else
2259          * we default to RSS so that an RSS hash is calculated per packet even
2260          * if we are only using one queue */
2261         if (adapter->vfs_allocated_count) {
2262                 if (hw->mac.type > e1000_82575) {
2263                         /* Set the default pool for the PF's first queue */
2264                         u32 vtctl = rd32(E1000_VT_CTL);
2265                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2266                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2267                         vtctl |= adapter->vfs_allocated_count <<
2268                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2269                         wr32(E1000_VT_CTL, vtctl);
2270                 }
2271                 if (adapter->rss_queues > 1)
2272                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2273                 else
2274                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2275         } else {
2276                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2277         }
2278         igb_vmm_control(adapter);
2279
2280         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2281                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2282         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2283                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2284         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2285                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2286         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2287                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2288
2289         wr32(E1000_MRQC, mrqc);
2290 }
2291
2292 /**
2293  * igb_setup_rctl - configure the receive control registers
2294  * @adapter: Board private structure
2295  **/
2296 void igb_setup_rctl(struct igb_adapter *adapter)
2297 {
2298         struct e1000_hw *hw = &adapter->hw;
2299         u32 rctl;
2300
2301         rctl = rd32(E1000_RCTL);
2302
2303         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2304         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2305
2306         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2307                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2308
2309         /*
2310          * enable stripping of CRC. It's unlikely this will break BMC
2311          * redirection as it did with e1000. Newer features require
2312          * that the HW strips the CRC.
2313          */
2314         rctl |= E1000_RCTL_SECRC;
2315
2316         /* disable store bad packets and clear size bits. */
2317         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2318
2319         /* enable LPE to prevent packets larger than max_frame_size */
2320         rctl |= E1000_RCTL_LPE;
2321
2322         /* disable queue 0 to prevent tail write w/o re-config */
2323         wr32(E1000_RXDCTL(0), 0);
2324
2325         /* Attention!!!  For SR-IOV PF driver operations you must enable
2326          * queue drop for all VF and PF queues to prevent head of line blocking
2327          * if an untrusted VF does not provide descriptors to hardware.
2328          */
2329         if (adapter->vfs_allocated_count) {
2330                 /* set all queue drop enable bits */
2331                 wr32(E1000_QDE, ALL_QUEUES);
2332         }
2333
2334         wr32(E1000_RCTL, rctl);
2335 }
2336
2337 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2338                                    int vfn)
2339 {
2340         struct e1000_hw *hw = &adapter->hw;
2341         u32 vmolr;
2342
2343         /* if it isn't the PF, check to see if VFs are enabled and
2344          * increase the size to support vlan tags */
2345         if (vfn < adapter->vfs_allocated_count &&
2346             adapter->vf_data[vfn].vlans_enabled)
2347                 size += VLAN_TAG_SIZE;
2348
2349         vmolr = rd32(E1000_VMOLR(vfn));
2350         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2351         vmolr |= size | E1000_VMOLR_LPE;
2352         wr32(E1000_VMOLR(vfn), vmolr);
2353
2354         return 0;
2355 }
2356
2357 /**
2358  * igb_rlpml_set - set maximum receive packet size
2359  * @adapter: board private structure
2360  *
2361  * Configure maximum receivable packet size.
2362  **/
2363 static void igb_rlpml_set(struct igb_adapter *adapter)
2364 {
2365         u32 max_frame_size = adapter->max_frame_size;
2366         struct e1000_hw *hw = &adapter->hw;
2367         u16 pf_id = adapter->vfs_allocated_count;
2368
2369         if (adapter->vlgrp)
2370                 max_frame_size += VLAN_TAG_SIZE;
2371
2372         /* if vfs are enabled we set RLPML to the largest possible request
2373          * size and set the VMOLR RLPML to the size we need */
2374         if (pf_id) {
2375                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2376                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2377         }
2378
2379         wr32(E1000_RLPML, max_frame_size);
2380 }
2381
2382 static inline void igb_set_vmolr(struct igb_adapter *adapter, int vfn)
2383 {
2384         struct e1000_hw *hw = &adapter->hw;
2385         u32 vmolr;
2386
2387         /*
2388          * This register exists only on 82576 and newer, so on older parts
2389          * we should exit and do nothing
2390          */
2391         if (hw->mac.type < e1000_82576)
2392                 return;
2393
2394         vmolr = rd32(E1000_VMOLR(vfn));
2395         vmolr |= E1000_VMOLR_AUPE |        /* Accept untagged packets */
2396                  E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2397
2398         /* clear all bits that might not be set */
2399         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2400
2401         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2402                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2403         /*
2404          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2405          * multicast packets
2406          */
2407         if (vfn <= adapter->vfs_allocated_count)
2408                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2409
2410         wr32(E1000_VMOLR(vfn), vmolr);
2411 }
2412
2413 /**
2414  * igb_configure_rx_ring - Configure a receive ring after Reset
2415  * @adapter: board private structure
2416  * @ring: receive ring to be configured
2417  *
2418  * Configure the Rx unit of the MAC after a reset.
2419  **/
2420 void igb_configure_rx_ring(struct igb_adapter *adapter,
2421                            struct igb_ring *ring)
2422 {
2423         struct e1000_hw *hw = &adapter->hw;
2424         u64 rdba = ring->dma;
2425         int reg_idx = ring->reg_idx;
2426         u32 srrctl, rxdctl;
2427
2428         /* disable the queue */
2429         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2430         wr32(E1000_RXDCTL(reg_idx),
2431                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2432
2433         /* Set DMA base address registers */
2434         wr32(E1000_RDBAL(reg_idx),
2435              rdba & 0x00000000ffffffffULL);
2436         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2437         wr32(E1000_RDLEN(reg_idx),
2438                        ring->count * sizeof(union e1000_adv_rx_desc));
2439
2440         /* initialize head and tail */
2441         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2442         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2443         writel(0, ring->head);
2444         writel(0, ring->tail);
2445
2446         /* set descriptor configuration */
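        /* small buffers use header-split descriptors with packet data placed
         * in a half-page buffer, larger buffers use a single advanced
         * one-buffer descriptor sized to rx_buffer_len */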
2447         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2448                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2449                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2450 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2451                 srrctl |= IGB_RXBUFFER_16384 >>
2452                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2453 #else
2454                 srrctl |= (PAGE_SIZE / 2) >>
2455                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2456 #endif
2457                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2458         } else {
2459                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2460                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2461                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2462         }
2463
2464         wr32(E1000_SRRCTL(reg_idx), srrctl);
2465
2466         /* set filtering for VMDQ pools */
2467         igb_set_vmolr(adapter, reg_idx & 0x7);
2468
2469         /* enable receive descriptor fetching */
2470         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2471         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2472         rxdctl &= 0xFFF00000;
2473         rxdctl |= IGB_RX_PTHRESH;
2474         rxdctl |= IGB_RX_HTHRESH << 8;
2475         rxdctl |= IGB_RX_WTHRESH << 16;
2476         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2477 }
2478
2479 /**
2480  * igb_configure_rx - Configure receive Unit after Reset
2481  * @adapter: board private structure
2482  *
2483  * Configure the Rx unit of the MAC after a reset.
2484  **/
2485 static void igb_configure_rx(struct igb_adapter *adapter)
2486 {
2487         int i;
2488
2489         /* set UTA to appropriate mode */
2490         igb_set_uta(adapter);
2491
2492         /* set the correct pool for the PF default MAC address in entry 0 */
2493         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2494                          adapter->vfs_allocated_count);
2495
2496         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2497          * the Base and Length of the Rx Descriptor Ring */
2498         for (i = 0; i < adapter->num_rx_queues; i++)
2499                 igb_configure_rx_ring(adapter, &adapter->rx_ring[i]);
2500 }
2501
2502 /**
2503  * igb_free_tx_resources - Free Tx Resources per Queue
2504  * @tx_ring: Tx descriptor ring for a specific queue
2505  *
2506  * Free all transmit software resources
2507  **/
2508 void igb_free_tx_resources(struct igb_ring *tx_ring)
2509 {
2510         igb_clean_tx_ring(tx_ring);
2511
2512         vfree(tx_ring->buffer_info);
2513         tx_ring->buffer_info = NULL;
2514
2515         /* if not set, then don't free */
2516         if (!tx_ring->desc)
2517                 return;
2518
2519         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2520                             tx_ring->desc, tx_ring->dma);
2521
2522         tx_ring->desc = NULL;
2523 }
2524
2525 /**
2526  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2527  * @adapter: board private structure
2528  *
2529  * Free all transmit software resources
2530  **/
2531 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2532 {
2533         int i;
2534
2535         for (i = 0; i < adapter->num_tx_queues; i++)
2536                 igb_free_tx_resources(&adapter->tx_ring[i]);
2537 }
2538
2539 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2540                                     struct igb_buffer *buffer_info)
2541 {
2542         buffer_info->dma = 0;
2543         if (buffer_info->skb) {
2544                 skb_dma_unmap(&tx_ring->pdev->dev,
2545                               buffer_info->skb,
2546                               DMA_TO_DEVICE);
2547                 dev_kfree_skb_any(buffer_info->skb);
2548                 buffer_info->skb = NULL;
2549         }
2550         buffer_info->time_stamp = 0;
2551         /* buffer_info must be completely set up in the transmit path */
2552 }
2553
2554 /**
2555  * igb_clean_tx_ring - Free Tx Buffers
2556  * @tx_ring: ring to be cleaned
2557  **/
2558 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2559 {
2560         struct igb_buffer *buffer_info;
2561         unsigned long size;
2562         unsigned int i;
2563
2564         if (!tx_ring->buffer_info)
2565                 return;
2566         /* Free all the Tx ring sk_buffs */
2567
2568         for (i = 0; i < tx_ring->count; i++) {
2569                 buffer_info = &tx_ring->buffer_info[i];
2570                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2571         }
2572
2573         size = sizeof(struct igb_buffer) * tx_ring->count;
2574         memset(tx_ring->buffer_info, 0, size);
2575
2576         /* Zero out the descriptor ring */
2577         memset(tx_ring->desc, 0, tx_ring->size);
2578
2579         tx_ring->next_to_use = 0;
2580         tx_ring->next_to_clean = 0;
2581 }
2582
2583 /**
2584  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2585  * @adapter: board private structure
2586  **/
2587 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2588 {
2589         int i;
2590
2591         for (i = 0; i < adapter->num_tx_queues; i++)
2592                 igb_clean_tx_ring(&adapter->tx_ring[i]);
2593 }
2594
2595 /**
2596  * igb_free_rx_resources - Free Rx Resources
2597  * @rx_ring: ring to clean the resources from
2598  *
2599  * Free all receive software resources
2600  **/
2601 void igb_free_rx_resources(struct igb_ring *rx_ring)
2602 {
2603         igb_clean_rx_ring(rx_ring);
2604
2605         vfree(rx_ring->buffer_info);
2606         rx_ring->buffer_info = NULL;
2607
2608         /* if not set, then don't free */
2609         if (!rx_ring->desc)
2610                 return;
2611
2612         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2613                             rx_ring->desc, rx_ring->dma);
2614
2615         rx_ring->desc = NULL;
2616 }
2617
2618 /**
2619  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2620  * @adapter: board private structure
2621  *
2622  * Free all receive software resources
2623  **/
2624 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2625 {
2626         int i;
2627
2628         for (i = 0; i < adapter->num_rx_queues; i++)
2629                 igb_free_rx_resources(&adapter->rx_ring[i]);
2630 }
2631
2632 /**
2633  * igb_clean_rx_ring - Free Rx Buffers per Queue
2634  * @rx_ring: ring to free buffers from
2635  **/
2636 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2637 {
2638         struct igb_buffer *buffer_info;
2639         unsigned long size;
2640         unsigned int i;
2641
2642         if (!rx_ring->buffer_info)
2643                 return;
2644
2645         /* Free all the Rx ring sk_buffs */
2646         for (i = 0; i < rx_ring->count; i++) {
2647                 buffer_info = &rx_ring->buffer_info[i];
2648                 if (buffer_info->dma) {
2649                         pci_unmap_single(rx_ring->pdev,
2650                                          buffer_info->dma,
2651                                          rx_ring->rx_buffer_len,
2652                                          PCI_DMA_FROMDEVICE);
2653                         buffer_info->dma = 0;
2654                 }
2655
2656                 if (buffer_info->skb) {
2657                         dev_kfree_skb(buffer_info->skb);
2658                         buffer_info->skb = NULL;
2659                 }
2660                 if (buffer_info->page_dma) {
2661                         pci_unmap_page(rx_ring->pdev,
2662                                        buffer_info->page_dma,
2663                                        PAGE_SIZE / 2,
2664                                        PCI_DMA_FROMDEVICE);
2665                         buffer_info->page_dma = 0;
2666                 }
2667                 if (buffer_info->page) {
2668                         put_page(buffer_info->page);
2669                         buffer_info->page = NULL;
2670                         buffer_info->page_offset = 0;
2671                 }
2672         }
2673
2674         size = sizeof(struct igb_buffer) * rx_ring->count;
2675         memset(rx_ring->buffer_info, 0, size);
2676
2677         /* Zero out the descriptor ring */
2678         memset(rx_ring->desc, 0, rx_ring->size);
2679
2680         rx_ring->next_to_clean = 0;
2681         rx_ring->next_to_use = 0;
2682 }
2683
2684 /**
2685  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2686  * @adapter: board private structure
2687  **/
2688 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2689 {
2690         int i;
2691
2692         for (i = 0; i < adapter->num_rx_queues; i++)
2693                 igb_clean_rx_ring(&adapter->rx_ring[i]);
2694 }
2695
2696 /**
2697  * igb_set_mac - Change the Ethernet Address of the NIC
2698  * @netdev: network interface device structure
2699  * @p: pointer to an address structure
2700  *
2701  * Returns 0 on success, negative on failure
2702  **/
2703 static int igb_set_mac(struct net_device *netdev, void *p)
2704 {
2705         struct igb_adapter *adapter = netdev_priv(netdev);
2706         struct e1000_hw *hw = &adapter->hw;
2707         struct sockaddr *addr = p;
2708
2709         if (!is_valid_ether_addr(addr->sa_data))
2710                 return -EADDRNOTAVAIL;
2711
2712         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2713         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2714
2715         /* set the correct pool for the new PF MAC address in entry 0 */
2716         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2717                          adapter->vfs_allocated_count);
2718
2719         return 0;
2720 }
2721
2722 /**
2723  * igb_write_mc_addr_list - write multicast addresses to MTA
2724  * @netdev: network interface device structure
2725  *
2726  * Writes multicast address list to the MTA hash table.
2727  * Returns: -ENOMEM on failure
2728  *                0 on no addresses written
2729  *                X on writing X addresses to MTA
2730  **/
2731 static int igb_write_mc_addr_list(struct net_device *netdev)
2732 {
2733         struct igb_adapter *adapter = netdev_priv(netdev);
2734         struct e1000_hw *hw = &adapter->hw;
2735         struct dev_mc_list *mc_ptr = netdev->mc_list;
2736         u8  *mta_list;
2737         u32 vmolr = 0;
2738         int i;
2739
2740         if (!netdev->mc_count) {
2741                 /* nothing to program, so clear mc list */
2742                 igb_update_mc_addr_list(hw, NULL, 0);
2743                 igb_restore_vf_multicasts(adapter);
2744                 return 0;
2745         }
2746
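        /* the packed list holds 6 bytes (ETH_ALEN) per multicast address */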
2747         mta_list = kzalloc(netdev->mc_count * 6, GFP_ATOMIC);
2748         if (!mta_list)
2749                 return -ENOMEM;
2750
2751         /* set vmolr receive overflow multicast bit */
2752         vmolr |= E1000_VMOLR_ROMPE;
2753
2754         /* The shared function expects a packed array of only addresses. */
2755         mc_ptr = netdev->mc_list;
2756
2757         for (i = 0; i < netdev->mc_count; i++) {
2758                 if (!mc_ptr)
2759                         break;
2760                 memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2761                 mc_ptr = mc_ptr->next;
2762         }
2763         igb_update_mc_addr_list(hw, mta_list, i);
2764         kfree(mta_list);
2765
2766         return netdev->mc_count;
2767 }
2768
2769 /**
2770  * igb_write_uc_addr_list - write unicast addresses to RAR table
2771  * @netdev: network interface device structure
2772  *
2773  * Writes unicast address list to the RAR table.
2774  * Returns: -ENOMEM on failure/insufficient address space
2775  *                0 on no addresses written
2776  *                X on writing X addresses to the RAR table
2777  **/
2778 static int igb_write_uc_addr_list(struct net_device *netdev)
2779 {
2780         struct igb_adapter *adapter = netdev_priv(netdev);
2781         struct e1000_hw *hw = &adapter->hw;
2782         unsigned int vfn = adapter->vfs_allocated_count;
2783         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2784         int count = 0;
2785
2786         /* return ENOMEM indicating insufficient memory for addresses */
2787         if (netdev->uc.count > rar_entries)
2788                 return -ENOMEM;
2789
2790         if (netdev->uc.count && rar_entries) {
2791                 struct netdev_hw_addr *ha;
2792                 list_for_each_entry(ha, &netdev->uc.list, list) {
2793                         if (!rar_entries)
2794                                 break;
2795                         igb_rar_set_qsel(adapter, ha->addr,
2796                                          rar_entries--,
2797                                          vfn);
2798                         count++;
2799                 }
2800         }
2801         /* write the addresses in reverse order to avoid write combining */
2802         for (; rar_entries > 0 ; rar_entries--) {
2803                 wr32(E1000_RAH(rar_entries), 0);
2804                 wr32(E1000_RAL(rar_entries), 0);
2805         }
2806         wrfl();
2807
2808         return count;
2809 }
2810
2811 /**
2812  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2813  * @netdev: network interface device structure
2814  *
2815  * The set_rx_mode entry point is called whenever the unicast or multicast
2816  * address lists or the network interface flags are updated.  This routine is
2817  * responsible for configuring the hardware for proper unicast, multicast,
2818  * promiscuous mode, and all-multi behavior.
2819  **/
2820 static void igb_set_rx_mode(struct net_device *netdev)
2821 {
2822         struct igb_adapter *adapter = netdev_priv(netdev);
2823         struct e1000_hw *hw = &adapter->hw;
2824         unsigned int vfn = adapter->vfs_allocated_count;
2825         u32 rctl, vmolr = 0;
2826         int count;
2827
2828         /* Check for Promiscuous and All Multicast modes */
2829         rctl = rd32(E1000_RCTL);
2830
2831         /* clear the affected bits */
2832         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2833
2834         if (netdev->flags & IFF_PROMISC) {
2835                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2836                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2837         } else {
2838                 if (netdev->flags & IFF_ALLMULTI) {
2839                         rctl |= E1000_RCTL_MPE;
2840                         vmolr |= E1000_VMOLR_MPME;
2841                 } else {
2842                         /*
2843                          * Write addresses to the MTA, if the attempt fails
2844                          * then we should just turn on promiscuous mode so
2845                          * that we can at least receive multicast traffic
2846                          */
2847                         count = igb_write_mc_addr_list(netdev);
2848                         if (count < 0) {
2849                                 rctl |= E1000_RCTL_MPE;
2850                                 vmolr |= E1000_VMOLR_MPME;
2851                         } else if (count) {
2852                                 vmolr |= E1000_VMOLR_ROMPE;
2853                         }
2854                 }
2855                 /*
2856                  * Write addresses to available RAR registers, if there is not
2857                  * sufficient space to store all the addresses then enable
2858                  * unicast promiscuous mode
2859                  */
2860                 count = igb_write_uc_addr_list(netdev);
2861                 if (count < 0) {
2862                         rctl |= E1000_RCTL_UPE;
2863                         vmolr |= E1000_VMOLR_ROPE;
2864                 }
2865                 rctl |= E1000_RCTL_VFE;
2866         }
2867         wr32(E1000_RCTL, rctl);
2868
2869         /*
2870          * In order to support SR-IOV and eventually VMDq it is necessary to set
2871          * the VMOLR to enable the appropriate modes.  Without this workaround
2872          * we will have issues with VLAN tag stripping not being done for frames
2873          * that are only arriving because we are the default pool
2874          */
2875         if (hw->mac.type < e1000_82576)
2876                 return;
2877
2878         vmolr |= rd32(E1000_VMOLR(vfn)) &
2879                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
2880         wr32(E1000_VMOLR(vfn), vmolr);
2881         igb_restore_vf_multicasts(adapter);
2882 }
2883
2884 /* Need to wait a few seconds after link up to get diagnostic information from
2885  * the phy */
2886 static void igb_update_phy_info(unsigned long data)
2887 {
2888         struct igb_adapter *adapter = (struct igb_adapter *) data;
2889         igb_get_phy_info(&adapter->hw);
2890 }
2891
2892 /**
2893  * igb_has_link - check shared code for link and determine up/down
2894  * @adapter: pointer to driver private info
2895  **/
2896 static bool igb_has_link(struct igb_adapter *adapter)
2897 {
2898         struct e1000_hw *hw = &adapter->hw;
2899         bool link_active = false;
2900         s32 ret_val = 0;
2901
2902         /* get_link_status is set on LSC (link status) interrupt or
2903          * rx sequence error interrupt.  get_link_status will stay
2904          * set until e1000_check_for_link establishes link
2905          * for copper adapters ONLY
2906          */
2907         switch (hw->phy.media_type) {
2908         case e1000_media_type_copper:
2909                 if (hw->mac.get_link_status) {
2910                         ret_val = hw->mac.ops.check_for_link(hw);
2911                         link_active = !hw->mac.get_link_status;
2912                 } else {
2913                         link_active = true;
2914                 }
2915                 break;
2916         case e1000_media_type_internal_serdes:
2917                 ret_val = hw->mac.ops.check_for_link(hw);
2918                 link_active = hw->mac.serdes_has_link;
2919                 break;
2920         default:
2921         case e1000_media_type_unknown:
2922                 break;
2923         }
2924
2925         return link_active;
2926 }
2927
2928 /**
2929  * igb_watchdog - Timer Call-back
2930  * @data: pointer to adapter cast into an unsigned long
2931  **/
2932 static void igb_watchdog(unsigned long data)
2933 {
2934         struct igb_adapter *adapter = (struct igb_adapter *)data;
2935         /* Do the rest outside of interrupt context */
2936         schedule_work(&adapter->watchdog_task);
2937 }
2938
2939 static void igb_watchdog_task(struct work_struct *work)
2940 {
2941         struct igb_adapter *adapter = container_of(work,
2942                                                    struct igb_adapter,
2943                                                    watchdog_task);
2944         struct e1000_hw *hw = &adapter->hw;
2945         struct net_device *netdev = adapter->netdev;
2946         u32 link;
2947         int i;
2948
2949         link = igb_has_link(adapter);
2950         if (link) {
2951                 if (!netif_carrier_ok(netdev)) {
2952                         u32 ctrl;
2953                         hw->mac.ops.get_speed_and_duplex(hw,
2954                                                          &adapter->link_speed,
2955                                                          &adapter->link_duplex);
2956
2957                         ctrl = rd32(E1000_CTRL);
2958                         /* Link status message must follow this format */
2959                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
2960                                  "Flow Control: %s\n",
2961                                netdev->name,
2962                                adapter->link_speed,
2963                                adapter->link_duplex == FULL_DUPLEX ?
2964                                  "Full Duplex" : "Half Duplex",
2965                                ((ctrl & E1000_CTRL_TFCE) &&
2966                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
2967                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
2968                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
2969
2970                         /* tweak tx_queue_len according to speed/duplex and
2971                          * adjust the timeout factor */
2972                         netdev->tx_queue_len = adapter->tx_queue_len;
2973                         adapter->tx_timeout_factor = 1;
2974                         switch (adapter->link_speed) {
2975                         case SPEED_10:
2976                                 netdev->tx_queue_len = 10;
2977                                 adapter->tx_timeout_factor = 14;
2978                                 break;
2979                         case SPEED_100:
2980                                 netdev->tx_queue_len = 100;
2981                                 /* maybe add some timeout factor ? */
2982                                 break;
2983                         }
2984
2985                         netif_carrier_on(netdev);
2986
2987                         igb_ping_all_vfs(adapter);
2988
2989                         /* link state has changed, schedule phy info update */
2990                         if (!test_bit(__IGB_DOWN, &adapter->state))
2991                                 mod_timer(&adapter->phy_info_timer,
2992                                           round_jiffies(jiffies + 2 * HZ));
2993                 }
2994         } else {
2995                 if (netif_carrier_ok(netdev)) {
2996                         adapter->link_speed = 0;
2997                         adapter->link_duplex = 0;
2998                         /* Link status message must follow this format */
2999                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3000                                netdev->name);
3001                         netif_carrier_off(netdev);
3002
3003                         igb_ping_all_vfs(adapter);
3004
3005                         /* link state has changed, schedule phy info update */
3006                         if (!test_bit(__IGB_DOWN, &adapter->state))
3007                                 mod_timer(&adapter->phy_info_timer,
3008                                           round_jiffies(jiffies + 2 * HZ));
3009                 }
3010         }
3011
3012         igb_update_stats(adapter);
3013         igb_update_adaptive(hw);
3014
3015         for (i = 0; i < adapter->num_tx_queues; i++) {
3016                 struct igb_ring *tx_ring = &adapter->tx_ring[i];
3017                 if (!netif_carrier_ok(netdev)) {
3018                         /* We've lost link, so the controller stops DMA,
3019                          * but we've got queued Tx work that's never going
3020                          * to get done, so reset controller to flush Tx.
3021                          * (Do the reset outside of interrupt context). */
3022                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3023                                 adapter->tx_timeout_count++;
3024                                 schedule_work(&adapter->reset_task);
3025                                 /* return immediately since reset is imminent */
3026                                 return;
3027                         }
3028                 }
3029
3030                 /* Force detection of hung controller every watchdog period */
3031                 tx_ring->detect_tx_hung = true;
3032         }
3033
3034         /* Cause software interrupt to ensure rx ring is cleaned */
3035         if (adapter->msix_entries) {
3036                 u32 eics = 0;
3037                 for (i = 0; i < adapter->num_q_vectors; i++) {
3038                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3039                         eics |= q_vector->eims_value;
3040                 }
3041                 wr32(E1000_EICS, eics);
3042         } else {
3043                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3044         }
3045
3046         /* Reset the timer */
3047         if (!test_bit(__IGB_DOWN, &adapter->state))
3048                 mod_timer(&adapter->watchdog_timer,
3049                           round_jiffies(jiffies + 2 * HZ));
3050 }
3051
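/* These ranges map onto the interrupt rate targets programmed by
 * igb_set_itr() below: lowest_latency is roughly 70,000 ints/sec,
 * low_latency roughly 20,000 ints/sec and bulk_latency roughly
 * 4,000 ints/sec.
 */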
3052 enum latency_range {
3053         lowest_latency = 0,
3054         low_latency = 1,
3055         bulk_latency = 2,
3056         latency_invalid = 255
3057 };
3058
3059 /**
3060  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3061  *
3062  *      Stores a new ITR value based strictly on packet size.  This
3063  *      algorithm is less sophisticated than that used in igb_update_itr,
3064  *      due to the difficulty of synchronizing statistics across multiple
3065  *      receive rings.  The divisors and thresholds used by this function
3066  *      were determined based on theoretical maximum wire speed and testing
3067  *      data, in order to minimize response time while increasing bulk
3068  *      throughput.
3069  *      This functionality is controlled by the InterruptThrottleRate module
3070  *      parameter (see igb_param.c)
3071  *      NOTE:  This function is called only when operating in a multiqueue
3072  *             receive environment.
3073  * @q_vector: pointer to q_vector
3074  **/
3075 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3076 {
3077         int new_val = q_vector->itr_val;
3078         int avg_wire_size = 0;
3079         struct igb_adapter *adapter = q_vector->adapter;
3080
3081         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3082          * ints/sec - an ITR value of 976 ticks.
3083          */
3084         if (adapter->link_speed != SPEED_1000) {
3085                 new_val = 976;
3086                 goto set_itr_val;
3087         }
3088
3089         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3090                 struct igb_ring *ring = q_vector->rx_ring;
3091                 avg_wire_size = ring->total_bytes / ring->total_packets;
3092         }
3093
3094         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3095                 struct igb_ring *ring = q_vector->tx_ring;
3096                 avg_wire_size = max_t(u32, avg_wire_size,
3097                                       (ring->total_bytes /
3098                                        ring->total_packets));
3099         }
3100
3101         /* if avg_wire_size isn't set no work was done */
3102         if (!avg_wire_size)
3103                 goto clear_counts;
3104
3105         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3106         avg_wire_size += 24;
3107
3108         /* Don't starve jumbo frames */
3109         avg_wire_size = min(avg_wire_size, 3000);
3110
3111         /* Give a little boost to mid-size frames */
3112         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3113                 new_val = avg_wire_size / 3;
3114         else
3115                 new_val = avg_wire_size / 2;
3116
3117 set_itr_val:
3118         if (new_val != q_vector->itr_val) {
3119                 q_vector->itr_val = new_val;
3120                 q_vector->set_itr = 1;
3121         }
3122 clear_counts:
3123         if (q_vector->rx_ring) {
3124                 q_vector->rx_ring->total_bytes = 0;
3125                 q_vector->rx_ring->total_packets = 0;
3126         }
3127         if (q_vector->tx_ring) {
3128                 q_vector->tx_ring->total_bytes = 0;
3129                 q_vector->tx_ring->total_packets = 0;
3130         }
3131 }
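/* A worked example of the sizing above (assuming the 256 ns EITR tick
 * implied by the 976-tick / 4000 ints/sec figure used for non-gigabit
 * links): an average 576-byte frame becomes 576 + 24 = 600 bytes on the
 * wire; since 300 < 600 < 1200, new_val = 600 / 3 = 200 ticks, i.e.
 * roughly 51 usec per interrupt or about 20,000 interrupts/sec.
 */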
3132
3133 /**
3134  * igb_update_itr - update the dynamic ITR value based on statistics
3135  *      Stores a new ITR value based on packets and byte
3136  *      counts during the last interrupt.  The advantage of per interrupt
3137  *      computation is faster updates and more accurate ITR for the current
3138  *      traffic pattern.  Constants in this function were computed
3139  *      based on theoretical maximum wire speed and thresholds were set based
3140  *      on testing data as well as attempting to minimize response time
3141  *      while increasing bulk throughput.
3142  *      This functionality is controlled by the InterruptThrottleRate module
3143  *      parameter (see igb_param.c)
3144  *      NOTE:  These calculations are only valid when operating in a single-
3145  *             queue environment.
3146  * @adapter: pointer to adapter
3147  * @itr_setting: current q_vector->itr_val
3148  * @packets: the number of packets during this measurement interval
3149  * @bytes: the number of bytes during this measurement interval
3150  **/
3151 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3152                                    int packets, int bytes)
3153 {
3154         unsigned int retval = itr_setting;
3155
3156         if (packets == 0)
3157                 goto update_itr_done;
3158
3159         switch (itr_setting) {
3160         case lowest_latency:
3161                 /* handle TSO and jumbo frames */
3162                 if (bytes/packets > 8000)
3163                         retval = bulk_latency;
3164                 else if ((packets < 5) && (bytes > 512))
3165                         retval = low_latency;
3166                 break;
3167         case low_latency:  /* 50 usec aka 20000 ints/s */
3168                 if (bytes > 10000) {
3169                         /* this if handles the TSO accounting */
3170                         if (bytes/packets > 8000) {
3171                                 retval = bulk_latency;
3172                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3173                                 retval = bulk_latency;
3174                         } else if (packets > 35) {
3175                                 retval = lowest_latency;
3176                         }
3177                 } else if (bytes/packets > 2000) {
3178                         retval = bulk_latency;
3179                 } else if (packets <= 2 && bytes < 512) {
3180                         retval = lowest_latency;
3181                 }
3182                 break;
3183         case bulk_latency: /* 250 usec aka 4000 ints/s */
3184                 if (bytes > 25000) {
3185                         if (packets > 35)
3186                                 retval = low_latency;
3187                 } else if (bytes < 1500) {
3188                         retval = low_latency;
3189                 }
3190                 break;
3191         }
3192
3193 update_itr_done:
3194         return retval;
3195 }
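/* Example trace of the table above: while in low_latency, an interval of
 * 8 packets totalling 12000 bytes gives bytes/packets == 1500; bytes is
 * over 10000 and packets < 10, so the setting moves to bulk_latency.  A
 * quiet interval of 2 packets / 400 bytes would instead return to
 * lowest_latency.
 */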
3196
3197 static void igb_set_itr(struct igb_adapter *adapter)
3198 {
3199         struct igb_q_vector *q_vector = adapter->q_vector[0];
3200         u16 current_itr;
3201         u32 new_itr = q_vector->itr_val;
3202
3203         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3204         if (adapter->link_speed != SPEED_1000) {
3205                 current_itr = 0;
3206                 new_itr = 4000;
3207                 goto set_itr_now;
3208         }
3209
3210         adapter->rx_itr = igb_update_itr(adapter,
3211                                     adapter->rx_itr,
3212                                     adapter->rx_ring->total_packets,
3213                                     adapter->rx_ring->total_bytes);
3214
3215         adapter->tx_itr = igb_update_itr(adapter,
3216                                     adapter->tx_itr,
3217                                     adapter->tx_ring->total_packets,
3218                                     adapter->tx_ring->total_bytes);
3219         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3220
3221         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3222         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3223                 current_itr = low_latency;
3224
3225         switch (current_itr) {
3226         /* counts and packets in update_itr are dependent on these numbers */
3227         case lowest_latency:
3228                 new_itr = 56;  /* aka 70,000 ints/sec */
3229                 break;
3230         case low_latency:
3231                 new_itr = 196; /* aka 20,000 ints/sec */
3232                 break;
3233         case bulk_latency:
3234                 new_itr = 980; /* aka 4,000 ints/sec */
3235                 break;
3236         default:
3237                 break;
3238         }
3239
3240 set_itr_now:
3241         adapter->rx_ring->total_bytes = 0;
3242         adapter->rx_ring->total_packets = 0;
3243         adapter->tx_ring->total_bytes = 0;
3244         adapter->tx_ring->total_packets = 0;
3245
3246         if (new_itr != q_vector->itr_val) {
3247                 /* this attempts to bias the interrupt rate towards Bulk
3248                  * by adding intermediate steps when interrupt rate is
3249                  * increasing */
3250                 new_itr = new_itr > q_vector->itr_val ?
3251                              max((new_itr * q_vector->itr_val) /
3252                                  (new_itr + (q_vector->itr_val >> 2)),
3253                                  new_itr) :
3254                              new_itr;
3255                 /* Don't write the value here; it resets the adapter's
3256                  * internal timer, and causes us to delay far longer than
3257                  * we should between interrupts.  Instead, we write the ITR
3258                  * value at the beginning of the next interrupt so the timing
3259                  * ends up being correct.
3260                  */
3261                 q_vector->itr_val = new_itr;
3262                 q_vector->set_itr = 1;
3263         }
3264
3265         return;
3266 }
3267
3268 #define IGB_TX_FLAGS_CSUM               0x00000001
3269 #define IGB_TX_FLAGS_VLAN               0x00000002
3270 #define IGB_TX_FLAGS_TSO                0x00000004
3271 #define IGB_TX_FLAGS_IPV4               0x00000008
3272 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3273 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3274 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
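/* When IGB_TX_FLAGS_VLAN is set, the 16-bit VLAN tag is carried in the
 * upper half of tx_flags (VLAN_MASK/VLAN_SHIFT) while the low bits hold
 * the flag bits above, e.g. a tag of 5 gives
 * tx_flags = 0x00050000 | IGB_TX_FLAGS_VLAN.
 */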
3275
3276 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3277                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3278 {
3279         struct e1000_adv_tx_context_desc *context_desc;
3280         unsigned int i;
3281         int err;
3282         struct igb_buffer *buffer_info;
3283         u32 info = 0, tu_cmd = 0;
3284         u32 mss_l4len_idx, l4len;
3285         *hdr_len = 0;
3286
3287         if (skb_header_cloned(skb)) {
3288                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3289                 if (err)
3290                         return err;
3291         }
3292
3293         l4len = tcp_hdrlen(skb);
3294         *hdr_len += l4len;
3295
3296         if (skb->protocol == htons(ETH_P_IP)) {
3297                 struct iphdr *iph = ip_hdr(skb);
3298                 iph->tot_len = 0;
3299                 iph->check = 0;
3300                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3301                                                          iph->daddr, 0,
3302                                                          IPPROTO_TCP,
3303                                                          0);
3304         } else if (skb_shinfo(skb)->gso_type == SKB_GSO_TCPV6) {
3305                 ipv6_hdr(skb)->payload_len = 0;
3306                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3307                                                        &ipv6_hdr(skb)->daddr,
3308                                                        0, IPPROTO_TCP, 0);
3309         }
3310
3311         i = tx_ring->next_to_use;
3312
3313         buffer_info = &tx_ring->buffer_info[i];
3314         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3315         /* VLAN MACLEN IPLEN */
3316         if (tx_flags & IGB_TX_FLAGS_VLAN)
3317                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3318         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3319         *hdr_len += skb_network_offset(skb);
3320         info |= skb_network_header_len(skb);
3321         *hdr_len += skb_network_header_len(skb);
3322         context_desc->vlan_macip_lens = cpu_to_le32(info);
3323
3324         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3325         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3326
3327         if (skb->protocol == htons(ETH_P_IP))
3328                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3329         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3330
3331         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3332
3333         /* MSS L4LEN IDX */
3334         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3335         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3336
3337         /* For 82575, context index must be unique per ring. */
3338         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3339                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3340
3341         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3342         context_desc->seqnum_seed = 0;
3343
3344         buffer_info->time_stamp = jiffies;
3345         buffer_info->next_to_watch = i;
3346         buffer_info->dma = 0;
3347         i++;
3348         if (i == tx_ring->count)
3349                 i = 0;
3350
3351         tx_ring->next_to_use = i;
3352
3353         return true;
3354 }
3355
3356 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3357                                    struct sk_buff *skb, u32 tx_flags)
3358 {
3359         struct e1000_adv_tx_context_desc *context_desc;
3360         struct pci_dev *pdev = tx_ring->pdev;
3361         struct igb_buffer *buffer_info;
3362         u32 info = 0, tu_cmd = 0;
3363         unsigned int i;
3364
3365         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3366             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3367                 i = tx_ring->next_to_use;
3368                 buffer_info = &tx_ring->buffer_info[i];
3369                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3370
3371                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3372                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3373
3374                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3375                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3376                         info |= skb_network_header_len(skb);
3377
3378                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3379
3380                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3381
3382                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3383                         __be16 protocol;
3384
3385                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3386                                 const struct vlan_ethhdr *vhdr =
3387                                           (const struct vlan_ethhdr*)skb->data;
3388
3389                                 protocol = vhdr->h_vlan_encapsulated_proto;
3390                         } else {
3391                                 protocol = skb->protocol;
3392                         }
3393
3394                         switch (protocol) {
3395                         case cpu_to_be16(ETH_P_IP):
3396                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3397                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3398                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3399                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3400                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3401                                 break;
3402                         case cpu_to_be16(ETH_P_IPV6):
3403                                 /* XXX what about other V6 headers?? */
3404                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3405                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3406                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3407                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3408                                 break;
3409                         default:
3410                                 if (unlikely(net_ratelimit()))
3411                                         dev_warn(&pdev->dev,
3412                                             "partial checksum but proto=%x!\n",
3413                                             skb->protocol);
3414                                 break;
3415                         }
3416                 }
3417
3418                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3419                 context_desc->seqnum_seed = 0;
3420                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3421                         context_desc->mss_l4len_idx =
3422                                 cpu_to_le32(tx_ring->reg_idx << 4);
3423
3424                 buffer_info->time_stamp = jiffies;
3425                 buffer_info->next_to_watch = i;
3426                 buffer_info->dma = 0;
3427
3428                 i++;
3429                 if (i == tx_ring->count)
3430                         i = 0;
3431                 tx_ring->next_to_use = i;
3432
3433                 return true;
3434         }
3435         return false;
3436 }
3437
3438 #define IGB_MAX_TXD_PWR 16
3439 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
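/* (1 << 16) == 65536 bytes is the most data a single advanced data
 * descriptor may carry, hence the BUG_ON() length checks in
 * igb_tx_map_adv() below.
 */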
3440
3441 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3442                                  unsigned int first)
3443 {
3444         struct igb_buffer *buffer_info;
3445         struct pci_dev *pdev = tx_ring->pdev;
3446         unsigned int len = skb_headlen(skb);
3447         unsigned int count = 0, i;
3448         unsigned int f;
3449         dma_addr_t *map;
3450
3451         i = tx_ring->next_to_use;
3452
3453         if (skb_dma_map(&pdev->dev, skb, DMA_TO_DEVICE)) {
3454                 dev_err(&pdev->dev, "TX DMA map failed\n");
3455                 return 0;
3456         }
3457
3458         map = skb_shinfo(skb)->dma_maps;
3459
3460         buffer_info = &tx_ring->buffer_info[i];
3461         BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3462         buffer_info->length = len;
3463         /* set time_stamp *before* dma to help avoid a possible race */
3464         buffer_info->time_stamp = jiffies;
3465         buffer_info->next_to_watch = i;
3466         buffer_info->dma = skb_shinfo(skb)->dma_head;
3467
3468         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3469                 struct skb_frag_struct *frag;
3470
3471                 i++;
3472                 if (i == tx_ring->count)
3473                         i = 0;
3474
3475                 frag = &skb_shinfo(skb)->frags[f];
3476                 len = frag->size;
3477
3478                 buffer_info = &tx_ring->buffer_info[i];
3479                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3480                 buffer_info->length = len;
3481                 buffer_info->time_stamp = jiffies;
3482                 buffer_info->next_to_watch = i;
3483                 buffer_info->dma = map[count];
3484                 count++;
3485         }
3486
3487         tx_ring->buffer_info[i].skb = skb;
3488         tx_ring->buffer_info[first].next_to_watch = i;
3489
3490         return ++count;
3491 }
3492
3493 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3494                                     int tx_flags, int count, u32 paylen,
3495                                     u8 hdr_len)
3496 {
3497         union e1000_adv_tx_desc *tx_desc;
3498         struct igb_buffer *buffer_info;
3499         u32 olinfo_status = 0, cmd_type_len;
3500         unsigned int i = tx_ring->next_to_use;
3501
3502         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3503                         E1000_ADVTXD_DCMD_DEXT);
3504
3505         if (tx_flags & IGB_TX_FLAGS_VLAN)
3506                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3507
3508         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3509                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3510
3511         if (tx_flags & IGB_TX_FLAGS_TSO) {
3512                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3513
3514                 /* insert tcp checksum */
3515                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3516
3517                 /* insert ip checksum */
3518                 if (tx_flags & IGB_TX_FLAGS_IPV4)
3519                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3520
3521         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3522                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3523         }
3524
3525         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3526             (tx_flags & (IGB_TX_FLAGS_CSUM |
3527                          IGB_TX_FLAGS_TSO |
3528                          IGB_TX_FLAGS_VLAN)))
3529                 olinfo_status |= tx_ring->reg_idx << 4;
3530
3531         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3532
3533         do {
3534                 buffer_info = &tx_ring->buffer_info[i];
3535                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3536                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3537                 tx_desc->read.cmd_type_len =
3538                         cpu_to_le32(cmd_type_len | buffer_info->length);
3539                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3540                 count--;
3541                 i++;
3542                 if (i == tx_ring->count)
3543                         i = 0;
3544         } while (count > 0);
3545
3546         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3547         /* Force memory writes to complete before letting h/w
3548          * know there are new descriptors to fetch.  (Only
3549          * applicable for weak-ordered memory model archs,
3550          * such as IA-64). */
3551         wmb();
3552
3553         tx_ring->next_to_use = i;
3554         writel(i, tx_ring->tail);
3555         /* we need this if more than one processor can write to our tail
3556          * at a time; it synchronizes IO on IA64/Altix systems */
3557         mmiowb();
3558 }
3559
3560 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3561 {
3562         struct net_device *netdev = tx_ring->netdev;
3563
3564         netif_stop_subqueue(netdev, tx_ring->queue_index);
3565
3566         /* Herbert's original patch had:
3567          *  smp_mb__after_netif_stop_queue();
3568          * but since that doesn't exist yet, just open code it. */
3569         smp_mb();
3570
3571         /* We need to check again in case another CPU has just
3572          * made room available. */
3573         if (igb_desc_unused(tx_ring) < size)
3574                 return -EBUSY;
3575
3576         /* A reprieve! */
3577         netif_wake_subqueue(netdev, tx_ring->queue_index);
3578         tx_ring->tx_stats.restart_queue++;
3579         return 0;
3580 }
3581
3582 static int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3583 {
3584         if (igb_desc_unused(tx_ring) >= size)
3585                 return 0;
3586         return __igb_maybe_stop_tx(tx_ring, size);
3587 }
3588
3589 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3590                                     struct igb_ring *tx_ring)
3591 {
3592         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3593         unsigned int first;
3594         unsigned int tx_flags = 0;
3595         u8 hdr_len = 0;
3596         int tso = 0, count;
3597         union skb_shared_tx *shtx = skb_tx(skb);
3598
3599         /* need: 1 descriptor per page,
3600          *       + 2 desc gap to keep tail from touching head,
3601          *       + 1 desc for skb->data,
3602          *       + 1 desc for context descriptor,
3603          * otherwise try next time */
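        /* e.g. an skb with 3 page fragments needs 3 + 1 + 1 + 2 = 7
         * descriptors, which is exactly nr_frags + 4 as checked below */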
3604         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3605                 /* this is a hard error */
3606                 return NETDEV_TX_BUSY;
3607         }
3608
3609         if (unlikely(shtx->hardware)) {
3610                 shtx->in_progress = 1;
3611                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3612         }
3613
3614         if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3615                 tx_flags |= IGB_TX_FLAGS_VLAN;
3616                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3617         }
3618
3619         if (skb->protocol == htons(ETH_P_IP))
3620                 tx_flags |= IGB_TX_FLAGS_IPV4;
3621
3622         first = tx_ring->next_to_use;
3623         if (skb_is_gso(skb)) {
3624                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3625
3626                 if (tso < 0) {
3627                         dev_kfree_skb_any(skb);
3628                         return NETDEV_TX_OK;
3629                 }
3630         }
3631
3632         if (tso)
3633                 tx_flags |= IGB_TX_FLAGS_TSO;
3634         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3635                  (skb->ip_summed == CHECKSUM_PARTIAL))
3636                 tx_flags |= IGB_TX_FLAGS_CSUM;
3637
3638         /*
3639          * count reflects descriptors mapped; if 0 or less then a mapping error
3640          * has occurred and we need to rewind the descriptor queue
3641          */
3642         count = igb_tx_map_adv(tx_ring, skb, first);
3643         if (count <= 0) {
3644                 dev_kfree_skb_any(skb);
3645                 tx_ring->buffer_info[first].time_stamp = 0;
3646                 tx_ring->next_to_use = first;
3647                 return NETDEV_TX_OK;
3648         }
3649
3650         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3651
3652         /* Make sure there is space in the ring for the next send. */
3653         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3654
3655         return NETDEV_TX_OK;
3656 }
3657
3658 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3659                                       struct net_device *netdev)
3660 {
3661         struct igb_adapter *adapter = netdev_priv(netdev);
3662         struct igb_ring *tx_ring;
3663         int r_idx = 0;
3664
3665         if (test_bit(__IGB_DOWN, &adapter->state)) {
3666                 dev_kfree_skb_any(skb);
3667                 return NETDEV_TX_OK;
3668         }
3669
3670         if (skb->len <= 0) {
3671                 dev_kfree_skb_any(skb);
3672                 return NETDEV_TX_OK;
3673         }
3674
3675         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3676         tx_ring = adapter->multi_tx_table[r_idx];
3677
3678         /* This goes back to the question of how to logically map a tx queue
3679          * to a flow.  Right now, performance is impacted slightly negatively
3680          * if using multiple tx queues.  If the stack breaks away from a
3681          * single qdisc implementation, we can look at this again. */
3682         return igb_xmit_frame_ring_adv(skb, tx_ring);
3683 }
3684
3685 /**
3686  * igb_tx_timeout - Respond to a Tx Hang
3687  * @netdev: network interface device structure
3688  **/
3689 static void igb_tx_timeout(struct net_device *netdev)
3690 {
3691         struct igb_adapter *adapter = netdev_priv(netdev);
3692         struct e1000_hw *hw = &adapter->hw;
3693
3694         /* Do the reset outside of interrupt context */
3695         adapter->tx_timeout_count++;
3696
3697         schedule_work(&adapter->reset_task);
3698         wr32(E1000_EICS,
3699              (adapter->eims_enable_mask & ~adapter->eims_other));
3700 }
3701
3702 static void igb_reset_task(struct work_struct *work)
3703 {
3704         struct igb_adapter *adapter;
3705         adapter = container_of(work, struct igb_adapter, reset_task);
3706
3707         igb_reinit_locked(adapter);
3708 }
3709
3710 /**
3711  * igb_get_stats - Get System Network Statistics
3712  * @netdev: network interface device structure
3713  *
3714  * Returns the address of the device statistics structure.
3715  * The statistics are actually updated from the timer callback.
3716  **/
3717 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3718 {
3719         /* only return the current stats */
3720         return &netdev->stats;
3721 }
3722
3723 /**
3724  * igb_change_mtu - Change the Maximum Transmission Unit
3725  * @netdev: network interface device structure
3726  * @new_mtu: new value for maximum frame size
3727  *
3728  * Returns 0 on success, negative on failure
3729  **/
3730 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3731 {
3732         struct igb_adapter *adapter = netdev_priv(netdev);
3733         struct pci_dev *pdev = adapter->pdev;
3734         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3735         u32 rx_buffer_len, i;
3736
3737         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3738                 dev_err(&pdev->dev, "Invalid MTU setting\n");
3739                 return -EINVAL;
3740         }
3741
3742         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3743                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
3744                 return -EINVAL;
3745         }
3746
3747         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3748                 msleep(1);
3749
3750         /* igb_down has a dependency on max_frame_size */
3751         adapter->max_frame_size = max_frame;
3752
3753         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3754          * means we reserve 2 more; this pushes us to allocate from the next
3755          * larger slab size.
3756          * i.e. RXBUFFER_2048 --> size-4096 slab
3757          */
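        /* Example: with the default MTU of 1500, max_frame = 1500 + ETH_HLEN
         * (14) + ETH_FCS_LEN (4) = 1518, which falls into the
         * MAXIMUM_ETHERNET_VLAN_SIZE bucket below. */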
3758
3759         if (max_frame <= IGB_RXBUFFER_1024)
3760                 rx_buffer_len = IGB_RXBUFFER_1024;
3761         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3762                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3763         else
3764                 rx_buffer_len = IGB_RXBUFFER_128;
3765
3766         if (netif_running(netdev))
3767                 igb_down(adapter);
3768
3769         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3770                  netdev->mtu, new_mtu);
3771         netdev->mtu = new_mtu;
3772
3773         for (i = 0; i < adapter->num_rx_queues; i++)
3774                 adapter->rx_ring[i].rx_buffer_len = rx_buffer_len;
3775
3776         if (netif_running(netdev))
3777                 igb_up(adapter);
3778         else
3779                 igb_reset(adapter);
3780
3781         clear_bit(__IGB_RESETTING, &adapter->state);
3782
3783         return 0;
3784 }
3785
3786 /**
3787  * igb_update_stats - Update the board statistics counters
3788  * @adapter: board private structure
3789  **/
3790
3791 void igb_update_stats(struct igb_adapter *adapter)
3792 {
3793         struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
3794         struct e1000_hw *hw = &adapter->hw;
3795         struct pci_dev *pdev = adapter->pdev;
3796         u32 rnbc;
3797         u16 phy_tmp;
3798         int i;
3799         u64 bytes, packets;
3800
3801 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3802
3803         /*
3804          * Prevent stats update while adapter is being reset, or if the pci
3805          * connection is down.
3806          */
3807         if (adapter->link_speed == 0)
3808                 return;
3809         if (pci_channel_offline(pdev))
3810                 return;
3811
3812         bytes = 0;
3813         packets = 0;
3814         for (i = 0; i < adapter->num_rx_queues; i++) {
3815                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
3816                 adapter->rx_ring[i].rx_stats.drops += rqdpc_tmp;
3817                 net_stats->rx_fifo_errors += rqdpc_tmp;
3818                 bytes += adapter->rx_ring[i].rx_stats.bytes;
3819                 packets += adapter->rx_ring[i].rx_stats.packets;
3820         }
3821
3822         net_stats->rx_bytes = bytes;
3823         net_stats->rx_packets = packets;
3824
3825         bytes = 0;
3826         packets = 0;
3827         for (i = 0; i < adapter->num_tx_queues; i++) {
3828                 bytes += adapter->tx_ring[i].tx_stats.bytes;
3829                 packets += adapter->tx_ring[i].tx_stats.packets;
3830         }
3831         net_stats->tx_bytes = bytes;
3832         net_stats->tx_packets = packets;
3833
3834         /* read stats registers */
3835         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
3836         adapter->stats.gprc += rd32(E1000_GPRC);
3837         adapter->stats.gorc += rd32(E1000_GORCL);
3838         rd32(E1000_GORCH); /* clear GORCL */
3839         adapter->stats.bprc += rd32(E1000_BPRC);
3840         adapter->stats.mprc += rd32(E1000_MPRC);
3841         adapter->stats.roc += rd32(E1000_ROC);
3842
3843         adapter->stats.prc64 += rd32(E1000_PRC64);
3844         adapter->stats.prc127 += rd32(E1000_PRC127);
3845         adapter->stats.prc255 += rd32(E1000_PRC255);
3846         adapter->stats.prc511 += rd32(E1000_PRC511);
3847         adapter->stats.prc1023 += rd32(E1000_PRC1023);
3848         adapter->stats.prc1522 += rd32(E1000_PRC1522);
3849         adapter->stats.symerrs += rd32(E1000_SYMERRS);
3850         adapter->stats.sec += rd32(E1000_SEC);
3851
3852         adapter->stats.mpc += rd32(E1000_MPC);
3853         adapter->stats.scc += rd32(E1000_SCC);
3854         adapter->stats.ecol += rd32(E1000_ECOL);
3855         adapter->stats.mcc += rd32(E1000_MCC);
3856         adapter->stats.latecol += rd32(E1000_LATECOL);
3857         adapter->stats.dc += rd32(E1000_DC);
3858         adapter->stats.rlec += rd32(E1000_RLEC);
3859         adapter->stats.xonrxc += rd32(E1000_XONRXC);
3860         adapter->stats.xontxc += rd32(E1000_XONTXC);
3861         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
3862         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
3863         adapter->stats.fcruc += rd32(E1000_FCRUC);
3864         adapter->stats.gptc += rd32(E1000_GPTC);
3865         adapter->stats.gotc += rd32(E1000_GOTCL);
3866         rd32(E1000_GOTCH); /* clear GOTCL */
3867         rnbc = rd32(E1000_RNBC);
3868         adapter->stats.rnbc += rnbc;
3869         net_stats->rx_fifo_errors += rnbc;
3870         adapter->stats.ruc += rd32(E1000_RUC);
3871         adapter->stats.rfc += rd32(E1000_RFC);
3872         adapter->stats.rjc += rd32(E1000_RJC);
3873         adapter->stats.tor += rd32(E1000_TORH);
3874         adapter->stats.tot += rd32(E1000_TOTH);
3875         adapter->stats.tpr += rd32(E1000_TPR);
3876
3877         adapter->stats.ptc64 += rd32(E1000_PTC64);
3878         adapter->stats.ptc127 += rd32(E1000_PTC127);
3879         adapter->stats.ptc255 += rd32(E1000_PTC255);
3880         adapter->stats.ptc511 += rd32(E1000_PTC511);
3881         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
3882         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
3883
3884         adapter->stats.mptc += rd32(E1000_MPTC);
3885         adapter->stats.bptc += rd32(E1000_BPTC);
3886
3887         /* used for adaptive IFS */
3888         hw->mac.tx_packet_delta = rd32(E1000_TPT);
3889         adapter->stats.tpt += hw->mac.tx_packet_delta;
3890         hw->mac.collision_delta = rd32(E1000_COLC);
3891         adapter->stats.colc += hw->mac.collision_delta;
3892
3893         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
3894         adapter->stats.rxerrc += rd32(E1000_RXERRC);
3895         adapter->stats.tncrs += rd32(E1000_TNCRS);
3896         adapter->stats.tsctc += rd32(E1000_TSCTC);
3897         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
3898
3899         adapter->stats.iac += rd32(E1000_IAC);
3900         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
3901         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
3902         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
3903         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
3904         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
3905         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
3906         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
3907         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
3908
3909         /* Fill out the OS statistics structure */
3910         net_stats->multicast = adapter->stats.mprc;
3911         net_stats->collisions = adapter->stats.colc;
3912
3913         /* Rx Errors */
3914
3915         /* RLEC on some newer hardware can be incorrect so build
3916          * our own version based on RUC and ROC */
3917         net_stats->rx_errors = adapter->stats.rxerrc +
3918                 adapter->stats.crcerrs + adapter->stats.algnerrc +
3919                 adapter->stats.ruc + adapter->stats.roc +
3920                 adapter->stats.cexterr;
3921         net_stats->rx_length_errors = adapter->stats.ruc +
3922                                       adapter->stats.roc;
3923         net_stats->rx_crc_errors = adapter->stats.crcerrs;
3924         net_stats->rx_frame_errors = adapter->stats.algnerrc;
3925         net_stats->rx_missed_errors = adapter->stats.mpc;
3926
3927         /* Tx Errors */
3928         net_stats->tx_errors = adapter->stats.ecol +
3929                                adapter->stats.latecol;
3930         net_stats->tx_aborted_errors = adapter->stats.ecol;
3931         net_stats->tx_window_errors = adapter->stats.latecol;
3932         net_stats->tx_carrier_errors = adapter->stats.tncrs;
3933
3934         /* Tx Dropped needs to be maintained elsewhere */
3935
3936         /* Phy Stats */
3937         if (hw->phy.media_type == e1000_media_type_copper) {
3938                 if ((adapter->link_speed == SPEED_1000) &&
3939                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
3940                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
3941                         adapter->phy_stats.idle_errors += phy_tmp;
3942                 }
3943         }
3944
3945         /* Management Stats */
3946         adapter->stats.mgptc += rd32(E1000_MGTPTC);
3947         adapter->stats.mgprc += rd32(E1000_MGTPRC);
3948         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
3949 }
3950
3951 static irqreturn_t igb_msix_other(int irq, void *data)
3952 {
3953         struct igb_adapter *adapter = data;
3954         struct e1000_hw *hw = &adapter->hw;
3955         u32 icr = rd32(E1000_ICR);
3956         /* reading ICR causes bit 31 of EICR to be cleared */
3957
3958         if (icr & E1000_ICR_DOUTSYNC) {
3959                 /* HW is reporting DMA is out of sync */
3960                 adapter->stats.doosync++;
3961         }
3962
3963         /* Check for a mailbox event */
3964         if (icr & E1000_ICR_VMMB)
3965                 igb_msg_task(adapter);
3966
3967         if (icr & E1000_ICR_LSC) {
3968                 hw->mac.get_link_status = 1;
3969                 /* guard against interrupt when we're going down */
3970                 if (!test_bit(__IGB_DOWN, &adapter->state))
3971                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
3972         }
3973
3974         if (adapter->vfs_allocated_count)
3975                 wr32(E1000_IMS, E1000_IMS_LSC |
3976                                 E1000_IMS_VMMB |
3977                                 E1000_IMS_DOUTSYNC);
3978         else
3979                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
3980         wr32(E1000_EIMS, adapter->eims_other);
3981
3982         return IRQ_HANDLED;
3983 }
3984
3985 static void igb_write_itr(struct igb_q_vector *q_vector)
3986 {
3987         u32 itr_val = q_vector->itr_val & 0x7FFC;
3988
3989         if (!q_vector->set_itr)
3990                 return;
3991
3992         if (!itr_val)
3993                 itr_val = 0x4;
3994
3995         if (q_vector->itr_shift)
3996                 itr_val |= itr_val << q_vector->itr_shift;
3997         else
3998                 itr_val |= 0x8000000;
3999
4000         writel(itr_val, q_vector->itr_register);
4001         q_vector->set_itr = 0;
4002 }
4003
4004 static irqreturn_t igb_msix_ring(int irq, void *data)
4005 {
4006         struct igb_q_vector *q_vector = data;
4007
4008         /* Write the ITR value calculated from the previous interrupt. */
4009         igb_write_itr(q_vector);
4010
4011         napi_schedule(&q_vector->napi);
4012
4013         return IRQ_HANDLED;
4014 }
4015
4016 #ifdef CONFIG_IGB_DCA
4017 static void igb_update_dca(struct igb_q_vector *q_vector)
4018 {
4019         struct igb_adapter *adapter = q_vector->adapter;
4020         struct e1000_hw *hw = &adapter->hw;
4021         int cpu = get_cpu();
4022
4023         if (q_vector->cpu == cpu)
4024                 goto out_no_update;
4025
4026         if (q_vector->tx_ring) {
4027                 int q = q_vector->tx_ring->reg_idx;
4028                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4029                 if (hw->mac.type == e1000_82575) {
4030                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4031                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4032                 } else {
4033                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4034                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4035                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4036                 }
4037                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4038                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4039         }
4040         if (q_vector->rx_ring) {
4041                 int q = q_vector->rx_ring->reg_idx;
4042                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4043                 if (hw->mac.type == e1000_82575) {
4044                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4045                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4046                 } else {
4047                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4048                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4049                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4050                 }
4051                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4052                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4053                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4054                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4055         }
4056         q_vector->cpu = cpu;
4057 out_no_update:
4058         put_cpu();
4059 }
4060
4061 static void igb_setup_dca(struct igb_adapter *adapter)
4062 {
4063         struct e1000_hw *hw = &adapter->hw;
4064         int i;
4065
4066         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4067                 return;
4068
4069         /* Always use CB2 mode, difference is masked in the CB driver. */
4070         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4071
4072         for (i = 0; i < adapter->num_q_vectors; i++) {
4073                 struct igb_q_vector *q_vector = adapter->q_vector[i];
4074                 q_vector->cpu = -1;
4075                 igb_update_dca(q_vector);
4076         }
4077 }
4078
4079 static int __igb_notify_dca(struct device *dev, void *data)
4080 {
4081         struct net_device *netdev = dev_get_drvdata(dev);
4082         struct igb_adapter *adapter = netdev_priv(netdev);
4083         struct pci_dev *pdev = adapter->pdev;
4084         struct e1000_hw *hw = &adapter->hw;
4085         unsigned long event = *(unsigned long *)data;
4086
4087         switch (event) {
4088         case DCA_PROVIDER_ADD:
4089                 /* if already enabled, don't do it again */
4090                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4091                         break;
4092                 if (dca_add_requester(dev) == 0) {
4093                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4094                         dev_info(&pdev->dev, "DCA enabled\n");
4095                         igb_setup_dca(adapter);
4096                         break;
4097                 }
4098                 /* Fall Through since DCA is disabled. */
4099         case DCA_PROVIDER_REMOVE:
4100                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4101                         /* without this a class_device is left
4102                          * hanging around in the sysfs model */
4103                         dca_remove_requester(dev);
4104                         dev_info(&pdev->dev, "DCA disabled\n");
4105                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4106                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4107                 }
4108                 break;
4109         }
4110
4111         return 0;
4112 }
4113
4114 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4115                           void *p)
4116 {
4117         int ret_val;
4118
4119         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4120                                          __igb_notify_dca);
4121
4122         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4123 }
4124 #endif /* CONFIG_IGB_DCA */
4125
4126 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4127 {
4128         struct e1000_hw *hw = &adapter->hw;
4129         u32 ping;
4130         int i;
4131
4132         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4133                 ping = E1000_PF_CONTROL_MSG;
4134                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4135                         ping |= E1000_VT_MSGTYPE_CTS;
4136                 igb_write_mbx(hw, &ping, 1, i);
4137         }
4138 }
4139
4140 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4141 {
4142         struct e1000_hw *hw = &adapter->hw;
4143         u32 vmolr = rd32(E1000_VMOLR(vf));
4144         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4145
4146         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4147                             IGB_VF_FLAG_MULTI_PROMISC);
4148         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4149
4150         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4151                 vmolr |= E1000_VMOLR_MPME;
4152                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4153         } else {
4154                 /*
4155                  * if we have hashes and we are clearing a multicast promisc
4156                  * flag we need to write the hashes to the MTA as this step
4157                  * was previously skipped
4158                  */
4159                 if (vf_data->num_vf_mc_hashes > 30) {
4160                         vmolr |= E1000_VMOLR_MPME;
4161                 } else if (vf_data->num_vf_mc_hashes) {
4162                         int j;
4163                         vmolr |= E1000_VMOLR_ROMPE;
4164                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4165                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4166                 }
4167         }
4168
4169         wr32(E1000_VMOLR(vf), vmolr);
4170
4171         /* there are flags left unprocessed, likely not supported */
4172         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4173                 return -EINVAL;
4174
4175         return 0;
4176
4177 }
4178
4179 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4180                                   u32 *msgbuf, u32 vf)
4181 {
4182         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4183         u16 *hash_list = (u16 *)&msgbuf[1];
4184         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4185         int i;
4186
4187         /* salt away the number of multicast addresses assigned
4188          * to this VF for later use to restore when the PF multicast
4189          * list changes
4190          */
4191         vf_data->num_vf_mc_hashes = n;
4192
4193         /* only up to 30 hash values supported */
4194         if (n > 30)
4195                 n = 30;
4196
4197         /* store the hashes for later use */
4198         for (i = 0; i < n; i++)
4199                 vf_data->vf_mc_hashes[i] = hash_list[i];
4200
4201         /* Flush and reset the mta with the new values */
4202         igb_set_rx_mode(adapter->netdev);
4203
4204         return 0;
4205 }
4206
4207 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4208 {
4209         struct e1000_hw *hw = &adapter->hw;
4210         struct vf_data_storage *vf_data;
4211         int i, j;
4212
4213         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4214                 u32 vmolr = rd32(E1000_VMOLR(i));
4215                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4216
4217                 vf_data = &adapter->vf_data[i];
4218
4219                 if ((vf_data->num_vf_mc_hashes > 30) ||
4220                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4221                         vmolr |= E1000_VMOLR_MPME;
4222                 } else if (vf_data->num_vf_mc_hashes) {
4223                         vmolr |= E1000_VMOLR_ROMPE;
4224                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4225                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4226                 }
4227                 wr32(E1000_VMOLR(i), vmolr);
4228         }
4229 }
4230
4231 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4232 {
4233         struct e1000_hw *hw = &adapter->hw;
4234         u32 pool_mask, reg, vid;
4235         int i;
4236
4237         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4238
4239         /* Find the vlan filter for this id */
4240         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4241                 reg = rd32(E1000_VLVF(i));
4242
4243                 /* remove the vf from the pool */
4244                 reg &= ~pool_mask;
4245
4246                 /* if pool is empty then remove entry from vfta */
4247                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4248                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4249                         vid = reg & E1000_VLVF_VLANID_MASK;
4250                         igb_vfta_set(hw, vid, false);
4251                         reg = 0;
4252                 }
4253
4254                 wr32(E1000_VLVF(i), reg);
4255         }
4256
4257         adapter->vf_data[vf].vlans_enabled = 0;
4258 }
4259
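/**
 * igb_vlvf_set - add or remove a pool member for a VLAN filter entry
 * @adapter: board private structure
 * @vid: VLAN id to add or remove
 * @add: true to add the pool to the filter, false to remove it
 * @vf: pool index (a VF number, or vfs_allocated_count for the PF)
 *
 * Only meaningful on 82576 and newer parts with VMDq enabled.  The first
 * VLAN added for a VF also grows that VF's RLPML by 4 bytes so a tagged
 * maximum-sized frame still fits; removing the last VLAN shrinks it again.
 **/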
4260 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4261 {
4262         struct e1000_hw *hw = &adapter->hw;
4263         u32 reg, i;
4264
4265         /* The vlvf table only exists on 82576 hardware and newer */
4266         if (hw->mac.type < e1000_82576)
4267                 return -1;
4268
4269         /* we only need to do this if VMDq is enabled */
4270         if (!adapter->vfs_allocated_count)
4271                 return -1;
4272
4273         /* Find the vlan filter for this id */
4274         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4275                 reg = rd32(E1000_VLVF(i));
4276                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4277                     vid == (reg & E1000_VLVF_VLANID_MASK))
4278                         break;
4279         }
4280
4281         if (add) {
4282                 if (i == E1000_VLVF_ARRAY_SIZE) {
4283                         /* Did not find a matching VLAN ID entry that was
4284                          * enabled.  Search for a free filter entry, i.e.
4285                          * one without the enable bit set
4286                          */
4287                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4288                                 reg = rd32(E1000_VLVF(i));
4289                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4290                                         break;
4291                         }
4292                 }
4293                 if (i < E1000_VLVF_ARRAY_SIZE) {
4294                         /* Found an enabled/available entry */
4295                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4296
4297                         /* if !enabled we need to set this up in vfta */
4298                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4299                                 /* add VID to filter table */
4300                                 igb_vfta_set(hw, vid, true);
4301                                 reg |= E1000_VLVF_VLANID_ENABLE;
4302                         }
4303                         reg &= ~E1000_VLVF_VLANID_MASK;
4304                         reg |= vid;
4305                         wr32(E1000_VLVF(i), reg);
4306
4307                         /* do not modify RLPML for PF devices */
4308                         if (vf >= adapter->vfs_allocated_count)
4309                                 return 0;
4310
4311                         if (!adapter->vf_data[vf].vlans_enabled) {
4312                                 u32 size;
4313                                 reg = rd32(E1000_VMOLR(vf));
4314                                 size = reg & E1000_VMOLR_RLPML_MASK;
4315                                 size += 4;
4316                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4317                                 reg |= size;
4318                                 wr32(E1000_VMOLR(vf), reg);
4319                         }
4320
4321                         adapter->vf_data[vf].vlans_enabled++;
4322                         return 0;
4323                 }
4324         } else {
4325                 if (i < E1000_VLVF_ARRAY_SIZE) {
4326                         /* remove vf from the pool */
4327                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4328                         /* if pool is empty then remove entry from vfta */
4329                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4330                                 reg = 0;
4331                                 igb_vfta_set(hw, vid, false);
4332                         }
4333                         wr32(E1000_VLVF(i), reg);
4334
4335                         /* do not modify RLPML for PF devices */
4336                         if (vf >= adapter->vfs_allocated_count)
4337                                 return 0;
4338
4339                         adapter->vf_data[vf].vlans_enabled--;
4340                         if (!adapter->vf_data[vf].vlans_enabled) {
4341                                 u32 size;
4342                                 reg = rd32(E1000_VMOLR(vf));
4343                                 size = reg & E1000_VMOLR_RLPML_MASK;
4344                                 size -= 4;
4345                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4346                                 reg |= size;
4347                                 wr32(E1000_VMOLR(vf), reg);
4348                         }
4349                         return 0;
4350                 }
4351         }
4352         return -1;
4353 }
4354
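/**
 * igb_set_vf_vlan - handle a VF VLAN filter mailbox request
 * @adapter: board private structure
 * @msgbuf: mailbox message; the info field selects add or remove and word 1
 *          holds the VLAN id
 * @vf: VF index the request came from
 **/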
4355 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4356 {
4357         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4358         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4359
4360         return igb_vlvf_set(adapter, vid, add, vf);
4361 }
4362
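/**
 * igb_vf_reset - return a VF's offload, VLAN and multicast state to defaults
 * @adapter: board private structure
 * @vf: VF index to reset
 **/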
4363 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4364 {
4365         /* clear all flags */
4366         adapter->vf_data[vf].flags = 0;
4367         adapter->vf_data[vf].last_nack = jiffies;
4368
4369         /* reset offloads to defaults */
4370         igb_set_vmolr(adapter, vf);
4371
4372         /* reset vlans for device */
4373         igb_clear_vf_vfta(adapter, vf);
4374
4375         /* reset multicast table array for vf */
4376         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4377
4378         /* Flush and reset the mta with the new values */
4379         igb_set_rx_mode(adapter->netdev);
4380 }
4381
4382 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4383 {
4384         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4385
4386         /* generate a new mac address as we were hotplug removed/added */
4387         random_ether_addr(vf_mac);
4388
4389         /* process remaining reset events */
4390         igb_vf_reset(adapter, vf);
4391 }
4392
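/**
 * igb_vf_reset_msg - handle a VF reset request received over the mailbox
 * @adapter: board private structure
 * @vf: VF index that requested the reset
 *
 * Clears the VF state, programs its MAC address into a dedicated RAR entry,
 * enables its transmit and receive queues and replies with an ACK carrying
 * the MAC address so the VF driver can finish initializing.
 **/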
4393 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4394 {
4395         struct e1000_hw *hw = &adapter->hw;
4396         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4397         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4398         u32 reg, msgbuf[3];
4399         u8 *addr = (u8 *)(&msgbuf[1]);
4400
4401         /* process all the same items cleared in a function level reset */
4402         igb_vf_reset(adapter, vf);
4403
4404         /* set vf mac address */
4405         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4406
4407         /* enable transmit and receive for vf */
4408         reg = rd32(E1000_VFTE);
4409         wr32(E1000_VFTE, reg | (1 << vf));
4410         reg = rd32(E1000_VFRE);
4411         wr32(E1000_VFRE, reg | (1 << vf));
4412
4413         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4414
4415         /* reply to reset with ack and vf mac address */
4416         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4417         memcpy(addr, vf_mac, 6);
4418         igb_write_mbx(hw, msgbuf, 3, vf);
4419 }
4420
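/**
 * igb_set_vf_mac_addr - handle a VF request to change its MAC address
 * @adapter: board private structure
 * @msg: mailbox message with the new address starting at word 1
 * @vf: VF index the request came from
 **/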
4421 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4422 {
4423         unsigned char *addr = (char *)&msg[1];
4424         int err = -1;
4425
4426         if (is_valid_ether_addr(addr))
4427                 err = igb_set_vf_mac(adapter, vf, addr);
4428
4429         return err;
4430 }
4431
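/**
 * igb_rcv_ack_from_vf - respond to a mailbox ACK from a VF
 * @adapter: board private structure
 * @vf: VF index the ACK came from
 *
 * A VF that has not yet completed a reset is NACKed, rate limited to once
 * every two seconds.
 **/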
4432 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4433 {
4434         struct e1000_hw *hw = &adapter->hw;
4435         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4436         u32 msg = E1000_VT_MSGTYPE_NACK;
4437
4438         /* if device isn't clear to send it shouldn't be reading either */
4439         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4440             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4441                 igb_write_mbx(hw, &msg, 1, vf);
4442                 vf_data->last_nack = jiffies;
4443         }
4444 }
4445
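/**
 * igb_rcv_msg_from_vf - read and dispatch a mailbox message from a VF
 * @adapter: board private structure
 * @vf: VF index the message came from
 *
 * Reset requests are serviced immediately.  Any other request (MAC address,
 * promiscuous mode, multicast list, maximum packet size, VLAN filter) is
 * NACKed until the VF has completed a reset, then routed to the matching
 * handler; the result is returned to the VF as an ACK or NACK with the CTS
 * bit set.
 **/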
4446 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4447 {
4448         struct pci_dev *pdev = adapter->pdev;
4449         u32 msgbuf[E1000_VFMAILBOX_SIZE];
4450         struct e1000_hw *hw = &adapter->hw;
4451         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4452         s32 retval;
4453
4454         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4455
4456         if (retval)
4457                 dev_err(&pdev->dev, "Error receiving message from VF\n");
4458
4459         /* this is a message we already processed, do nothing */
4460         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4461                 return;
4462
4463         /*
4464          * until the vf completes a reset it should not be
4465          * allowed to start any configuration.
4466          */
4467
4468         if (msgbuf[0] == E1000_VF_RESET) {
4469                 igb_vf_reset_msg(adapter, vf);
4470                 return;
4471         }
4472
4473         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4474                 msgbuf[0] = E1000_VT_MSGTYPE_NACK;
4475                 if (time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4476                         igb_write_mbx(hw, msgbuf, 1, vf);
4477                         vf_data->last_nack = jiffies;
4478                 }
4479                 return;
4480         }
4481
4482         switch ((msgbuf[0] & 0xFFFF)) {
4483         case E1000_VF_SET_MAC_ADDR:
4484                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4485                 break;
4486         case E1000_VF_SET_PROMISC:
4487                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4488                 break;
4489         case E1000_VF_SET_MULTICAST:
4490                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4491                 break;
4492         case E1000_VF_SET_LPE:
4493                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4494                 break;
4495         case E1000_VF_SET_VLAN:
4496                 retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4497                 break;
4498         default:
4499                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4500                 retval = -1;
4501                 break;
4502         }
4503
4504         /* notify the VF of the results of what it sent us */
4505         if (retval)
4506                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4507         else
4508                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4509
4510         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4511
4512         igb_write_mbx(hw, msgbuf, 1, vf);
4513 }
4514
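/**
 * igb_msg_task - service the PF/VF mailboxes
 * @adapter: board private structure
 *
 * Checks each VF mailbox for reset requests, pending messages and ACKs and
 * handles them in that order.
 **/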
4515 static void igb_msg_task(struct igb_adapter *adapter)
4516 {
4517         struct e1000_hw *hw = &adapter->hw;
4518         u32 vf;
4519
4520         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4521                 /* process any reset requests */
4522                 if (!igb_check_for_rst(hw, vf))
4523                         igb_vf_reset_event(adapter, vf);
4524
4525                 /* process any messages pending */
4526                 if (!igb_check_for_msg(hw, vf))
4527                         igb_rcv_msg_from_vf(adapter, vf);
4528
4529                 /* process any acks */
4530                 if (!igb_check_for_ack(hw, vf))
4531                         igb_rcv_ack_from_vf(adapter, vf);
4532         }
4533 }
4534
4535 /**
4536  *  igb_set_uta - Set unicast filter table address
4537  *  @adapter: board private structure
4538  *
4539  *  The unicast table address is a register array of 32-bit registers.
4540  *  The table is meant to be used in a way similar to how the MTA is used;
4541  *  however, due to certain limitations in the hardware it is necessary to
4542  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4543  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
4544  **/
4545 static void igb_set_uta(struct igb_adapter *adapter)
4546 {
4547         struct e1000_hw *hw = &adapter->hw;
4548         int i;
4549
4550         /* The UTA table only exists on 82576 hardware and newer */
4551         if (hw->mac.type < e1000_82576)
4552                 return;
4553
4554         /* we only need to do this if VMDq is enabled */
4555         if (!adapter->vfs_allocated_count)
4556                 return;
4557
4558         for (i = 0; i < hw->mac.uta_reg_count; i++)
4559                 array_wr32(E1000_UTA, i, ~0);
4560 }
4561
4562 /**
4563  * igb_intr_msi - Interrupt Handler
4564  * @irq: interrupt number
4565  * @data: pointer to a network interface device structure
4566  **/
4567 static irqreturn_t igb_intr_msi(int irq, void *data)
4568 {
4569         struct igb_adapter *adapter = data;
4570         struct igb_q_vector *q_vector = adapter->q_vector[0];
4571         struct e1000_hw *hw = &adapter->hw;
4572         /* read ICR disables interrupts using IAM */
4573         u32 icr = rd32(E1000_ICR);
4574
4575         igb_write_itr(q_vector);
4576
4577         if (icr & E1000_ICR_DOUTSYNC) {
4578                 /* HW is reporting DMA is out of sync */
4579                 adapter->stats.doosync++;
4580         }
4581
4582         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4583                 hw->mac.get_link_status = 1;
4584                 if (!test_bit(__IGB_DOWN, &adapter->state))
4585                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4586         }
4587
4588         napi_schedule(&q_vector->napi);
4589
4590         return IRQ_HANDLED;
4591 }
4592
4593 /**
4594  * igb_intr - Legacy Interrupt Handler
4595  * @irq: interrupt number
4596  * @data: pointer to a network interface device structure
4597  **/
4598 static irqreturn_t igb_intr(int irq, void *data)
4599 {
4600         struct igb_adapter *adapter = data;
4601         struct igb_q_vector *q_vector = adapter->q_vector[0];
4602         struct e1000_hw *hw = &adapter->hw;
4603         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4604          * need for the IMC write */
4605         u32 icr = rd32(E1000_ICR);
4606         if (!icr)
4607                 return IRQ_NONE;  /* Not our interrupt */
4608
4609         igb_write_itr(q_vector);
4610
4611         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4612          * not set, then the adapter didn't send an interrupt */
4613         if (!(icr & E1000_ICR_INT_ASSERTED))
4614                 return IRQ_NONE;
4615
4616         if (icr & E1000_ICR_DOUTSYNC) {
4617                 /* HW is reporting DMA is out of sync */
4618                 adapter->stats.doosync++;
4619         }
4620
4621         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4622                 hw->mac.get_link_status = 1;
4623                 /* guard against interrupt when we're going down */
4624                 if (!test_bit(__IGB_DOWN, &adapter->state))
4625                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4626         }
4627
4628         napi_schedule(&q_vector->napi);
4629
4630         return IRQ_HANDLED;
4631 }
4632
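/**
 * igb_ring_irq_enable - update ITR and re-enable interrupts for a q_vector
 * @q_vector: queue vector that just finished NAPI polling
 *
 * Re-tunes the interrupt throttle rate when adaptive ITR is configured, then
 * re-arms either the vector's EIMS bit (MSI-X) or the global interrupt
 * (MSI/legacy), unless the adapter is being taken down.
 **/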
4633 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4634 {
4635         struct igb_adapter *adapter = q_vector->adapter;
4636         struct e1000_hw *hw = &adapter->hw;
4637
4638         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4639             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4640                 if (!adapter->msix_entries)
4641                         igb_set_itr(adapter);
4642                 else
4643                         igb_update_ring_itr(q_vector);
4644         }
4645
4646         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4647                 if (adapter->msix_entries)
4648                         wr32(E1000_EIMS, q_vector->eims_value);
4649                 else
4650                         igb_irq_enable(adapter);
4651         }
4652 }
4653
4654 /**
4655  * igb_poll - NAPI Rx polling callback
4656  * @napi: napi polling structure
4657  * @budget: count of how many packets we should handle
4658  **/
4659 static int igb_poll(struct napi_struct *napi, int budget)
4660 {
4661         struct igb_q_vector *q_vector = container_of(napi,
4662                                                      struct igb_q_vector,
4663                                                      napi);
4664         int tx_clean_complete = 1, work_done = 0;
4665
4666 #ifdef CONFIG_IGB_DCA
4667         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4668                 igb_update_dca(q_vector);
4669 #endif
4670         if (q_vector->tx_ring)
4671                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4672
4673         if (q_vector->rx_ring)
4674                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4675
4676         if (!tx_clean_complete)
4677                 work_done = budget;
4678
4679         /* If not enough Rx work done, exit the polling mode */
4680         if (work_done < budget) {
4681                 napi_complete(napi);
4682                 igb_ring_irq_enable(q_vector);
4683         }
4684
4685         return work_done;
4686 }
4687
4688 /**
4689  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4690  * @adapter: board private structure
4691  * @shhwtstamps: timestamp structure to update
4692  * @regval: unsigned 64bit system time value.
4693  *
4694  * We need to convert the system time value stored in the RX/TXSTMP registers
4695  * into a hwtstamp which can be used by the upper level timestamping functions
4696  */
4697 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4698                                    struct skb_shared_hwtstamps *shhwtstamps,
4699                                    u64 regval)
4700 {
4701         u64 ns;
4702
4703         ns = timecounter_cyc2time(&adapter->clock, regval);
4704         timecompare_update(&adapter->compare, ns);
4705         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4706         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4707         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4708 }
4709
4710 /**
4711  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4712  * @q_vector: pointer to q_vector containing needed info
4713  * @skb: packet that was just sent
4714  *
4715  * If we were asked to do hardware stamping and such a time stamp is
4716  * available, then it must have been for this skb here because we allow
4717  * only one such packet into the queue.
4718  */
4719 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4720 {
4721         struct igb_adapter *adapter = q_vector->adapter;
4722         union skb_shared_tx *shtx = skb_tx(skb);
4723         struct e1000_hw *hw = &adapter->hw;
4724         struct skb_shared_hwtstamps shhwtstamps;
4725         u64 regval;
4726
4727         /* if skb does not support hw timestamp or TX stamp not valid exit */
4728         if (likely(!shtx->hardware) ||
4729             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4730                 return;
4731
4732         regval = rd32(E1000_TXSTMPL);
4733         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4734
4735         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4736         skb_tstamp_tx(skb, &shhwtstamps);
4737 }
4738
4739 /**
4740  * igb_clean_tx_irq - Reclaim resources after transmit completes
4741  * @q_vector: pointer to q_vector containing needed info
4742  * returns true if ring is completely cleaned
4743  **/
4744 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4745 {
4746         struct igb_adapter *adapter = q_vector->adapter;
4747         struct igb_ring *tx_ring = q_vector->tx_ring;
4748         struct net_device *netdev = tx_ring->netdev;
4749         struct e1000_hw *hw = &adapter->hw;
4750         struct igb_buffer *buffer_info;
4751         struct sk_buff *skb;
4752         union e1000_adv_tx_desc *tx_desc, *eop_desc;
4753         unsigned int total_bytes = 0, total_packets = 0;
4754         unsigned int i, eop, count = 0;
4755         bool cleaned = false;
4756
4757         i = tx_ring->next_to_clean;
4758         eop = tx_ring->buffer_info[i].next_to_watch;
4759         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4760
4761         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
4762                (count < tx_ring->count)) {
4763                 for (cleaned = false; !cleaned; count++) {
4764                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4765                         buffer_info = &tx_ring->buffer_info[i];
4766                         cleaned = (i == eop);
4767                         skb = buffer_info->skb;
4768
4769                         if (skb) {
4770                                 unsigned int segs, bytecount;
4771                                 /* gso_segs is currently only valid for tcp */
4772                                 segs = skb_shinfo(skb)->gso_segs ?: 1;
4773                                 /* multiply data chunks by size of headers */
4774                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
4775                                             skb->len;
4776                                 total_packets += segs;
4777                                 total_bytes += bytecount;
4778
4779                                 igb_tx_hwtstamp(q_vector, skb);
4780                         }
4781
4782                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
4783                         tx_desc->wb.status = 0;
4784
4785                         i++;
4786                         if (i == tx_ring->count)
4787                                 i = 0;
4788                 }
4789                 eop = tx_ring->buffer_info[i].next_to_watch;
4790                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
4791         }
4792
4793         tx_ring->next_to_clean = i;
4794
4795         if (unlikely(count &&
4796                      netif_carrier_ok(netdev) &&
4797                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
4798                 /* Make sure that anybody stopping the queue after this
4799                  * sees the new next_to_clean.
4800                  */
4801                 smp_mb();
4802                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
4803                     !(test_bit(__IGB_DOWN, &adapter->state))) {
4804                         netif_wake_subqueue(netdev, tx_ring->queue_index);
4805                         tx_ring->tx_stats.restart_queue++;
4806                 }
4807         }
4808
4809         if (tx_ring->detect_tx_hung) {
4810                 /* Detect a transmit hang in hardware; this serializes the
4811                  * check with the clearing of time_stamp and movement of i */
4812                 tx_ring->detect_tx_hung = false;
4813                 if (tx_ring->buffer_info[i].time_stamp &&
4814                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
4815                                (adapter->tx_timeout_factor * HZ))
4816                     && !(rd32(E1000_STATUS) &
4817                          E1000_STATUS_TXOFF)) {
4818
4819                         /* detected Tx unit hang */
4820                         dev_err(&tx_ring->pdev->dev,
4821                                 "Detected Tx Unit Hang\n"
4822                                 "  Tx Queue             <%d>\n"
4823                                 "  TDH                  <%x>\n"
4824                                 "  TDT                  <%x>\n"
4825                                 "  next_to_use          <%x>\n"
4826                                 "  next_to_clean        <%x>\n"
4827                                 "buffer_info[next_to_clean]\n"
4828                                 "  time_stamp           <%lx>\n"
4829                                 "  next_to_watch        <%x>\n"
4830                                 "  jiffies              <%lx>\n"
4831                                 "  desc.status          <%x>\n",
4832                                 tx_ring->queue_index,
4833                                 readl(tx_ring->head),
4834                                 readl(tx_ring->tail),
4835                                 tx_ring->next_to_use,
4836                                 tx_ring->next_to_clean,
4837                                 tx_ring->buffer_info[eop].time_stamp,
4838                                 eop,
4839                                 jiffies,
4840                                 eop_desc->wb.status);
4841                         netif_stop_subqueue(netdev, tx_ring->queue_index);
4842                 }
4843         }
4844         tx_ring->total_bytes += total_bytes;
4845         tx_ring->total_packets += total_packets;
4846         tx_ring->tx_stats.bytes += total_bytes;
4847         tx_ring->tx_stats.packets += total_packets;
4848         return (count < tx_ring->count);
4849 }
4850
4851 /**
4852  * igb_receive_skb - helper function to handle rx indications
4853  * @q_vector: structure containing interrupt and ring information
4854  * @skb: packet to send up
4855  * @vlan_tag: vlan tag for packet
4856  **/
4857 static void igb_receive_skb(struct igb_q_vector *q_vector,
4858                             struct sk_buff *skb,
4859                             u16 vlan_tag)
4860 {
4861         struct igb_adapter *adapter = q_vector->adapter;
4862
4863         if (vlan_tag)
4864                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
4865                                  vlan_tag, skb);
4866         else
4867                 napi_gro_receive(&q_vector->napi, skb);
4868 }
4869
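/**
 * igb_rx_checksum_adv - report hardware checksum results to the stack
 * @ring: rx ring the descriptor came from
 * @status_err: status/error bits from the rx descriptor write-back
 * @skb: packet being processed
 *
 * Marks the skb CHECKSUM_UNNECESSARY when hardware validated the TCP/UDP
 * checksum, and leaves it at CHECKSUM_NONE when checksum offload is
 * disabled, the Ignore Checksum bit is set or an error was reported.
 **/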
4870 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
4871                                        u32 status_err, struct sk_buff *skb)
4872 {
4873         skb->ip_summed = CHECKSUM_NONE;
4874
4875         /* Ignore Checksum bit is set or checksum is disabled through ethtool */
4876         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
4877              (status_err & E1000_RXD_STAT_IXSM))
4878                 return;
4879
4880         /* TCP/UDP checksum error bit is set */
4881         if (status_err &
4882             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
4883                 /*
4884                  * work around errata with sctp packets where the TCPE aka
4885                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
4886                  * packets, (aka let the stack check the crc32c)
4887                  */
4888                 if (!((skb->len == 60) &&
4889                       (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM)))
4890                         ring->rx_stats.csum_err++;
4891
4892                 /* let the stack verify checksum errors */
4893                 return;
4894         }
4895         /* It must be a TCP or UDP packet with a valid checksum */
4896         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
4897                 skb->ip_summed = CHECKSUM_UNNECESSARY;
4898
4899         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
4900 }
4901
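/**
 * igb_rx_hwtstamp - pull an rx hardware time stamp, if one was latched
 * @q_vector: queue vector handling the packet
 * @staterr: status/error bits from the rx descriptor write-back
 * @skb: packet to attach the time stamp to
 **/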
4902 static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
4903                                    struct sk_buff *skb)
4904 {
4905         struct igb_adapter *adapter = q_vector->adapter;
4906         struct e1000_hw *hw = &adapter->hw;
4907         u64 regval;
4908
4909         /*
4910          * If this bit is set, then the RX registers contain the time stamp. No
4911          * other packet will be time stamped until we read these registers, so
4912          * read the registers to make them available again. Because only one
4913          * packet can be time stamped at a time, we know that the register
4914          * values must belong to this one here and therefore we don't need to
4915          * compare any of the additional attributes stored for it.
4916          *
4917          * If nothing went wrong, then it should have a skb_shared_tx that we
4918          * can turn into a skb_shared_hwtstamps.
4919          */
4920         if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
4921                 return;
4922         if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
4923                 return;
4924
4925         regval = rd32(E1000_RXSTMPL);
4926         regval |= (u64)rd32(E1000_RXSTMPH) << 32;
4927
4928         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
4929 }
4930 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
4931                                union e1000_adv_rx_desc *rx_desc)
4932 {
4933         /* HW will not DMA in data larger than the given buffer, even if it
4934          * parses the (NFS, of course) header to be larger.  In that case, it
4935          * fills the header buffer and spills the rest into the page.
4936          */
4937         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
4938                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
4939         if (hlen > rx_ring->rx_buffer_len)
4940                 hlen = rx_ring->rx_buffer_len;
4941         return hlen;
4942 }
4943
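/**
 * igb_clean_rx_irq_adv - clean completed rx descriptors; packet split
 * @q_vector: queue vector owning the rx ring
 * @work_done: running count of packets processed, updated for NAPI
 * @budget: maximum number of packets this call may process
 *
 * Reassembles header-split and multi-descriptor packets, unmaps their
 * buffers, performs checksum and time stamp handling, hands the packets to
 * the stack and refills rx buffers as descriptors are consumed.
 * returns true if any descriptors were cleaned
 **/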
4944 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
4945                                  int *work_done, int budget)
4946 {
4947         struct igb_ring *rx_ring = q_vector->rx_ring;
4948         struct net_device *netdev = rx_ring->netdev;
4949         struct pci_dev *pdev = rx_ring->pdev;
4950         union e1000_adv_rx_desc *rx_desc , *next_rxd;
4951         struct igb_buffer *buffer_info , *next_buffer;
4952         struct sk_buff *skb;
4953         bool cleaned = false;
4954         int cleaned_count = 0;
4955         int current_node = numa_node_id();
4956         unsigned int total_bytes = 0, total_packets = 0;
4957         unsigned int i;
4958         u32 staterr;
4959         u16 length;
4960         u16 vlan_tag;
4961
4962         i = rx_ring->next_to_clean;
4963         buffer_info = &rx_ring->buffer_info[i];
4964         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
4965         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
4966
4967         while (staterr & E1000_RXD_STAT_DD) {
4968                 if (*work_done >= budget)
4969                         break;
4970                 (*work_done)++;
4971
4972                 skb = buffer_info->skb;
4973                 prefetch(skb->data - NET_IP_ALIGN);
4974                 buffer_info->skb = NULL;
4975
4976                 i++;
4977                 if (i == rx_ring->count)
4978                         i = 0;
4979
4980                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
4981                 prefetch(next_rxd);
4982                 next_buffer = &rx_ring->buffer_info[i];
4983
4984                 length = le16_to_cpu(rx_desc->wb.upper.length);
4985                 cleaned = true;
4986                 cleaned_count++;
4987
4988                 if (buffer_info->dma) {
4989                         pci_unmap_single(pdev, buffer_info->dma,
4990                                          rx_ring->rx_buffer_len,
4991                                          PCI_DMA_FROMDEVICE);
4992                         buffer_info->dma = 0;
4993                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
4994                                 skb_put(skb, length);
4995                                 goto send_up;
4996                         }
4997                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
4998                 }
4999
5000                 if (length) {
5001                         pci_unmap_page(pdev, buffer_info->page_dma,
5002                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
5003                         buffer_info->page_dma = 0;
5004
5005                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
5006                                                 buffer_info->page,
5007                                                 buffer_info->page_offset,
5008                                                 length);
5009
5010                         if ((page_count(buffer_info->page) != 1) ||
5011                             (page_to_nid(buffer_info->page) != current_node))
5012                                 buffer_info->page = NULL;
5013                         else
5014                                 get_page(buffer_info->page);
5015
5016                         skb->len += length;
5017                         skb->data_len += length;
5018                         skb->truesize += length;
5019                 }
5020
5021                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5022                         buffer_info->skb = next_buffer->skb;
5023                         buffer_info->dma = next_buffer->dma;
5024                         next_buffer->skb = skb;
5025                         next_buffer->dma = 0;
5026                         goto next_desc;
5027                 }
5028 send_up:
5029                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5030                         dev_kfree_skb_irq(skb);
5031                         goto next_desc;
5032                 }
5033
5034                 igb_rx_hwtstamp(q_vector, staterr, skb);
5035                 total_bytes += skb->len;
5036                 total_packets++;
5037
5038                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5039
5040                 skb->protocol = eth_type_trans(skb, netdev);
5041                 skb_record_rx_queue(skb, rx_ring->queue_index);
5042
5043                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5044                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5045
5046                 igb_receive_skb(q_vector, skb, vlan_tag);
5047
5048 next_desc:
5049                 rx_desc->wb.upper.status_error = 0;
5050
5051                 /* return some buffers to hardware, one at a time is too slow */
5052                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5053                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5054                         cleaned_count = 0;
5055                 }
5056
5057                 /* use prefetched values */
5058                 rx_desc = next_rxd;
5059                 buffer_info = next_buffer;
5060                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5061         }
5062
5063         rx_ring->next_to_clean = i;
5064         cleaned_count = igb_desc_unused(rx_ring);
5065
5066         if (cleaned_count)
5067                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5068
5069         rx_ring->total_packets += total_packets;
5070         rx_ring->total_bytes += total_bytes;
5071         rx_ring->rx_stats.packets += total_packets;
5072         rx_ring->rx_stats.bytes += total_bytes;
5073         return cleaned;
5074 }
5075
5076 /**
5077  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5078  * @rx_ring: address of the rx descriptor ring to refill
 * @cleaned_count: number of buffers to allocate and map
5079  **/
5080 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5081 {
5082         struct net_device *netdev = rx_ring->netdev;
5083         union e1000_adv_rx_desc *rx_desc;
5084         struct igb_buffer *buffer_info;
5085         struct sk_buff *skb;
5086         unsigned int i;
5087         int bufsz;
5088
5089         i = rx_ring->next_to_use;
5090         buffer_info = &rx_ring->buffer_info[i];
5091
5092         bufsz = rx_ring->rx_buffer_len;
5093
5094         while (cleaned_count--) {
5095                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5096
5097                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5098                         if (!buffer_info->page) {
5099                                 buffer_info->page = netdev_alloc_page(netdev);
5100                                 if (!buffer_info->page) {
5101                                         rx_ring->rx_stats.alloc_failed++;
5102                                         goto no_buffers;
5103                                 }
5104                                 buffer_info->page_offset = 0;
5105                         } else {
5106                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5107                         }
5108                         buffer_info->page_dma =
5109                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5110                                              buffer_info->page_offset,
5111                                              PAGE_SIZE / 2,
5112                                              PCI_DMA_FROMDEVICE);
5113                         if (pci_dma_mapping_error(rx_ring->pdev,
5114                                                   buffer_info->page_dma)) {
5115                                 buffer_info->page_dma = 0;
5116                                 rx_ring->rx_stats.alloc_failed++;
5117                                 goto no_buffers;
5118                         }
5119                 }
5120
5121                 skb = buffer_info->skb;
5122                 if (!skb) {
5123                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5124                         if (!skb) {
5125                                 rx_ring->rx_stats.alloc_failed++;
5126                                 goto no_buffers;
5127                         }
5128
5129                         buffer_info->skb = skb;
5130                 }
5131                 if (!buffer_info->dma) {
5132                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5133                                                           skb->data,
5134                                                           bufsz,
5135                                                           PCI_DMA_FROMDEVICE);
5136                         if (pci_dma_mapping_error(rx_ring->pdev,
5137                                                   buffer_info->dma)) {
5138                                 buffer_info->dma = 0;
5139                                 rx_ring->rx_stats.alloc_failed++;
5140                                 goto no_buffers;
5141                         }
5142                 }
5143                 /* Refresh the desc even if buffer_addrs didn't change because
5144                  * each write-back erases this info. */
5145                 if (bufsz < IGB_RXBUFFER_1024) {
5146                         rx_desc->read.pkt_addr =
5147                              cpu_to_le64(buffer_info->page_dma);
5148                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5149                 } else {
5150                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5151                         rx_desc->read.hdr_addr = 0;
5152                 }
5153
5154                 i++;
5155                 if (i == rx_ring->count)
5156                         i = 0;
5157                 buffer_info = &rx_ring->buffer_info[i];
5158         }
5159
5160 no_buffers:
5161         if (rx_ring->next_to_use != i) {
5162                 rx_ring->next_to_use = i;
5163                 if (i == 0)
5164                         i = (rx_ring->count - 1);
5165                 else
5166                         i--;
5167
5168                 /* Force memory writes to complete before letting h/w
5169                  * know there are new descriptors to fetch.  (Only
5170                  * applicable for weak-ordered memory model archs,
5171                  * such as IA-64). */
5172                 wmb();
5173                 writel(i, rx_ring->tail);
5174         }
5175 }
5176
5177 /**
5178  * igb_mii_ioctl - handle MII register access ioctls
5179  * @netdev: network interface device structure
5180  * @ifr: interface request containing the MII data
5181  * @cmd: SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG
5182  **/
5183 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5184 {
5185         struct igb_adapter *adapter = netdev_priv(netdev);
5186         struct mii_ioctl_data *data = if_mii(ifr);
5187
5188         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5189                 return -EOPNOTSUPP;
5190
5191         switch (cmd) {
5192         case SIOCGMIIPHY:
5193                 data->phy_id = adapter->hw.phy.addr;
5194                 break;
5195         case SIOCGMIIREG:
5196                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5197                                      &data->val_out))
5198                         return -EIO;
5199                 break;
5200         case SIOCSMIIREG:
5201         default:
5202                 return -EOPNOTSUPP;
5203         }
5204         return 0;
5205 }
5206
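/*
 * Illustrative user space sketch (not part of the driver): reading a PHY
 * register through the MII ioctls handled above.  The interface name and
 * the register read are arbitrary examples; needs linux/mii.h,
 * linux/sockios.h, net/if.h, string.h, sys/ioctl.h and sys/socket.h.
 *
 *	struct ifreq ifr;
 *	struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&ifr.ifr_data;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ioctl(fd, SIOCGMIIPHY, &ifr);
 *	mii->reg_num = MII_BMSR;
 *	ioctl(fd, SIOCGMIIREG, &ifr);
 *
 * After the second ioctl the register value is in mii->val_out.
 */
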
5207 /**
5208  * igb_hwtstamp_ioctl - control hardware time stamping
5209  * @netdev: network interface device structure
5210  * @ifr: interface request containing the hwtstamp_config
5211  * @cmd: SIOCSHWTSTAMP
5212  *
5213  * Outgoing time stamping can be enabled and disabled. Play nice and
5214  * disable it when requested, although it shouldn't cause any overhead
5215  * when no packet needs it. At most one packet in the queue may be
5216  * marked for time stamping, otherwise it would be impossible to tell
5217  * for sure to which packet the hardware time stamp belongs.
5218  *
5219  * Incoming time stamping has to be configured via the hardware
5220  * filters. Not all combinations are supported, in particular event
5221  * type has to be specified. Matching the kind of event packet is
5222  * not supported, with the exception of "all V2 events regardless of
5223  * layer 2 or 4".
5224  *
5225  **/
5226 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5227                               struct ifreq *ifr, int cmd)
5228 {
5229         struct igb_adapter *adapter = netdev_priv(netdev);
5230         struct e1000_hw *hw = &adapter->hw;
5231         struct hwtstamp_config config;
5232         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5233         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5234         u32 tsync_rx_cfg = 0;
5235         bool is_l4 = false;
5236         bool is_l2 = false;
5237         u32 regval;
5238
5239         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5240                 return -EFAULT;
5241
5242         /* reserved for future extensions */
5243         if (config.flags)
5244                 return -EINVAL;
5245
5246         switch (config.tx_type) {
5247         case HWTSTAMP_TX_OFF:
5248                 tsync_tx_ctl = 0;
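                /* fall through */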
5249         case HWTSTAMP_TX_ON:
5250                 break;
5251         default:
5252                 return -ERANGE;
5253         }
5254
5255         switch (config.rx_filter) {
5256         case HWTSTAMP_FILTER_NONE:
5257                 tsync_rx_ctl = 0;
5258                 break;
5259         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5260         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5261         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5262         case HWTSTAMP_FILTER_ALL:
5263                 /*
5264                  * register TSYNCRXCFG must be set, therefore it is not
5265                  * possible to time stamp both Sync and Delay_Req messages
5266                  * => fall back to time stamping all packets
5267                  */
5268                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5269                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5270                 break;
5271         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5272                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5273                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5274                 is_l4 = true;
5275                 break;
5276         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5277                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5278                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5279                 is_l4 = true;
5280                 break;
5281         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5282         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5283                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5284                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5285                 is_l2 = true;
5286                 is_l4 = true;
5287                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5288                 break;
5289         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5290         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5291                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5292                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5293                 is_l2 = true;
5294                 is_l4 = true;
5295                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5296                 break;
5297         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5298         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5299         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5300                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5301                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5302                 is_l2 = true;
5303                 break;
5304         default:
5305                 return -ERANGE;
5306         }
5307
5308         if (hw->mac.type == e1000_82575) {
5309                 if (tsync_rx_ctl | tsync_tx_ctl)
5310                         return -EINVAL;
5311                 return 0;
5312         }
5313
5314         /* enable/disable TX */
5315         regval = rd32(E1000_TSYNCTXCTL);
5316         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5317         regval |= tsync_tx_ctl;
5318         wr32(E1000_TSYNCTXCTL, regval);
5319
5320         /* enable/disable RX */
5321         regval = rd32(E1000_TSYNCRXCTL);
5322         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5323         regval |= tsync_rx_ctl;
5324         wr32(E1000_TSYNCRXCTL, regval);
5325
5326         /* define which PTP packets are time stamped */
5327         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5328
5329         /* define ethertype filter for timestamped packets */
5330         if (is_l2)
5331                 wr32(E1000_ETQF(3),
5332                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5333                                  E1000_ETQF_1588 | /* enable timestamping */
5334                                  ETH_P_1588));     /* 1588 eth protocol type */
5335         else
5336                 wr32(E1000_ETQF(3), 0);
5337
5338 #define PTP_PORT 319
5339         /* L4 Queue Filter[3]: filter by destination port and protocol */
5340         if (is_l4) {
5341                 u32 ftqf = (IPPROTO_UDP /* UDP */
5342                         | E1000_FTQF_VF_BP /* VF not compared */
5343                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5344                         | E1000_FTQF_MASK); /* mask all inputs */
5345                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5346
5347                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5348                 wr32(E1000_IMIREXT(3),
5349                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5350                 if (hw->mac.type == e1000_82576) {
5351                         /* enable source port check */
5352                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5353                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5354                 }
5355                 wr32(E1000_FTQF(3), ftqf);
5356         } else {
5357                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5358         }
5359         wrfl();
5360
5361         adapter->hwtstamp_config = config;
5362
5363         /* clear TX/RX time stamp registers, just to be sure */
5364         regval = rd32(E1000_TXSTMPH);
5365         regval = rd32(E1000_RXSTMPH);
5366
5367         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5368                 -EFAULT : 0;
5369 }
5370
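/*
 * Illustrative user space sketch (not part of the driver): enabling transmit
 * time stamping and PTPv2 event receive filtering through the SIOCSHWTSTAMP
 * ioctl handled above.  The interface name is an arbitrary example; needs
 * linux/net_tstamp.h, linux/sockios.h, net/if.h, string.h, sys/ioctl.h and
 * sys/socket.h.
 *
 *	struct hwtstamp_config cfg;
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&cfg, 0, sizeof(cfg));
 *	cfg.tx_type = HWTSTAMP_TX_ON;
 *	cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ifr.ifr_data = (void *)&cfg;
 *	ioctl(fd, SIOCSHWTSTAMP, &ifr);
 *
 * On return cfg.rx_filter holds the filter actually programmed, which may be
 * broader than the one requested (see the fallbacks above).
 */
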
5371 /**
5372  * igb_ioctl - dispatch device-specific ioctls
5373  * @netdev: network interface device structure
5374  * @ifr: interface request
5375  * @cmd: ioctl command
5376  **/
5377 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5378 {
5379         switch (cmd) {
5380         case SIOCGMIIPHY:
5381         case SIOCGMIIREG:
5382         case SIOCSMIIREG:
5383                 return igb_mii_ioctl(netdev, ifr, cmd);
5384         case SIOCSHWTSTAMP:
5385                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5386         default:
5387                 return -EOPNOTSUPP;
5388         }
5389 }
5390
5391 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5392 {
5393         struct igb_adapter *adapter = hw->back;
5394         u16 cap_offset;
5395
5396         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5397         if (!cap_offset)
5398                 return -E1000_ERR_CONFIG;
5399
5400         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5401
5402         return 0;
5403 }
5404
5405 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5406 {
5407         struct igb_adapter *adapter = hw->back;
5408         u16 cap_offset;
5409
5410         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5411         if (!cap_offset)
5412                 return -E1000_ERR_CONFIG;
5413
5414         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5415
5416         return 0;
5417 }
5418
5419 static void igb_vlan_rx_register(struct net_device *netdev,
5420                                  struct vlan_group *grp)
5421 {
5422         struct igb_adapter *adapter = netdev_priv(netdev);
5423         struct e1000_hw *hw = &adapter->hw;
5424         u32 ctrl, rctl;
5425
5426         igb_irq_disable(adapter);
5427         adapter->vlgrp = grp;
5428
5429         if (grp) {
5430                 /* enable VLAN tag insert/strip */
5431                 ctrl = rd32(E1000_CTRL);
5432                 ctrl |= E1000_CTRL_VME;
5433                 wr32(E1000_CTRL, ctrl);
5434
5435                 /* Disable CFI check */
5436                 rctl = rd32(E1000_RCTL);
5437                 rctl &= ~E1000_RCTL_CFIEN;
5438                 wr32(E1000_RCTL, rctl);
5439         } else {
5440                 /* disable VLAN tag insert/strip */
5441                 ctrl = rd32(E1000_CTRL);
5442                 ctrl &= ~E1000_CTRL_VME;
5443                 wr32(E1000_CTRL, ctrl);
5444         }
5445
5446         igb_rlpml_set(adapter);
5447
5448         if (!test_bit(__IGB_DOWN, &adapter->state))
5449                 igb_irq_enable(adapter);
5450 }
5451
5452 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5453 {
5454         struct igb_adapter *adapter = netdev_priv(netdev);
5455         struct e1000_hw *hw = &adapter->hw;
5456         int pf_id = adapter->vfs_allocated_count;
5457
5458         /* attempt to add filter to vlvf array */
5459         igb_vlvf_set(adapter, vid, true, pf_id);
5460
5461         /* add the filter since PF can receive vlans w/o entry in vlvf */
5462         igb_vfta_set(hw, vid, true);
5463 }
5464
5465 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5466 {
5467         struct igb_adapter *adapter = netdev_priv(netdev);
5468         struct e1000_hw *hw = &adapter->hw;
5469         int pf_id = adapter->vfs_allocated_count;
5470         s32 err;
5471
5472         igb_irq_disable(adapter);
5473         vlan_group_set_device(adapter->vlgrp, vid, NULL);
5474
5475         if (!test_bit(__IGB_DOWN, &adapter->state))
5476                 igb_irq_enable(adapter);
5477
5478         /* remove vlan from VLVF table array */
5479         err = igb_vlvf_set(adapter, vid, false, pf_id);
5480
5481         /* if vid was not present in VLVF just remove it from table */
5482         if (err)
5483                 igb_vfta_set(hw, vid, false);
5484 }
5485
5486 static void igb_restore_vlan(struct igb_adapter *adapter)
5487 {
5488         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5489
5490         if (adapter->vlgrp) {
5491                 u16 vid;
5492                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5493                         if (!vlan_group_get_device(adapter->vlgrp, vid))
5494                                 continue;
5495                         igb_vlan_rx_add_vid(adapter->netdev, vid);
5496                 }
5497         }
5498 }
5499
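/**
 * igb_set_spd_dplx - force a specific speed and duplex setting
 * @adapter: board private structure
 * @spddplx: sum of the requested SPEED_* and DUPLEX_* values
 *
 * Disables autonegotiation for forced 10/100 settings.  1000 Mbps full
 * duplex is only reachable through autonegotiation, and 1000 Mbps half
 * duplex is rejected.
 **/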
5500 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5501 {
5502         struct pci_dev *pdev = adapter->pdev;
5503         struct e1000_mac_info *mac = &adapter->hw.mac;
5504
5505         mac->autoneg = 0;
5506
5507         switch (spddplx) {
5508         case SPEED_10 + DUPLEX_HALF:
5509                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5510                 break;
5511         case SPEED_10 + DUPLEX_FULL:
5512                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5513                 break;
5514         case SPEED_100 + DUPLEX_HALF:
5515                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5516                 break;
5517         case SPEED_100 + DUPLEX_FULL:
5518                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5519                 break;
5520         case SPEED_1000 + DUPLEX_FULL:
5521                 mac->autoneg = 1;
5522                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5523                 break;
5524         case SPEED_1000 + DUPLEX_HALF: /* not supported */
5525         default:
5526                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
5527                 return -EINVAL;
5528         }
5529         return 0;
5530 }
5531
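/**
 * __igb_shutdown - prepare the device for suspend or power off
 * @pdev: PCI device being shut down
 * @enable_wake: set to true when wake-up filters or manageability require
 *               the port to stay powered
 *
 * Detaches and closes the interface, programs the wake-up filter registers
 * when Wake on LAN is configured, releases control of the hardware to
 * firmware and disables the PCI device.
 **/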
5532 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5533 {
5534         struct net_device *netdev = pci_get_drvdata(pdev);
5535         struct igb_adapter *adapter = netdev_priv(netdev);
5536         struct e1000_hw *hw = &adapter->hw;
5537         u32 ctrl, rctl, status;
5538         u32 wufc = adapter->wol;
5539 #ifdef CONFIG_PM
5540         int retval = 0;
5541 #endif
5542
5543         netif_device_detach(netdev);
5544
5545         if (netif_running(netdev))
5546                 igb_close(netdev);
5547
5548         igb_clear_interrupt_scheme(adapter);
5549
5550 #ifdef CONFIG_PM
5551         retval = pci_save_state(pdev);
5552         if (retval)
5553                 return retval;
5554 #endif
5555
5556         status = rd32(E1000_STATUS);
5557         if (status & E1000_STATUS_LU)
5558                 wufc &= ~E1000_WUFC_LNKC;
5559
5560         if (wufc) {
5561                 igb_setup_rctl(adapter);
5562                 igb_set_rx_mode(netdev);
5563
5564                 /* turn on all-multi mode if wake on multicast is enabled */
5565                 if (wufc & E1000_WUFC_MC) {
5566                         rctl = rd32(E1000_RCTL);
5567                         rctl |= E1000_RCTL_MPE;
5568                         wr32(E1000_RCTL, rctl);
5569                 }
5570
5571                 ctrl = rd32(E1000_CTRL);
5572                 /* advertise wake from D3Cold */
5573                 #define E1000_CTRL_ADVD3WUC 0x00100000
5574                 /* phy power management enable */
5575                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5576                 ctrl |= E1000_CTRL_ADVD3WUC;
5577                 wr32(E1000_CTRL, ctrl);
5578
5579                 /* Allow time for pending master requests to run */
5580                 igb_disable_pcie_master(hw);
5581
5582                 wr32(E1000_WUC, E1000_WUC_PME_EN);
5583                 wr32(E1000_WUFC, wufc);
5584         } else {
5585                 wr32(E1000_WUC, 0);
5586                 wr32(E1000_WUFC, 0);
5587         }
5588
5589         *enable_wake = wufc || adapter->en_mng_pt;
5590         if (!*enable_wake)
5591                 igb_shutdown_serdes_link_82575(hw);
5592
5593         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
5594          * would have already happened in close and is redundant. */
5595         igb_release_hw_control(adapter);
5596
5597         pci_disable_device(pdev);
5598
5599         return 0;
5600 }
5601
5602 #ifdef CONFIG_PM
5603 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5604 {
5605         int retval;
5606         bool wake;
5607
5608         retval = __igb_shutdown(pdev, &wake);
5609         if (retval)
5610                 return retval;
5611
5612         if (wake) {
5613                 pci_prepare_to_sleep(pdev);
5614         } else {
5615                 pci_wake_from_d3(pdev, false);
5616                 pci_set_power_state(pdev, PCI_D3hot);
5617         }
5618
5619         return 0;
5620 }
5621
5622 static int igb_resume(struct pci_dev *pdev)
5623 {
5624         struct net_device *netdev = pci_get_drvdata(pdev);
5625         struct igb_adapter *adapter = netdev_priv(netdev);
5626         struct e1000_hw *hw = &adapter->hw;
5627         int err;
5628
5629         pci_set_power_state(pdev, PCI_D0);
5630         pci_restore_state(pdev);
5631
5632         err = pci_enable_device_mem(pdev);
5633         if (err) {
5634                 dev_err(&pdev->dev,
5635                         "igb: Cannot enable PCI device from suspend\n");
5636                 return err;
5637         }
5638         pci_set_master(pdev);
5639
5640         pci_enable_wake(pdev, PCI_D3hot, 0);
5641         pci_enable_wake(pdev, PCI_D3cold, 0);
5642
5643         if (igb_init_interrupt_scheme(adapter)) {
5644                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5645                 return -ENOMEM;
5646         }
5647
5648         /* e1000_power_up_phy(adapter); */
5649
5650         igb_reset(adapter);
5651
5652         /* let the f/w know that the h/w is now under the control of the
5653          * driver. */
5654         igb_get_hw_control(adapter);
5655
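        /* Clear any wake-up status bits left over from the wake event
         * (WUS bits are write-1-to-clear).
         */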
5656         wr32(E1000_WUS, ~0);
5657
5658         if (netif_running(netdev)) {
5659                 err = igb_open(netdev);
5660                 if (err)
5661                         return err;
5662         }
5663
5664         netif_device_attach(netdev);
5665
5666         return 0;
5667 }
5668 #endif
5669
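/**
 * igb_shutdown - PCI shutdown / poweroff entry point
 * @pdev: Pointer to PCI device
 *
 * Runs the common __igb_shutdown() path and, when the machine is being
 * powered off, leaves wake-up armed (or not) as requested and puts the
 * device in D3hot.
 */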
5670 static void igb_shutdown(struct pci_dev *pdev)
5671 {
5672         bool wake;
5673
5674         __igb_shutdown(pdev, &wake);
5675
5676         if (system_state == SYSTEM_POWER_OFF) {
5677                 pci_wake_from_d3(pdev, wake);
5678                 pci_set_power_state(pdev, PCI_D3hot);
5679         }
5680 }
5681
5682 #ifdef CONFIG_NET_POLL_CONTROLLER
5683 /*
5684  * Polling 'interrupt' - used by things like netconsole to send skbs
5685  * without having to re-enable interrupts. It's not called while
5686  * the interrupt routine is executing.
5687  */
5688 static void igb_netpoll(struct net_device *netdev)
5689 {
5690         struct igb_adapter *adapter = netdev_priv(netdev);
5691         struct e1000_hw *hw = &adapter->hw;
5692         int i;
5693
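        /* In legacy/MSI mode a single q_vector services all rings, so mask
         * all interrupts and schedule that one NAPI context; in MSI-X mode
         * each queue vector is masked and polled individually below.
         */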
5694         if (!adapter->msix_entries) {
5695                 struct igb_q_vector *q_vector = adapter->q_vector[0];
5696                 igb_irq_disable(adapter);
5697                 napi_schedule(&q_vector->napi);
5698                 return;
5699         }
5700
5701         for (i = 0; i < adapter->num_q_vectors; i++) {
5702                 struct igb_q_vector *q_vector = adapter->q_vector[i];
5703                 wr32(E1000_EIMC, q_vector->eims_value);
5704                 napi_schedule(&q_vector->napi);
5705         }
5706 }
5707 #endif /* CONFIG_NET_POLL_CONTROLLER */
5708
5709 /**
5710  * igb_io_error_detected - called when PCI error is detected
5711  * @pdev: Pointer to PCI device
5712  * @state: The current pci connection state
5713  *
5714  * This function is called after a PCI bus error affecting
5715  * this device has been detected.
5716  */
5717 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5718                                               pci_channel_state_t state)
5719 {
5720         struct net_device *netdev = pci_get_drvdata(pdev);
5721         struct igb_adapter *adapter = netdev_priv(netdev);
5722
5723         netif_device_detach(netdev);
5724
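        /* A permanent channel failure means the device is gone; ask the
         * AER core to disconnect instead of attempting a slot reset.
         */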
5725         if (state == pci_channel_io_perm_failure)
5726                 return PCI_ERS_RESULT_DISCONNECT;
5727
5728         if (netif_running(netdev))
5729                 igb_down(adapter);
5730         pci_disable_device(pdev);
5731
5732         /* Request a slot reset. */
5733         return PCI_ERS_RESULT_NEED_RESET;
5734 }
5735
5736 /**
5737  * igb_io_slot_reset - called after the pci bus has been reset.
5738  * @pdev: Pointer to PCI device
5739  *
5740  * Restart the card from scratch, as if from a cold boot. Implementation
5741  * resembles the first half of the igb_resume routine.
5742  */
5743 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
5744 {
5745         struct net_device *netdev = pci_get_drvdata(pdev);
5746         struct igb_adapter *adapter = netdev_priv(netdev);
5747         struct e1000_hw *hw = &adapter->hw;
5748         pci_ers_result_t result;
5749         int err;
5750
5751         if (pci_enable_device_mem(pdev)) {
5752                 dev_err(&pdev->dev,
5753                         "Cannot re-enable PCI device after reset.\n");
5754                 result = PCI_ERS_RESULT_DISCONNECT;
5755         } else {
5756                 pci_set_master(pdev);
5757                 pci_restore_state(pdev);
5758
5759                 pci_enable_wake(pdev, PCI_D3hot, 0);
5760                 pci_enable_wake(pdev, PCI_D3cold, 0);
5761
5762                 igb_reset(adapter);
5763                 wr32(E1000_WUS, ~0);
5764                 result = PCI_ERS_RESULT_RECOVERED;
5765         }
5766
5767         err = pci_cleanup_aer_uncorrect_error_status(pdev);
5768         if (err) {
5769                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
5770                         "failed 0x%x\n", err);
5771                 /* non-fatal, continue */
5772         }
5773
5774         return result;
5775 }
5776
5777 /**
5778  * igb_io_resume - called when traffic can start flowing again.
5779  * @pdev: Pointer to PCI device
5780  *
5781  * This callback is called when the error recovery driver tells us that
5782  * it's OK to resume normal operation. Implementation resembles the
5783  * second half of the igb_resume routine.
5784  */
5785 static void igb_io_resume(struct pci_dev *pdev)
5786 {
5787         struct net_device *netdev = pci_get_drvdata(pdev);
5788         struct igb_adapter *adapter = netdev_priv(netdev);
5789
5790         if (netif_running(netdev)) {
5791                 if (igb_up(adapter)) {
5792                         dev_err(&pdev->dev, "igb_up failed after reset\n");
5793                         return;
5794                 }
5795         }
5796
5797         netif_device_attach(netdev);
5798
5799         /* let the f/w know that the h/w is now under the control of the
5800          * driver. */
5801         igb_get_hw_control(adapter);
5802 }
5803
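/**
 * igb_rar_set_qsel - write a receive address register with a pool select
 * @adapter: board private structure
 * @addr: MAC address in network (big endian) byte order
 * @index: receive address register (RAL/RAH pair) to program
 * @qsel: pool/queue to associate with the address
 *
 * For example, 00:1b:21:aa:bb:cc is written as RAL = 0xaa211b00 and
 * RAH = 0x0000ccbb before the valid and pool-select bits are OR'ed in.
 * Note the pool field is filled with qsel itself on 82575
 * (E1000_RAH_POOL_1 * qsel) and with a one-hot bitmask
 * (E1000_RAH_POOL_1 << qsel) on later parts.
 */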
5804 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
5805                              u8 qsel)
5806 {
5807         u32 rar_low, rar_high;
5808         struct e1000_hw *hw = &adapter->hw;
5809
5810         /* HW expects these in little endian so we reverse the byte order
5811          * from network order (big endian) to little endian
5812          */
5813         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
5814                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
5815         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
5816
5817         /* Indicate to hardware the Address is Valid. */
5818         rar_high |= E1000_RAH_AV;
5819
5820         if (hw->mac.type == e1000_82575)
5821                 rar_high |= E1000_RAH_POOL_1 * qsel;
5822         else
5823                 rar_high |= E1000_RAH_POOL_1 << qsel;
5824
5825         wr32(E1000_RAL(index), rar_low);
5826         wrfl();
5827         wr32(E1000_RAH(index), rar_high);
5828         wrfl();
5829 }
5830
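/**
 * igb_set_vf_mac - program the unicast MAC address for a VF
 * @adapter: board private structure
 * @vf: VF index
 * @mac_addr: MAC address to assign
 *
 * The address is stored in vf_data and written to the RAR entry counted
 * back from the end of the table; with a hypothetical 24-entry table,
 * VF 0 would use entry 23, VF 1 entry 22, and so on, so PF and VF
 * entries grow towards each other.
 */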
5831 static int igb_set_vf_mac(struct igb_adapter *adapter,
5832                           int vf, unsigned char *mac_addr)
5833 {
5834         struct e1000_hw *hw = &adapter->hw;
5835         /* VF MAC addresses start at the end of the receive address array and
5836          * move towards the first entry, so a collision should not be possible */
5837         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
5838
5839         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
5840
5841         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
5842
5843         return 0;
5844 }
5845
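/**
 * igb_vmm_control - configure VM-to-VM traffic handling
 * @adapter: board private structure
 *
 * Skipped on 82575, which has no replication support.  Enables VLAN tag
 * stripping for replicated packets, tells the hardware the MAC inserts
 * VLAN tags, and turns VMDq loopback/replication on only when VFs have
 * been allocated.
 */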
5846 static void igb_vmm_control(struct igb_adapter *adapter)
5847 {
5848         struct e1000_hw *hw = &adapter->hw;
5849         u32 reg;
5850
5851         /* replication is not supported for 82575 */
5852         if (hw->mac.type == e1000_82575)
5853                 return;
5854
5855         /* enable replication vlan tag stripping */
5856         reg = rd32(E1000_RPLOLR);
5857         reg |= E1000_RPLOLR_STRVLAN;
5858         wr32(E1000_RPLOLR, reg);
5859
5860         /* notify HW that the MAC is adding vlan tags */
5861         reg = rd32(E1000_DTXCTL);
5862         reg |= E1000_DTXCTL_VLAN_ADDED;
5863         wr32(E1000_DTXCTL, reg);
5864
5865         if (adapter->vfs_allocated_count) {
5866                 igb_vmdq_set_loopback_pf(hw, true);
5867                 igb_vmdq_set_replication_pf(hw, true);
5868         } else {
5869                 igb_vmdq_set_loopback_pf(hw, false);
5870                 igb_vmdq_set_replication_pf(hw, false);
5871         }
5872 }
5873
5874 /* igb_main.c */