drivers/net/igb/igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <net/checksum.h>
36 #include <net/ip6_checksum.h>
37 #include <linux/net_tstamp.h>
38 #include <linux/mii.h>
39 #include <linux/ethtool.h>
40 #include <linux/if_vlan.h>
41 #include <linux/pci.h>
42 #include <linux/pci-aspm.h>
43 #include <linux/delay.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_ether.h>
46 #include <linux/aer.h>
47 #ifdef CONFIG_IGB_DCA
48 #include <linux/dca.h>
49 #endif
50 #include "igb.h"
51
52 #define DRV_VERSION "2.1.0-k2"
53 char igb_driver_name[] = "igb";
54 char igb_driver_version[] = DRV_VERSION;
55 static const char igb_driver_string[] =
56                                 "Intel(R) Gigabit Ethernet Network Driver";
57 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
58
59 static const struct e1000_info *igb_info_tbl[] = {
60         [board_82575] = &e1000_82575_info,
61 };
62
63 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
64         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
79         /* required last entry */
80         {0, }
81 };
82
83 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
84
85 void igb_reset(struct igb_adapter *);
86 static int igb_setup_all_tx_resources(struct igb_adapter *);
87 static int igb_setup_all_rx_resources(struct igb_adapter *);
88 static void igb_free_all_tx_resources(struct igb_adapter *);
89 static void igb_free_all_rx_resources(struct igb_adapter *);
90 static void igb_setup_mrqc(struct igb_adapter *);
91 void igb_update_stats(struct igb_adapter *);
92 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
93 static void __devexit igb_remove(struct pci_dev *pdev);
94 static int igb_sw_init(struct igb_adapter *);
95 static int igb_open(struct net_device *);
96 static int igb_close(struct net_device *);
97 static void igb_configure_tx(struct igb_adapter *);
98 static void igb_configure_rx(struct igb_adapter *);
99 static void igb_clean_all_tx_rings(struct igb_adapter *);
100 static void igb_clean_all_rx_rings(struct igb_adapter *);
101 static void igb_clean_tx_ring(struct igb_ring *);
102 static void igb_clean_rx_ring(struct igb_ring *);
103 static void igb_set_rx_mode(struct net_device *);
104 static void igb_update_phy_info(unsigned long);
105 static void igb_watchdog(unsigned long);
106 static void igb_watchdog_task(struct work_struct *);
107 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
108 static struct net_device_stats *igb_get_stats(struct net_device *);
109 static int igb_change_mtu(struct net_device *, int);
110 static int igb_set_mac(struct net_device *, void *);
111 static void igb_set_uta(struct igb_adapter *adapter);
112 static irqreturn_t igb_intr(int irq, void *);
113 static irqreturn_t igb_intr_msi(int irq, void *);
114 static irqreturn_t igb_msix_other(int irq, void *);
115 static irqreturn_t igb_msix_ring(int irq, void *);
116 #ifdef CONFIG_IGB_DCA
117 static void igb_update_dca(struct igb_q_vector *);
118 static void igb_setup_dca(struct igb_adapter *);
119 #endif /* CONFIG_IGB_DCA */
120 static bool igb_clean_tx_irq(struct igb_q_vector *);
121 static int igb_poll(struct napi_struct *, int);
122 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
123 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
124 static void igb_tx_timeout(struct net_device *);
125 static void igb_reset_task(struct work_struct *);
126 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
127 static void igb_vlan_rx_add_vid(struct net_device *, u16);
128 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
129 static void igb_restore_vlan(struct igb_adapter *);
130 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
131 static void igb_ping_all_vfs(struct igb_adapter *);
132 static void igb_msg_task(struct igb_adapter *);
133 static void igb_vmm_control(struct igb_adapter *);
134 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
135 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
136 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
137 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
138                                int vf, u16 vlan, u8 qos);
139 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
140 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
141                                  struct ifla_vf_info *ivi);
142
143 #ifdef CONFIG_PM
144 static int igb_suspend(struct pci_dev *, pm_message_t);
145 static int igb_resume(struct pci_dev *);
146 #endif
147 static void igb_shutdown(struct pci_dev *);
148 #ifdef CONFIG_IGB_DCA
149 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
150 static struct notifier_block dca_notifier = {
151         .notifier_call  = igb_notify_dca,
152         .next           = NULL,
153         .priority       = 0
154 };
155 #endif
156 #ifdef CONFIG_NET_POLL_CONTROLLER
157 /* for netdump / net console */
158 static void igb_netpoll(struct net_device *);
159 #endif
160 #ifdef CONFIG_PCI_IOV
161 static unsigned int max_vfs = 0;
162 module_param(max_vfs, uint, 0);
163 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
164                  "per physical function");
165 #endif /* CONFIG_PCI_IOV */
166
167 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
168                      pci_channel_state_t);
169 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
170 static void igb_io_resume(struct pci_dev *);
171
172 static struct pci_error_handlers igb_err_handler = {
173         .error_detected = igb_io_error_detected,
174         .slot_reset = igb_io_slot_reset,
175         .resume = igb_io_resume,
176 };
177
178
179 static struct pci_driver igb_driver = {
180         .name     = igb_driver_name,
181         .id_table = igb_pci_tbl,
182         .probe    = igb_probe,
183         .remove   = __devexit_p(igb_remove),
184 #ifdef CONFIG_PM
185         /* Power Management Hooks */
186         .suspend  = igb_suspend,
187         .resume   = igb_resume,
188 #endif
189         .shutdown = igb_shutdown,
190         .err_handler = &igb_err_handler
191 };
192
193 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
194 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
195 MODULE_LICENSE("GPL");
196 MODULE_VERSION(DRV_VERSION);
197
198 /**
199  * igb_read_clock - read raw cycle counter (to be used by time counter)
200  */
201 static cycle_t igb_read_clock(const struct cyclecounter *tc)
202 {
203         struct igb_adapter *adapter =
204                 container_of(tc, struct igb_adapter, cycles);
205         struct e1000_hw *hw = &adapter->hw;
206         u64 stamp = 0;
207         int shift = 0;
208
209         /*
210          * The timestamp latches on lowest register read. For the 82580
211          * the lowest register is SYSTIMR instead of SYSTIML.  However, we never
212          * adjusted TIMINCA, so SYSTIMR will just read as all 0s and can be ignored.
213          */
214         if (hw->mac.type == e1000_82580) {
215                 stamp = rd32(E1000_SYSTIMR) >> 8;
216                 shift = IGB_82580_TSYNC_SHIFT;
217         }
218
219         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
220         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
221         return stamp;
222 }
223
224 #ifdef DEBUG
225 /**
226  * igb_get_hw_dev_name - return device name string
227  * used by hardware layer to print debugging information
228  **/
229 char *igb_get_hw_dev_name(struct e1000_hw *hw)
230 {
231         struct igb_adapter *adapter = hw->back;
232         return adapter->netdev->name;
233 }
234
235 /**
236  * igb_get_time_str - format current NIC and system time as string
237  */
238 static char *igb_get_time_str(struct igb_adapter *adapter,
239                               char buffer[160])
240 {
241         cycle_t hw = adapter->cycles.read(&adapter->cycles);
242         struct timespec nic = ns_to_timespec(timecounter_read(&adapter->clock));
243         struct timespec sys;
244         struct timespec delta;
245         getnstimeofday(&sys);
246
247         delta = timespec_sub(nic, sys);
248
249         sprintf(buffer,
250                 "HW %llu, NIC %ld.%09lus, SYS %ld.%09lus, NIC-SYS %lds + %09luns",
251                 hw,
252                 (long)nic.tv_sec, nic.tv_nsec,
253                 (long)sys.tv_sec, sys.tv_nsec,
254                 (long)delta.tv_sec, delta.tv_nsec);
255
256         return buffer;
257 }
258 #endif
259
260 /**
261  * igb_init_module - Driver Registration Routine
262  *
263  * igb_init_module is the first routine called when the driver is
264  * loaded. All it does is register with the PCI subsystem.
265  **/
266 static int __init igb_init_module(void)
267 {
268         int ret;
269         printk(KERN_INFO "%s - version %s\n",
270                igb_driver_string, igb_driver_version);
271
272         printk(KERN_INFO "%s\n", igb_copyright);
273
274 #ifdef CONFIG_IGB_DCA
275         dca_register_notify(&dca_notifier);
276 #endif
277         ret = pci_register_driver(&igb_driver);
278         return ret;
279 }
280
281 module_init(igb_init_module);
282
283 /**
284  * igb_exit_module - Driver Exit Cleanup Routine
285  *
286  * igb_exit_module is called just before the driver is removed
287  * from memory.
288  **/
289 static void __exit igb_exit_module(void)
290 {
291 #ifdef CONFIG_IGB_DCA
292         dca_unregister_notify(&dca_notifier);
293 #endif
294         pci_unregister_driver(&igb_driver);
295 }
296
297 module_exit(igb_exit_module);
298
299 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
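/*
 * For reference (derived from the macro above), Q_IDX_82576 interleaves
 * consecutive indices across the two halves of the 82576 queue register
 * space:
 *   i:              0  1  2  3  4  5  6  7
 *   Q_IDX_82576(i): 0  8  1  9  2 10  3 11
 */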
300 /**
301  * igb_cache_ring_register - Descriptor ring to register mapping
302  * @adapter: board private structure to initialize
303  *
304  * Once we know the feature-set enabled for the device, we'll cache
305  * the register offset the descriptor ring is assigned to.
306  **/
307 static void igb_cache_ring_register(struct igb_adapter *adapter)
308 {
309         int i = 0, j = 0;
310         u32 rbase_offset = adapter->vfs_allocated_count;
311
312         switch (adapter->hw.mac.type) {
313         case e1000_82576:
314                 /* The queues are allocated for virtualization such that VF 0
315                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
316                  * In order to avoid collision we start at the first free queue
317                  * and continue consuming queues in the same sequence
318                  */
319                 if (adapter->vfs_allocated_count) {
320                         for (; i < adapter->rss_queues; i++)
321                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
322                                                                Q_IDX_82576(i);
323                         for (; j < adapter->rss_queues; j++)
324                                 adapter->tx_ring[j]->reg_idx = rbase_offset +
325                                                                Q_IDX_82576(j);
326                 }
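                /* fall through - the loops below pick up from wherever the
                 * VF-aware mapping above left i and j */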
327         case e1000_82575:
328         case e1000_82580:
329         default:
330                 for (; i < adapter->num_rx_queues; i++)
331                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
332                 for (; j < adapter->num_tx_queues; j++)
333                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
334                 break;
335         }
336 }
337
338 static void igb_free_queues(struct igb_adapter *adapter)
339 {
340         int i;
341
342         for (i = 0; i < adapter->num_tx_queues; i++) {
343                 kfree(adapter->tx_ring[i]);
344                 adapter->tx_ring[i] = NULL;
345         }
346         for (i = 0; i < adapter->num_rx_queues; i++) {
347                 kfree(adapter->rx_ring[i]);
348                 adapter->rx_ring[i] = NULL;
349         }
350         adapter->num_rx_queues = 0;
351         adapter->num_tx_queues = 0;
352 }
353
354 /**
355  * igb_alloc_queues - Allocate memory for all rings
356  * @adapter: board private structure to initialize
357  *
358  * We allocate one ring per queue at run-time since we don't know the
359  * number of queues at compile-time.
360  **/
361 static int igb_alloc_queues(struct igb_adapter *adapter)
362 {
363         struct igb_ring *ring;
364         int i;
365
366         for (i = 0; i < adapter->num_tx_queues; i++) {
367                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
368                 if (!ring)
369                         goto err;
370                 ring->count = adapter->tx_ring_count;
371                 ring->queue_index = i;
372                 ring->pdev = adapter->pdev;
373                 ring->netdev = adapter->netdev;
374                 /* For 82575, context index must be unique per ring. */
375                 if (adapter->hw.mac.type == e1000_82575)
376                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
377                 adapter->tx_ring[i] = ring;
378         }
379
380         for (i = 0; i < adapter->num_rx_queues; i++) {
381                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
382                 if (!ring)
383                         goto err;
384                 ring->count = adapter->rx_ring_count;
385                 ring->queue_index = i;
386                 ring->pdev = adapter->pdev;
387                 ring->netdev = adapter->netdev;
388                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
389                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
390                 /* set flag indicating ring supports SCTP checksum offload */
391                 if (adapter->hw.mac.type >= e1000_82576)
392                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
393                 adapter->rx_ring[i] = ring;
394         }
395
396         igb_cache_ring_register(adapter);
397
398         return 0;
399
400 err:
401         igb_free_queues(adapter);
402
403         return -ENOMEM;
404 }
405
406 #define IGB_N0_QUEUE -1
407 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
408 {
409         u32 msixbm = 0;
410         struct igb_adapter *adapter = q_vector->adapter;
411         struct e1000_hw *hw = &adapter->hw;
412         u32 ivar, index;
413         int rx_queue = IGB_N0_QUEUE;
414         int tx_queue = IGB_N0_QUEUE;
415
416         if (q_vector->rx_ring)
417                 rx_queue = q_vector->rx_ring->reg_idx;
418         if (q_vector->tx_ring)
419                 tx_queue = q_vector->tx_ring->reg_idx;
420
421         switch (hw->mac.type) {
422         case e1000_82575:
423                 /* The 82575 assigns vectors using a bitmask, which matches the
424                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
425                    or more queues to a vector, we write the appropriate bits
426                    into the MSIXBM register for that vector. */
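                /*
                 * Illustrative example (derived from the code below): a vector
                 * servicing RX queue 1 and TX queue 1 ends up with
                 * msixbm = (E1000_EICR_RX_QUEUE0 << 1) | (E1000_EICR_TX_QUEUE0 << 1).
                 */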
427                 if (rx_queue > IGB_N0_QUEUE)
428                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
429                 if (tx_queue > IGB_N0_QUEUE)
430                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
431                 if (!adapter->msix_entries && msix_vector == 0)
432                         msixbm |= E1000_EIMS_OTHER;
433                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
434                 q_vector->eims_value = msixbm;
435                 break;
436         case e1000_82576:
437                 /* 82576 uses a table-based method for assigning vectors.
438                    Each queue has a single entry in the table to which we write
439                    a vector number along with a "valid" bit.  Sadly, the layout
440                    of the table is somewhat counterintuitive. */
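                /*
                 * For reference, the per-entry layout implied by the code
                 * below is (index = queue & 0x7):
                 *   bits  7:0  - vector for RX queue 'index'
                 *   bits 15:8  - vector for TX queue 'index'
                 *   bits 23:16 - vector for RX queue 'index + 8'
                 *   bits 31:24 - vector for TX queue 'index + 8'
                 * with E1000_IVAR_VALID set in each byte that is assigned.
                 */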
441                 if (rx_queue > IGB_N0_QUEUE) {
442                         index = (rx_queue & 0x7);
443                         ivar = array_rd32(E1000_IVAR0, index);
444                         if (rx_queue < 8) {
445                                 /* vector goes into low byte of register */
446                                 ivar = ivar & 0xFFFFFF00;
447                                 ivar |= msix_vector | E1000_IVAR_VALID;
448                         } else {
449                                 /* vector goes into third byte of register */
450                                 ivar = ivar & 0xFF00FFFF;
451                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
452                         }
453                         array_wr32(E1000_IVAR0, index, ivar);
454                 }
455                 if (tx_queue > IGB_N0_QUEUE) {
456                         index = (tx_queue & 0x7);
457                         ivar = array_rd32(E1000_IVAR0, index);
458                         if (tx_queue < 8) {
459                                 /* vector goes into second byte of register */
460                                 ivar = ivar & 0xFFFF00FF;
461                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
462                         } else {
463                                 /* vector goes into high byte of register */
464                                 ivar = ivar & 0x00FFFFFF;
465                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
466                         }
467                         array_wr32(E1000_IVAR0, index, ivar);
468                 }
469                 q_vector->eims_value = 1 << msix_vector;
470                 break;
471         case e1000_82580:
472                 /* 82580 uses the same table-based approach as 82576 but has fewer
473                    entries; as a result we carry over for queues greater than 4. */
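                /*
                 * Here the entry layout implied by the code below is
                 * (index = queue >> 1): even queues use the low half of the
                 * entry (RX in bits 7:0, TX in bits 15:8) and odd queues the
                 * high half (RX in bits 23:16, TX in bits 31:24).
                 */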
474                 if (rx_queue > IGB_N0_QUEUE) {
475                         index = (rx_queue >> 1);
476                         ivar = array_rd32(E1000_IVAR0, index);
477                         if (rx_queue & 0x1) {
478                                 /* vector goes into third byte of register */
479                                 ivar = ivar & 0xFF00FFFF;
480                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
481                         } else {
482                                 /* vector goes into low byte of register */
483                                 ivar = ivar & 0xFFFFFF00;
484                                 ivar |= msix_vector | E1000_IVAR_VALID;
485                         }
486                         array_wr32(E1000_IVAR0, index, ivar);
487                 }
488                 if (tx_queue > IGB_N0_QUEUE) {
489                         index = (tx_queue >> 1);
490                         ivar = array_rd32(E1000_IVAR0, index);
491                         if (tx_queue & 0x1) {
492                                 /* vector goes into high byte of register */
493                                 ivar = ivar & 0x00FFFFFF;
494                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
495                         } else {
496                                 /* vector goes into second byte of register */
497                                 ivar = ivar & 0xFFFF00FF;
498                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
499                         }
500                         array_wr32(E1000_IVAR0, index, ivar);
501                 }
502                 q_vector->eims_value = 1 << msix_vector;
503                 break;
504         default:
505                 BUG();
506                 break;
507         }
508
509         /* add q_vector eims value to global eims_enable_mask */
510         adapter->eims_enable_mask |= q_vector->eims_value;
511
512         /* configure q_vector to set itr on first interrupt */
513         q_vector->set_itr = 1;
514 }
515
516 /**
517  * igb_configure_msix - Configure MSI-X hardware
518  *
519  * igb_configure_msix sets up the hardware to properly
520  * generate MSI-X interrupts.
521  **/
522 static void igb_configure_msix(struct igb_adapter *adapter)
523 {
524         u32 tmp;
525         int i, vector = 0;
526         struct e1000_hw *hw = &adapter->hw;
527
528         adapter->eims_enable_mask = 0;
529
530         /* set vector for other causes, i.e. link changes */
531         switch (hw->mac.type) {
532         case e1000_82575:
533                 tmp = rd32(E1000_CTRL_EXT);
534                 /* enable MSI-X PBA support */
535                 tmp |= E1000_CTRL_EXT_PBA_CLR;
536
537                 /* Auto-Mask interrupts upon ICR read. */
538                 tmp |= E1000_CTRL_EXT_EIAME;
539                 tmp |= E1000_CTRL_EXT_IRCA;
540
541                 wr32(E1000_CTRL_EXT, tmp);
542
543                 /* enable msix_other interrupt */
544                 array_wr32(E1000_MSIXBM(0), vector++,
545                                       E1000_EIMS_OTHER);
546                 adapter->eims_other = E1000_EIMS_OTHER;
547
548                 break;
549
550         case e1000_82576:
551         case e1000_82580:
552                 /* Turn on MSI-X capability first, or our settings
553                  * won't stick.  And it will take days to debug. */
554                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
555                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
556                                 E1000_GPIE_NSICR);
557
558                 /* enable msix_other interrupt */
559                 adapter->eims_other = 1 << vector;
560                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
561
562                 wr32(E1000_IVAR_MISC, tmp);
563                 break;
564         default:
565                 /* do nothing, since nothing else supports MSI-X */
566                 break;
567         } /* switch (hw->mac.type) */
568
569         adapter->eims_enable_mask |= adapter->eims_other;
570
571         for (i = 0; i < adapter->num_q_vectors; i++)
572                 igb_assign_vector(adapter->q_vector[i], vector++);
573
574         wrfl();
575 }
576
577 /**
578  * igb_request_msix - Initialize MSI-X interrupts
579  *
580  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
581  * kernel.
582  **/
583 static int igb_request_msix(struct igb_adapter *adapter)
584 {
585         struct net_device *netdev = adapter->netdev;
586         struct e1000_hw *hw = &adapter->hw;
587         int i, err = 0, vector = 0;
588
589         err = request_irq(adapter->msix_entries[vector].vector,
590                           igb_msix_other, 0, netdev->name, adapter);
591         if (err)
592                 goto out;
593         vector++;
594
595         for (i = 0; i < adapter->num_q_vectors; i++) {
596                 struct igb_q_vector *q_vector = adapter->q_vector[i];
597
598                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
599
600                 if (q_vector->rx_ring && q_vector->tx_ring)
601                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
602                                 q_vector->rx_ring->queue_index);
603                 else if (q_vector->tx_ring)
604                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
605                                 q_vector->tx_ring->queue_index);
606                 else if (q_vector->rx_ring)
607                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
608                                 q_vector->rx_ring->queue_index);
609                 else
610                         sprintf(q_vector->name, "%s-unused", netdev->name);
611
612                 err = request_irq(adapter->msix_entries[vector].vector,
613                                   igb_msix_ring, 0, q_vector->name,
614                                   q_vector);
615                 if (err)
616                         goto out;
617                 vector++;
618         }
619
620         igb_configure_msix(adapter);
621         return 0;
622 out:
623         return err;
624 }
625
626 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
627 {
628         if (adapter->msix_entries) {
629                 pci_disable_msix(adapter->pdev);
630                 kfree(adapter->msix_entries);
631                 adapter->msix_entries = NULL;
632         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
633                 pci_disable_msi(adapter->pdev);
634         }
635 }
636
637 /**
638  * igb_free_q_vectors - Free memory allocated for interrupt vectors
639  * @adapter: board private structure to initialize
640  *
641  * This function frees the memory allocated to the q_vectors.  In addition if
642  * NAPI is enabled it will delete any references to the NAPI struct prior
643  * to freeing the q_vector.
644  **/
645 static void igb_free_q_vectors(struct igb_adapter *adapter)
646 {
647         int v_idx;
648
649         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
650                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
651                 adapter->q_vector[v_idx] = NULL;
652                 if (!q_vector)
653                         continue;
654                 netif_napi_del(&q_vector->napi);
655                 kfree(q_vector);
656         }
657         adapter->num_q_vectors = 0;
658 }
659
660 /**
661  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
662  *
663  * This function resets the device so that it has 0 rx queues, tx queues, and
664  * MSI-X interrupts allocated.
665  */
666 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
667 {
668         igb_free_queues(adapter);
669         igb_free_q_vectors(adapter);
670         igb_reset_interrupt_capability(adapter);
671 }
672
673 /**
674  * igb_set_interrupt_capability - set MSI or MSI-X if supported
675  *
676  * Attempt to configure interrupts using the best available
677  * capabilities of the hardware and kernel.
678  **/
679 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
680 {
681         int err;
682         int numvecs, i;
683
684         /* Number of supported queues. */
685         adapter->num_rx_queues = adapter->rss_queues;
686         adapter->num_tx_queues = adapter->rss_queues;
687
688         /* start with one vector for every rx queue */
689         numvecs = adapter->num_rx_queues;
690
691         /* if tx handler is separate, add 1 for every tx queue */
692         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
693                 numvecs += adapter->num_tx_queues;
694
695         /* store the number of vectors reserved for queues */
696         adapter->num_q_vectors = numvecs;
697
698         /* add 1 vector for link status interrupts */
699         numvecs++;
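        /*
         * Example (assuming 4 RSS queues): without queue pairing this gives
         * 4 RX + 4 TX + 1 link = 9 vectors; with IGB_FLAG_QUEUE_PAIRS set it
         * gives 4 paired queue vectors + 1 link = 5 vectors.
         */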
700         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
701                                         GFP_KERNEL);
702         if (!adapter->msix_entries)
703                 goto msi_only;
704
705         for (i = 0; i < numvecs; i++)
706                 adapter->msix_entries[i].entry = i;
707
708         err = pci_enable_msix(adapter->pdev,
709                               adapter->msix_entries,
710                               numvecs);
711         if (err == 0)
712                 goto out;
713
714         igb_reset_interrupt_capability(adapter);
715
716         /* If we can't do MSI-X, try MSI */
717 msi_only:
718 #ifdef CONFIG_PCI_IOV
719         /* disable SR-IOV for non MSI-X configurations */
720         if (adapter->vf_data) {
721                 struct e1000_hw *hw = &adapter->hw;
722                 /* disable iov and allow time for transactions to clear */
723                 pci_disable_sriov(adapter->pdev);
724                 msleep(500);
725
726                 kfree(adapter->vf_data);
727                 adapter->vf_data = NULL;
728                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
729                 msleep(100);
730                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
731         }
732 #endif
733         adapter->vfs_allocated_count = 0;
734         adapter->rss_queues = 1;
735         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
736         adapter->num_rx_queues = 1;
737         adapter->num_tx_queues = 1;
738         adapter->num_q_vectors = 1;
739         if (!pci_enable_msi(adapter->pdev))
740                 adapter->flags |= IGB_FLAG_HAS_MSI;
741 out:
742         /* Notify the stack of the (possibly) reduced Tx Queue count. */
743         adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
744         return;
745 }
746
747 /**
748  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
749  * @adapter: board private structure to initialize
750  *
751  * We allocate one q_vector per queue interrupt.  If allocation fails we
752  * return -ENOMEM.
753  **/
754 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
755 {
756         struct igb_q_vector *q_vector;
757         struct e1000_hw *hw = &adapter->hw;
758         int v_idx;
759
760         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
761                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
762                 if (!q_vector)
763                         goto err_out;
764                 q_vector->adapter = adapter;
765                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
766                 q_vector->itr_val = IGB_START_ITR;
767                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
768                 adapter->q_vector[v_idx] = q_vector;
769         }
770         return 0;
771
772 err_out:
773         igb_free_q_vectors(adapter);
774         return -ENOMEM;
775 }
776
777 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
778                                       int ring_idx, int v_idx)
779 {
780         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
781
782         q_vector->rx_ring = adapter->rx_ring[ring_idx];
783         q_vector->rx_ring->q_vector = q_vector;
784         q_vector->itr_val = adapter->rx_itr_setting;
785         if (q_vector->itr_val && q_vector->itr_val <= 3)
786                 q_vector->itr_val = IGB_START_ITR;
787 }
788
789 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
790                                       int ring_idx, int v_idx)
791 {
792         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
793
794         q_vector->tx_ring = adapter->tx_ring[ring_idx];
795         q_vector->tx_ring->q_vector = q_vector;
796         q_vector->itr_val = adapter->tx_itr_setting;
797         if (q_vector->itr_val && q_vector->itr_val <= 3)
798                 q_vector->itr_val = IGB_START_ITR;
799 }
800
801 /**
802  * igb_map_ring_to_vector - maps allocated queues to vectors
803  *
804  * This function maps the recently allocated queues to vectors.
805  **/
806 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
807 {
808         int i;
809         int v_idx = 0;
810
811         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
812             (adapter->num_q_vectors < adapter->num_tx_queues))
813                 return -ENOMEM;
814
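        /*
         * Example: with 4 RX and 4 TX queues, 8 q_vectors gives each ring its
         * own vector; with only 4 q_vectors, vector i services both
         * rx_ring[i] and tx_ring[i] (queue pairing).
         */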
815         if (adapter->num_q_vectors >=
816             (adapter->num_rx_queues + adapter->num_tx_queues)) {
817                 for (i = 0; i < adapter->num_rx_queues; i++)
818                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
819                 for (i = 0; i < adapter->num_tx_queues; i++)
820                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
821         } else {
822                 for (i = 0; i < adapter->num_rx_queues; i++) {
823                         if (i < adapter->num_tx_queues)
824                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
825                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
826                 }
827                 for (; i < adapter->num_tx_queues; i++)
828                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
829         }
830         return 0;
831 }
832
833 /**
834  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
835  *
836  * This function initializes the interrupts and allocates all of the queues.
837  **/
838 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
839 {
840         struct pci_dev *pdev = adapter->pdev;
841         int err;
842
843         igb_set_interrupt_capability(adapter);
844
845         err = igb_alloc_q_vectors(adapter);
846         if (err) {
847                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
848                 goto err_alloc_q_vectors;
849         }
850
851         err = igb_alloc_queues(adapter);
852         if (err) {
853                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
854                 goto err_alloc_queues;
855         }
856
857         err = igb_map_ring_to_vector(adapter);
858         if (err) {
859                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
860                 goto err_map_queues;
861         }
862
863
864         return 0;
865 err_map_queues:
866         igb_free_queues(adapter);
867 err_alloc_queues:
868         igb_free_q_vectors(adapter);
869 err_alloc_q_vectors:
870         igb_reset_interrupt_capability(adapter);
871         return err;
872 }
873
874 /**
875  * igb_request_irq - initialize interrupts
876  *
877  * Attempts to configure interrupts using the best available
878  * capabilities of the hardware and kernel.
879  **/
880 static int igb_request_irq(struct igb_adapter *adapter)
881 {
882         struct net_device *netdev = adapter->netdev;
883         struct pci_dev *pdev = adapter->pdev;
884         int err = 0;
885
886         if (adapter->msix_entries) {
887                 err = igb_request_msix(adapter);
888                 if (!err)
889                         goto request_done;
890                 /* fall back to MSI */
891                 igb_clear_interrupt_scheme(adapter);
892                 if (!pci_enable_msi(adapter->pdev))
893                         adapter->flags |= IGB_FLAG_HAS_MSI;
894                 igb_free_all_tx_resources(adapter);
895                 igb_free_all_rx_resources(adapter);
896                 adapter->num_tx_queues = 1;
897                 adapter->num_rx_queues = 1;
898                 adapter->num_q_vectors = 1;
899                 err = igb_alloc_q_vectors(adapter);
900                 if (err) {
901                         dev_err(&pdev->dev,
902                                 "Unable to allocate memory for vectors\n");
903                         goto request_done;
904                 }
905                 err = igb_alloc_queues(adapter);
906                 if (err) {
907                         dev_err(&pdev->dev,
908                                 "Unable to allocate memory for queues\n");
909                         igb_free_q_vectors(adapter);
910                         goto request_done;
911                 }
912                 igb_setup_all_tx_resources(adapter);
913                 igb_setup_all_rx_resources(adapter);
914         } else {
915                 igb_assign_vector(adapter->q_vector[0], 0);
916         }
917
918         if (adapter->flags & IGB_FLAG_HAS_MSI) {
919                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
920                                   netdev->name, adapter);
921                 if (!err)
922                         goto request_done;
923
924                 /* fall back to legacy interrupts */
925                 igb_reset_interrupt_capability(adapter);
926                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
927         }
928
929         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
930                           netdev->name, adapter);
931
932         if (err)
933                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
934                         err);
935
936 request_done:
937         return err;
938 }
939
940 static void igb_free_irq(struct igb_adapter *adapter)
941 {
942         if (adapter->msix_entries) {
943                 int vector = 0, i;
944
945                 free_irq(adapter->msix_entries[vector++].vector, adapter);
946
947                 for (i = 0; i < adapter->num_q_vectors; i++) {
948                         struct igb_q_vector *q_vector = adapter->q_vector[i];
949                         free_irq(adapter->msix_entries[vector++].vector,
950                                  q_vector);
951                 }
952         } else {
953                 free_irq(adapter->pdev->irq, adapter);
954         }
955 }
956
957 /**
958  * igb_irq_disable - Mask off interrupt generation on the NIC
959  * @adapter: board private structure
960  **/
961 static void igb_irq_disable(struct igb_adapter *adapter)
962 {
963         struct e1000_hw *hw = &adapter->hw;
964
965         /*
966          * we need to be careful when disabling interrupts.  The VFs are also
967          * mapped into these registers, so clearing the bits can cause
968          * issues for the VF drivers; we therefore only clear what we set
969          */
970         if (adapter->msix_entries) {
971                 u32 regval = rd32(E1000_EIAM);
972                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
973                 wr32(E1000_EIMC, adapter->eims_enable_mask);
974                 regval = rd32(E1000_EIAC);
975                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
976         }
977
978         wr32(E1000_IAM, 0);
979         wr32(E1000_IMC, ~0);
980         wrfl();
981         synchronize_irq(adapter->pdev->irq);
982 }
983
984 /**
985  * igb_irq_enable - Enable default interrupt generation settings
986  * @adapter: board private structure
987  **/
988 static void igb_irq_enable(struct igb_adapter *adapter)
989 {
990         struct e1000_hw *hw = &adapter->hw;
991
992         if (adapter->msix_entries) {
993                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
994                 u32 regval = rd32(E1000_EIAC);
995                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
996                 regval = rd32(E1000_EIAM);
997                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
998                 wr32(E1000_EIMS, adapter->eims_enable_mask);
999                 if (adapter->vfs_allocated_count) {
1000                         wr32(E1000_MBVFIMR, 0xFF);
1001                         ims |= E1000_IMS_VMMB;
1002                 }
1003                 if (adapter->hw.mac.type == e1000_82580)
1004                         ims |= E1000_IMS_DRSTA;
1005
1006                 wr32(E1000_IMS, ims);
1007         } else {
1008                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1009                                 E1000_IMS_DRSTA);
1010                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1011                                 E1000_IMS_DRSTA);
1012         }
1013 }
1014
1015 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1016 {
1017         struct e1000_hw *hw = &adapter->hw;
1018         u16 vid = adapter->hw.mng_cookie.vlan_id;
1019         u16 old_vid = adapter->mng_vlan_id;
1020
1021         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1022                 /* add VID to filter table */
1023                 igb_vfta_set(hw, vid, true);
1024                 adapter->mng_vlan_id = vid;
1025         } else {
1026                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1027         }
1028
1029         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1030             (vid != old_vid) &&
1031             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1032                 /* remove VID from filter table */
1033                 igb_vfta_set(hw, old_vid, false);
1034         }
1035 }
1036
1037 /**
1038  * igb_release_hw_control - release control of the h/w to f/w
1039  * @adapter: address of board private structure
1040  *
1041  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1042  * For ASF and Pass Through versions of f/w this means that the
1043  * driver is no longer loaded.
1044  *
1045  **/
1046 static void igb_release_hw_control(struct igb_adapter *adapter)
1047 {
1048         struct e1000_hw *hw = &adapter->hw;
1049         u32 ctrl_ext;
1050
1051         /* Let firmware take over control of h/w */
1052         ctrl_ext = rd32(E1000_CTRL_EXT);
1053         wr32(E1000_CTRL_EXT,
1054                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1055 }
1056
1057 /**
1058  * igb_get_hw_control - get control of the h/w from f/w
1059  * @adapter: address of board private structure
1060  *
1061  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1062  * For ASF and Pass Through versions of f/w this means that
1063  * the driver is loaded.
1064  *
1065  **/
1066 static void igb_get_hw_control(struct igb_adapter *adapter)
1067 {
1068         struct e1000_hw *hw = &adapter->hw;
1069         u32 ctrl_ext;
1070
1071         /* Let firmware know the driver has taken over */
1072         ctrl_ext = rd32(E1000_CTRL_EXT);
1073         wr32(E1000_CTRL_EXT,
1074                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1075 }
1076
1077 /**
1078  * igb_configure - configure the hardware for RX and TX
1079  * @adapter: private board structure
1080  **/
1081 static void igb_configure(struct igb_adapter *adapter)
1082 {
1083         struct net_device *netdev = adapter->netdev;
1084         int i;
1085
1086         igb_get_hw_control(adapter);
1087         igb_set_rx_mode(netdev);
1088
1089         igb_restore_vlan(adapter);
1090
1091         igb_setup_tctl(adapter);
1092         igb_setup_mrqc(adapter);
1093         igb_setup_rctl(adapter);
1094
1095         igb_configure_tx(adapter);
1096         igb_configure_rx(adapter);
1097
1098         igb_rx_fifo_flush_82575(&adapter->hw);
1099
1100         /* call igb_desc_unused which always leaves
1101          * at least 1 descriptor unused to make sure
1102          * next_to_use != next_to_clean */
1103         for (i = 0; i < adapter->num_rx_queues; i++) {
1104                 struct igb_ring *ring = adapter->rx_ring[i];
1105                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1106         }
1107
1108
1109         adapter->tx_queue_len = netdev->tx_queue_len;
1110 }
1111
1112 /**
1113  * igb_power_up_link - Power up the phy/serdes link
1114  * @adapter: address of board private structure
1115  **/
1116 void igb_power_up_link(struct igb_adapter *adapter)
1117 {
1118         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1119                 igb_power_up_phy_copper(&adapter->hw);
1120         else
1121                 igb_power_up_serdes_link_82575(&adapter->hw);
1122 }
1123
1124 /**
1125  * igb_power_down_link - Power down the phy/serdes link
1126  * @adapter: address of board private structure
1127  */
1128 static void igb_power_down_link(struct igb_adapter *adapter)
1129 {
1130         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1131                 igb_power_down_phy_copper_82575(&adapter->hw);
1132         else
1133                 igb_shutdown_serdes_link_82575(&adapter->hw);
1134 }
1135
1136 /**
1137  * igb_up - Open the interface and prepare it to handle traffic
1138  * @adapter: board private structure
1139  **/
1140 int igb_up(struct igb_adapter *adapter)
1141 {
1142         struct e1000_hw *hw = &adapter->hw;
1143         int i;
1144
1145         /* hardware has been reset, we need to reload some things */
1146         igb_configure(adapter);
1147
1148         clear_bit(__IGB_DOWN, &adapter->state);
1149
1150         for (i = 0; i < adapter->num_q_vectors; i++) {
1151                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1152                 napi_enable(&q_vector->napi);
1153         }
1154         if (adapter->msix_entries)
1155                 igb_configure_msix(adapter);
1156         else
1157                 igb_assign_vector(adapter->q_vector[0], 0);
1158
1159         /* Clear any pending interrupts. */
1160         rd32(E1000_ICR);
1161         igb_irq_enable(adapter);
1162
1163         /* notify VFs that reset has been completed */
1164         if (adapter->vfs_allocated_count) {
1165                 u32 reg_data = rd32(E1000_CTRL_EXT);
1166                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1167                 wr32(E1000_CTRL_EXT, reg_data);
1168         }
1169
1170         netif_tx_start_all_queues(adapter->netdev);
1171
1172         /* start the watchdog. */
1173         hw->mac.get_link_status = 1;
1174         schedule_work(&adapter->watchdog_task);
1175
1176         return 0;
1177 }
1178
1179 void igb_down(struct igb_adapter *adapter)
1180 {
1181         struct net_device *netdev = adapter->netdev;
1182         struct e1000_hw *hw = &adapter->hw;
1183         u32 tctl, rctl;
1184         int i;
1185
1186         /* signal that we're down so the interrupt handler does not
1187          * reschedule our watchdog timer */
1188         set_bit(__IGB_DOWN, &adapter->state);
1189
1190         /* disable receives in the hardware */
1191         rctl = rd32(E1000_RCTL);
1192         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1193         /* flush and sleep below */
1194
1195         netif_tx_stop_all_queues(netdev);
1196
1197         /* disable transmits in the hardware */
1198         tctl = rd32(E1000_TCTL);
1199         tctl &= ~E1000_TCTL_EN;
1200         wr32(E1000_TCTL, tctl);
1201         /* flush both disables and wait for them to finish */
1202         wrfl();
1203         msleep(10);
1204
1205         for (i = 0; i < adapter->num_q_vectors; i++) {
1206                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1207                 napi_disable(&q_vector->napi);
1208         }
1209
1210         igb_irq_disable(adapter);
1211
1212         del_timer_sync(&adapter->watchdog_timer);
1213         del_timer_sync(&adapter->phy_info_timer);
1214
1215         netdev->tx_queue_len = adapter->tx_queue_len;
1216         netif_carrier_off(netdev);
1217
1218         /* record the stats before reset */
1219         igb_update_stats(adapter);
1220
1221         adapter->link_speed = 0;
1222         adapter->link_duplex = 0;
1223
1224         if (!pci_channel_offline(adapter->pdev))
1225                 igb_reset(adapter);
1226         igb_clean_all_tx_rings(adapter);
1227         igb_clean_all_rx_rings(adapter);
1228 #ifdef CONFIG_IGB_DCA
1229
1230         /* since we reset the hardware DCA settings were cleared */
1231         /* since we reset the hardware, DCA settings were cleared */
1232 #endif
1233 }
1234
1235 void igb_reinit_locked(struct igb_adapter *adapter)
1236 {
1237         WARN_ON(in_interrupt());
1238         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1239                 msleep(1);
1240         igb_down(adapter);
1241         igb_up(adapter);
1242         clear_bit(__IGB_RESETTING, &adapter->state);
1243 }
1244
1245 void igb_reset(struct igb_adapter *adapter)
1246 {
1247         struct pci_dev *pdev = adapter->pdev;
1248         struct e1000_hw *hw = &adapter->hw;
1249         struct e1000_mac_info *mac = &hw->mac;
1250         struct e1000_fc_info *fc = &hw->fc;
1251         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1252         u16 hwm;
1253
1254         /* Repartition Pba for greater than 9k mtu
1255          * To take effect CTRL.RST is required.
1256          */
1257         switch (mac->type) {
1258         case e1000_82580:
1259                 pba = rd32(E1000_RXPBS);
1260                 pba = igb_rxpbs_adjust_82580(pba);
1261                 break;
1262         case e1000_82576:
1263                 pba = rd32(E1000_RXPBS);
1264                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1265                 break;
1266         case e1000_82575:
1267         default:
1268                 pba = E1000_PBA_34K;
1269                 break;
1270         }
1271
1272         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1273             (mac->type < e1000_82576)) {
1274                 /* adjust PBA for jumbo frames */
1275                 wr32(E1000_PBA, pba);
1276
1277                 /* To maintain wire speed transmits, the Tx FIFO should be
1278                  * large enough to accommodate two full transmit packets,
1279                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1280                  * the Rx FIFO should be large enough to accommodate at least
1281                  * one full receive packet and is similarly rounded up and
1282                  * expressed in KB. */
1283                 pba = rd32(E1000_PBA);
1284                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1285                 tx_space = pba >> 16;
1286                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1287                 pba &= 0xffff;
1288                 /* the tx fifo also stores 16 bytes of information about the tx
1289                  * but don't include ethernet FCS because hardware appends it */
1290                 min_tx_space = (adapter->max_frame_size +
1291                                 sizeof(union e1000_adv_tx_desc) -
1292                                 ETH_FCS_LEN) * 2;
1293                 min_tx_space = ALIGN(min_tx_space, 1024);
1294                 min_tx_space >>= 10;
1295                 /* software strips receive CRC, so leave room for it */
1296                 min_rx_space = adapter->max_frame_size;
1297                 min_rx_space = ALIGN(min_rx_space, 1024);
1298                 min_rx_space >>= 10;
1299
1300                 /* If current Tx allocation is less than the min Tx FIFO size,
1301                  * and the min Tx FIFO size is less than the current Rx FIFO
1302                  * allocation, take space away from current Rx allocation */
1303                 if (tx_space < min_tx_space &&
1304                     ((min_tx_space - tx_space) < pba)) {
1305                         pba = pba - (min_tx_space - tx_space);
1306
1307                         /* if short on rx space, rx wins and must trump tx
1308                          * adjustment */
1309                         if (pba < min_rx_space)
1310                                 pba = min_rx_space;
1311                 }
1312                 wr32(E1000_PBA, pba);
1313         }
1314
1315         /* flow control settings */
1316         /* The high water mark must be low enough to fit one full frame
1317          * (or the size used for early receive) above it in the Rx FIFO.
1318          * Set it to the lower of:
1319          * - 90% of the Rx FIFO size, or
1320          * - the full Rx FIFO size minus one full frame */
1321         hwm = min(((pba << 10) * 9 / 10),
1322                         ((pba << 10) - 2 * adapter->max_frame_size));
1323
1324         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1325         fc->low_water = fc->high_water - 16;
1326         fc->pause_time = 0xFFFF;
1327         fc->send_xon = 1;
1328         fc->current_mode = fc->requested_mode;
1329
1330         /* disable receive for all VFs and wait one second */
1331         if (adapter->vfs_allocated_count) {
1332                 int i;
1333                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1334                         adapter->vf_data[i].flags = 0;
1335
1336                 /* ping all the active vfs to let them know we are going down */
1337                 igb_ping_all_vfs(adapter);
1338
1339                 /* disable transmits and receives */
1340                 wr32(E1000_VFRE, 0);
1341                 wr32(E1000_VFTE, 0);
1342         }
1343
1344         /* Allow time for pending master requests to run */
1345         hw->mac.ops.reset_hw(hw);
1346         wr32(E1000_WUC, 0);
1347
1348         if (hw->mac.ops.init_hw(hw))
1349                 dev_err(&pdev->dev, "Hardware Error\n");
1350
1351         if (hw->mac.type == e1000_82580) {
1352                 u32 reg = rd32(E1000_PCIEMISC);
1353                 wr32(E1000_PCIEMISC,
1354                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1355         }
1356         if (!netif_running(adapter->netdev))
1357                 igb_power_down_link(adapter);
1358
1359         igb_update_mng_vlan(adapter);
1360
1361         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1362         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1363
1364         igb_get_phy_info(hw);
1365 }
1366
1367 static const struct net_device_ops igb_netdev_ops = {
1368         .ndo_open               = igb_open,
1369         .ndo_stop               = igb_close,
1370         .ndo_start_xmit         = igb_xmit_frame_adv,
1371         .ndo_get_stats          = igb_get_stats,
1372         .ndo_set_rx_mode        = igb_set_rx_mode,
1373         .ndo_set_multicast_list = igb_set_rx_mode,
1374         .ndo_set_mac_address    = igb_set_mac,
1375         .ndo_change_mtu         = igb_change_mtu,
1376         .ndo_do_ioctl           = igb_ioctl,
1377         .ndo_tx_timeout         = igb_tx_timeout,
1378         .ndo_validate_addr      = eth_validate_addr,
1379         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1380         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1381         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1382         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1383         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1384         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1385         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1386 #ifdef CONFIG_NET_POLL_CONTROLLER
1387         .ndo_poll_controller    = igb_netpoll,
1388 #endif
1389 };
1390
1391 /**
1392  * igb_probe - Device Initialization Routine
1393  * @pdev: PCI device information struct
1394  * @ent: entry in igb_pci_tbl
1395  *
1396  * Returns 0 on success, negative on failure
1397  *
1398  * igb_probe initializes an adapter identified by a pci_dev structure.
1399  * The OS initialization, configuring of the adapter private structure,
1400  * and a hardware reset occur.
1401  **/
1402 static int __devinit igb_probe(struct pci_dev *pdev,
1403                                const struct pci_device_id *ent)
1404 {
1405         struct net_device *netdev;
1406         struct igb_adapter *adapter;
1407         struct e1000_hw *hw;
1408         u16 eeprom_data = 0;
1409         static int global_quad_port_a; /* global quad port a indication */
1410         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1411         unsigned long mmio_start, mmio_len;
1412         int err, pci_using_dac;
1413         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1414         u32 part_num;
1415
1416         err = pci_enable_device_mem(pdev);
1417         if (err)
1418                 return err;
1419
1420         pci_using_dac = 0;
1421         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1422         if (!err) {
1423                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1424                 if (!err)
1425                         pci_using_dac = 1;
1426         } else {
1427                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1428                 if (err) {
1429                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1430                         if (err) {
1431                                 dev_err(&pdev->dev, "No usable DMA "
1432                                         "configuration, aborting\n");
1433                                 goto err_dma;
1434                         }
1435                 }
1436         }
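             /*
              * The logic above prefers a 64-bit DMA mask and only falls back
              * to 32 bits if the platform rejects it.  pci_using_dac records
              * which case we are in: it gates NETIF_F_HIGHDMA further down,
              * so the stack only hands the driver high-memory buffers when
              * the 64-bit mask was accepted.
              */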
1437
1438         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1439                                            IORESOURCE_MEM),
1440                                            igb_driver_name);
1441         if (err)
1442                 goto err_pci_reg;
1443
1444         pci_enable_pcie_error_reporting(pdev);
1445
1446         pci_set_master(pdev);
1447         pci_save_state(pdev);
1448
1449         err = -ENOMEM;
1450         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1451                                    IGB_ABS_MAX_TX_QUEUES);
1452         if (!netdev)
1453                 goto err_alloc_etherdev;
1454
1455         SET_NETDEV_DEV(netdev, &pdev->dev);
1456
1457         pci_set_drvdata(pdev, netdev);
1458         adapter = netdev_priv(netdev);
1459         adapter->netdev = netdev;
1460         adapter->pdev = pdev;
1461         hw = &adapter->hw;
1462         hw->back = adapter;
1463         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1464
1465         mmio_start = pci_resource_start(pdev, 0);
1466         mmio_len = pci_resource_len(pdev, 0);
1467
1468         err = -EIO;
1469         hw->hw_addr = ioremap(mmio_start, mmio_len);
1470         if (!hw->hw_addr)
1471                 goto err_ioremap;
1472
1473         netdev->netdev_ops = &igb_netdev_ops;
1474         igb_set_ethtool_ops(netdev);
1475         netdev->watchdog_timeo = 5 * HZ;
1476
1477         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1478
1479         netdev->mem_start = mmio_start;
1480         netdev->mem_end = mmio_start + mmio_len;
1481
1482         /* PCI config space info */
1483         hw->vendor_id = pdev->vendor;
1484         hw->device_id = pdev->device;
1485         hw->revision_id = pdev->revision;
1486         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1487         hw->subsystem_device_id = pdev->subsystem_device;
1488
1489         /* Copy the default MAC, PHY and NVM function pointers */
1490         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1491         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1492         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1493         /* Initialize skew-specific constants */
1494         err = ei->get_invariants(hw);
1495         if (err)
1496                 goto err_sw_init;
1497
1498         /* setup the private structure */
1499         err = igb_sw_init(adapter);
1500         if (err)
1501                 goto err_sw_init;
1502
1503         igb_get_bus_info_pcie(hw);
1504
1505         hw->phy.autoneg_wait_to_complete = false;
1506
1507         /* Copper options */
1508         if (hw->phy.media_type == e1000_media_type_copper) {
1509                 hw->phy.mdix = AUTO_ALL_MODES;
1510                 hw->phy.disable_polarity_correction = false;
1511                 hw->phy.ms_type = e1000_ms_hw_default;
1512         }
1513
1514         if (igb_check_reset_block(hw))
1515                 dev_info(&pdev->dev,
1516                         "PHY reset is blocked due to SOL/IDER session.\n");
1517
1518         netdev->features = NETIF_F_SG |
1519                            NETIF_F_IP_CSUM |
1520                            NETIF_F_HW_VLAN_TX |
1521                            NETIF_F_HW_VLAN_RX |
1522                            NETIF_F_HW_VLAN_FILTER;
1523
1524         netdev->features |= NETIF_F_IPV6_CSUM;
1525         netdev->features |= NETIF_F_TSO;
1526         netdev->features |= NETIF_F_TSO6;
1527         netdev->features |= NETIF_F_GRO;
1528
1529         netdev->vlan_features |= NETIF_F_TSO;
1530         netdev->vlan_features |= NETIF_F_TSO6;
1531         netdev->vlan_features |= NETIF_F_IP_CSUM;
1532         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1533         netdev->vlan_features |= NETIF_F_SG;
1534
1535         if (pci_using_dac)
1536                 netdev->features |= NETIF_F_HIGHDMA;
1537
1538         if (hw->mac.type >= e1000_82576)
1539                 netdev->features |= NETIF_F_SCTP_CSUM;
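             /*
              * NETIF_F_SCTP_CSUM (SCTP CRC32c offload on transmit) is only
              * advertised for 82576 and later; igb_setup_mrqc() makes the
              * matching receive-side choice by setting E1000_RXCSUM_CRCOFL on
              * those same parts.
              */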
1540
1541         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1542
1543         /* before reading the NVM, reset the controller to put the device in a
1544          * known good starting state */
1545         hw->mac.ops.reset_hw(hw);
1546
1547         /* make sure the NVM is good */
1548         if (igb_validate_nvm_checksum(hw) < 0) {
1549                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1550                 err = -EIO;
1551                 goto err_eeprom;
1552         }
1553
1554         /* copy the MAC address out of the NVM */
1555         if (hw->mac.ops.read_mac_addr(hw))
1556                 dev_err(&pdev->dev, "NVM Read Error\n");
1557
1558         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1559         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1560
1561         if (!is_valid_ether_addr(netdev->perm_addr)) {
1562                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1563                 err = -EIO;
1564                 goto err_eeprom;
1565         }
1566
1567         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1568                     (unsigned long) adapter);
1569         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1570                     (unsigned long) adapter);
1571
1572         INIT_WORK(&adapter->reset_task, igb_reset_task);
1573         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1574
1575         /* Initialize link properties that are user-changeable */
1576         adapter->fc_autoneg = true;
1577         hw->mac.autoneg = true;
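             /* 0x2f corresponds to advertising 10/100 half and full duplex plus
              * 1000 full duplex (1000 half is not advertised) */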
1578         hw->phy.autoneg_advertised = 0x2f;
1579
1580         hw->fc.requested_mode = e1000_fc_default;
1581         hw->fc.current_mode = e1000_fc_default;
1582
1583         igb_validate_mdi_setting(hw);
1584
1585         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
1586          * enable the ACPI Magic Packet filter
1587          */
1588
1589         if (hw->bus.func == 0)
1590                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1591         else if (hw->mac.type == e1000_82580)
1592                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1593                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1594                                  &eeprom_data);
1595         else if (hw->bus.func == 1)
1596                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1597
1598         if (eeprom_data & eeprom_apme_mask)
1599                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1600
1601         /* now that we have the eeprom settings, apply the special cases where
1602          * the eeprom may be wrong or the board simply won't support wake on
1603          * lan on a particular port */
1604         switch (pdev->device) {
1605         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1606                 adapter->eeprom_wol = 0;
1607                 break;
1608         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1609         case E1000_DEV_ID_82576_FIBER:
1610         case E1000_DEV_ID_82576_SERDES:
1611                 /* Wake events only supported on port A for dual fiber
1612                  * regardless of eeprom setting */
1613                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1614                         adapter->eeprom_wol = 0;
1615                 break;
1616         case E1000_DEV_ID_82576_QUAD_COPPER:
1617                 /* if quad port adapter, disable WoL on all but port A */
1618                 if (global_quad_port_a != 0)
1619                         adapter->eeprom_wol = 0;
1620                 else
1621                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1622                 /* Reset for multiple quad port adapters */
1623                 if (++global_quad_port_a == 4)
1624                         global_quad_port_a = 0;
1625                 break;
1626         }
1627
1628         /* initialize the wol settings based on the eeprom settings */
1629         adapter->wol = adapter->eeprom_wol;
1630         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
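             /*
              * adapter->wol starts out as the EEPROM default filtered above;
              * userspace can normally override it later through ethtool's
              * wake-on-LAN setting, which updates the PCI wakeup enable again.
              */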
1631
1632         /* reset the hardware with the new settings */
1633         igb_reset(adapter);
1634
1635         /* let the f/w know that the h/w is now under the control of the
1636          * driver. */
1637         igb_get_hw_control(adapter);
1638
1639         strcpy(netdev->name, "eth%d");
1640         err = register_netdev(netdev);
1641         if (err)
1642                 goto err_register;
1643
1644         /* carrier off reporting is important to ethtool even BEFORE open */
1645         netif_carrier_off(netdev);
1646
1647 #ifdef CONFIG_IGB_DCA
1648         if (dca_add_requester(&pdev->dev) == 0) {
1649                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1650                 dev_info(&pdev->dev, "DCA enabled\n");
1651                 igb_setup_dca(adapter);
1652         }
1653
1654 #endif
1655         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1656         /* print bus type/speed/width info */
1657         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1658                  netdev->name,
1659                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1660                                                             "unknown"),
1661                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1662                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1663                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1664                    "unknown"),
1665                  netdev->dev_addr);
1666
1667         igb_read_part_num(hw, &part_num);
1668         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1669                 (part_num >> 8), (part_num & 0xff));
1670
1671         dev_info(&pdev->dev,
1672                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1673                 adapter->msix_entries ? "MSI-X" :
1674                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1675                 adapter->num_rx_queues, adapter->num_tx_queues);
1676
1677         return 0;
1678
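     /*
      * The labels below unwind in reverse order of the setup above: release
      * firmware control, reset the PHY and unmap the flash BAR, tear down the
      * interrupt scheme and MMIO mapping, free the netdev, and finally release
      * the PCI regions and disable the device.
      */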
1679 err_register:
1680         igb_release_hw_control(adapter);
1681 err_eeprom:
1682         if (!igb_check_reset_block(hw))
1683                 igb_reset_phy(hw);
1684
1685         if (hw->flash_address)
1686                 iounmap(hw->flash_address);
1687 err_sw_init:
1688         igb_clear_interrupt_scheme(adapter);
1689         iounmap(hw->hw_addr);
1690 err_ioremap:
1691         free_netdev(netdev);
1692 err_alloc_etherdev:
1693         pci_release_selected_regions(pdev,
1694                                      pci_select_bars(pdev, IORESOURCE_MEM));
1695 err_pci_reg:
1696 err_dma:
1697         pci_disable_device(pdev);
1698         return err;
1699 }
1700
1701 /**
1702  * igb_remove - Device Removal Routine
1703  * @pdev: PCI device information struct
1704  *
1705  * igb_remove is called by the PCI subsystem to alert the driver
1706  * that it should release a PCI device.  This could be caused by a
1707  * Hot-Plug event, or because the driver is going to be removed from
1708  * memory.
1709  **/
1710 static void __devexit igb_remove(struct pci_dev *pdev)
1711 {
1712         struct net_device *netdev = pci_get_drvdata(pdev);
1713         struct igb_adapter *adapter = netdev_priv(netdev);
1714         struct e1000_hw *hw = &adapter->hw;
1715
1716         /* flush_scheduled_work() may reschedule our watchdog task, so
1717          * explicitly disable watchdog tasks from being rescheduled  */
1718         set_bit(__IGB_DOWN, &adapter->state);
1719         del_timer_sync(&adapter->watchdog_timer);
1720         del_timer_sync(&adapter->phy_info_timer);
1721
1722         flush_scheduled_work();
1723
1724 #ifdef CONFIG_IGB_DCA
1725         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1726                 dev_info(&pdev->dev, "DCA disabled\n");
1727                 dca_remove_requester(&pdev->dev);
1728                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1729                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1730         }
1731 #endif
1732
1733         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
1734          * would have already happened in close and is redundant. */
1735         igb_release_hw_control(adapter);
1736
1737         unregister_netdev(netdev);
1738
1739         igb_clear_interrupt_scheme(adapter);
1740
1741 #ifdef CONFIG_PCI_IOV
1742         /* reclaim resources allocated to VFs */
1743         if (adapter->vf_data) {
1744                 /* disable iov and allow time for transactions to clear */
1745                 pci_disable_sriov(pdev);
1746                 msleep(500);
1747
1748                 kfree(adapter->vf_data);
1749                 adapter->vf_data = NULL;
1750                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1751                 msleep(100);
1752                 dev_info(&pdev->dev, "IOV Disabled\n");
1753         }
1754 #endif
1755
1756         iounmap(hw->hw_addr);
1757         if (hw->flash_address)
1758                 iounmap(hw->flash_address);
1759         pci_release_selected_regions(pdev,
1760                                      pci_select_bars(pdev, IORESOURCE_MEM));
1761
1762         free_netdev(netdev);
1763
1764         pci_disable_pcie_error_reporting(pdev);
1765
1766         pci_disable_device(pdev);
1767 }
1768
1769 /**
1770  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
1771  * @adapter: board private structure to initialize
1772  *
1773  * This function initializes the vf specific data storage and then attempts to
1774  * allocate the VFs.  The reason for ordering it this way is because it is much
1775  * more expensive time-wise to disable SR-IOV than it is to allocate and free
1776  * the memory for the VFs.
1777  **/
1778 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
1779 {
1780 #ifdef CONFIG_PCI_IOV
1781         struct pci_dev *pdev = adapter->pdev;
1782
1783         if (adapter->vfs_allocated_count > 7)
1784                 adapter->vfs_allocated_count = 7;
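             /*
              * The clamp to 7 reflects the hardware limit: the 82576 provides
              * eight virtualization pools and the PF always keeps one for
              * itself, leaving at most seven VFs (assumption based on the
              * 82576 datasheet, not derived from this file).
              */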
1785
1786         if (adapter->vfs_allocated_count) {
1787                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1788                                            sizeof(struct vf_data_storage),
1789                                            GFP_KERNEL);
1790                 /* if allocation failed then we do not support SR-IOV */
1791                 if (!adapter->vf_data) {
1792                         adapter->vfs_allocated_count = 0;
1793                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
1794                                 "Data Storage\n");
1795                 }
1796         }
1797
1798         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
1799                 kfree(adapter->vf_data);
1800                 adapter->vf_data = NULL;
1801 #endif /* CONFIG_PCI_IOV */
1802                 adapter->vfs_allocated_count = 0;
1803 #ifdef CONFIG_PCI_IOV
1804         } else {
1805                 unsigned char mac_addr[ETH_ALEN];
1806                 int i;
1807                 dev_info(&pdev->dev, "%d vfs allocated\n",
1808                          adapter->vfs_allocated_count);
1809                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
1810                         random_ether_addr(mac_addr);
1811                         igb_set_vf_mac(adapter, i, mac_addr);
1812                 }
1813         }
1814 #endif /* CONFIG_PCI_IOV */
1815 }
1816
1817
1818 /**
1819  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
1820  * @adapter: board private structure to initialize
1821  *
1822  * igb_init_hw_timer initializes the function pointer and values for the hw
1823  * timer found in hardware.
1824  **/
1825 static void igb_init_hw_timer(struct igb_adapter *adapter)
1826 {
1827         struct e1000_hw *hw = &adapter->hw;
1828
1829         switch (hw->mac.type) {
1830         case e1000_82580:
1831                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1832                 adapter->cycles.read = igb_read_clock;
1833                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1834                 adapter->cycles.mult = 1;
1835                 /*
1836                  * The 82580 timesync updates the system timer in 8 ns increments
1837                  * and the value cannot be shifted.  Instead we need to shift
1838                  * the registers to generate a 64bit timer value.  As a result
1839                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
1840                  * 24 in order to generate a larger value for synchronization.
1841                  */
1842                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
1843                 /* disable system timer temporarily by setting bit 31 */
1844                 wr32(E1000_TSAUXC, 0x80000000);
1845                 wrfl();
1846
1847                 /* Set registers so that rollover occurs soon to test this. */
1848                 wr32(E1000_SYSTIMR, 0x00000000);
1849                 wr32(E1000_SYSTIML, 0x80000000);
1850                 wr32(E1000_SYSTIMH, 0x000000FF);
1851                 wrfl();
1852
1853                 /* enable system timer by clearing bit 31 */
1854                 wr32(E1000_TSAUXC, 0x0);
1855                 wrfl();
1856
1857                 timecounter_init(&adapter->clock,
1858                                  &adapter->cycles,
1859                                  ktime_to_ns(ktime_get_real()));
1860                 /*
1861                  * Synchronize our NIC clock against system wall clock. NIC
1862                  * time stamp reading requires ~3us per sample; each sample
1863                  * is quite stable even under load, so 10 samples per offset
1864                  * comparison are sufficient.
1865                  */
1866                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1867                 adapter->compare.source = &adapter->clock;
1868                 adapter->compare.target = ktime_get_real;
1869                 adapter->compare.num_samples = 10;
1870                 timecompare_update(&adapter->compare, 0);
1871                 break;
1872         case e1000_82576:
1873                 /*
1874                  * Initialize hardware timer: we keep it running just in case
1875                  * some program needs it later on.
1876                  */
1877                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1878                 adapter->cycles.read = igb_read_clock;
1879                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1880                 adapter->cycles.mult = 1;
1881                 /*
1882                  * Scale the NIC clock cycle by a large factor so that
1883                  * relatively small clock corrections can be added or
1884                  * subtracted at each clock tick. The drawbacks of a large
1885                  * factor are a) that the clock register overflows more quickly
1886                  * (not such a big deal) and b) that the increment per tick has
1887                  * to fit into 24 bits.  As a result we need to use a shift of
1888                  * 19 so we can fit a value of 16 into the TIMINCA register.
1889                  */
1890                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1891                 wr32(E1000_TIMINCA,
1892                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
1893                                 (16 << IGB_82576_TSYNC_SHIFT));
1894
1895                 /* Set registers so that rollover occurs soon to test this. */
1896                 wr32(E1000_SYSTIML, 0x00000000);
1897                 wr32(E1000_SYSTIMH, 0xFF800000);
1898                 wrfl();
1899
1900                 timecounter_init(&adapter->clock,
1901                                  &adapter->cycles,
1902                                  ktime_to_ns(ktime_get_real()));
1903                 /*
1904                  * Synchronize our NIC clock against system wall clock. NIC
1905                  * time stamp reading requires ~3us per sample; each sample
1906                  * is quite stable even under load, so 10 samples per offset
1907                  * comparison are sufficient.
1908                  */
1909                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1910                 adapter->compare.source = &adapter->clock;
1911                 adapter->compare.target = ktime_get_real;
1912                 adapter->compare.num_samples = 10;
1913                 timecompare_update(&adapter->compare, 0);
1914                 break;
1915         case e1000_82575:
1916                 /* 82575 does not support timesync */
1917         default:
1918                 break;
1919         }
1920
1921 }
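     /*
      * Illustrative sketch (not part of the driver): code elsewhere that wants a
      * system-clock view of a raw SYSTIM-based timestamp would do roughly
      *
      *     u64 ns = timecounter_read(&adapter->clock);
      *     ktime_t sys = timecompare_transform(&adapter->compare, ns);
      *
      * timecounter_read() converts accumulated cycles through the cycles.mult
      * and cycles.shift values set up above, and timecompare_transform() maps
      * the result onto the system wall clock using the offset sampled by
      * timecompare_update().
      */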
1922
1923 /**
1924  * igb_sw_init - Initialize general software structures (struct igb_adapter)
1925  * @adapter: board private structure to initialize
1926  *
1927  * igb_sw_init initializes the Adapter private data structure.
1928  * Fields are initialized based on PCI device information and
1929  * OS network device settings (MTU size).
1930  **/
1931 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1932 {
1933         struct e1000_hw *hw = &adapter->hw;
1934         struct net_device *netdev = adapter->netdev;
1935         struct pci_dev *pdev = adapter->pdev;
1936
1937         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1938
1939         adapter->tx_ring_count = IGB_DEFAULT_TXD;
1940         adapter->rx_ring_count = IGB_DEFAULT_RXD;
1941         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1942         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1943
1944         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1945         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1946
1947 #ifdef CONFIG_PCI_IOV
1948         if (hw->mac.type == e1000_82576)
1949                 adapter->vfs_allocated_count = max_vfs;
1950
1951 #endif /* CONFIG_PCI_IOV */
1952         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
1953
1954         /*
1955          * if rss_queues > 4 or vfs are going to be allocated with rss_queues
1956          * then we should combine the queues into a queue pair in order to
1957          * conserve interrupts due to limited supply
1958          */
1959         if ((adapter->rss_queues > 4) ||
1960             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
1961                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
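             /*
              * With IGB_FLAG_QUEUE_PAIRS set, each Tx ring shares a q_vector
              * (and therefore an MSI-X vector) with an Rx ring, so vector
              * usage is roughly rss_queues + 1 instead of 2 * rss_queues + 1,
              * which keeps us within the limited per-function vector budget.
              */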
1962
1963         /* This call may decrease the number of queues */
1964         if (igb_init_interrupt_scheme(adapter)) {
1965                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1966                 return -ENOMEM;
1967         }
1968
1969         igb_init_hw_timer(adapter);
1970         igb_probe_vfs(adapter);
1971
1972         /* Explicitly disable IRQ since the NIC can be in any state. */
1973         igb_irq_disable(adapter);
1974
1975         set_bit(__IGB_DOWN, &adapter->state);
1976         return 0;
1977 }
1978
1979 /**
1980  * igb_open - Called when a network interface is made active
1981  * @netdev: network interface device structure
1982  *
1983  * Returns 0 on success, negative value on failure
1984  *
1985  * The open entry point is called when a network interface is made
1986  * active by the system (IFF_UP).  At this point all resources needed
1987  * for transmit and receive operations are allocated, the interrupt
1988  * handler is registered with the OS, the watchdog timer is started,
1989  * and the stack is notified that the interface is ready.
1990  **/
1991 static int igb_open(struct net_device *netdev)
1992 {
1993         struct igb_adapter *adapter = netdev_priv(netdev);
1994         struct e1000_hw *hw = &adapter->hw;
1995         int err;
1996         int i;
1997
1998         /* disallow open during test */
1999         if (test_bit(__IGB_TESTING, &adapter->state))
2000                 return -EBUSY;
2001
2002         netif_carrier_off(netdev);
2003
2004         /* allocate transmit descriptors */
2005         err = igb_setup_all_tx_resources(adapter);
2006         if (err)
2007                 goto err_setup_tx;
2008
2009         /* allocate receive descriptors */
2010         err = igb_setup_all_rx_resources(adapter);
2011         if (err)
2012                 goto err_setup_rx;
2013
2014         igb_power_up_link(adapter);
2015
2016         /* before we allocate an interrupt, we must be ready to handle it.
2017          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2018          * as soon as we call request_irq, so we have to set up our
2019          * clean_rx handler before we do so.  */
2020         igb_configure(adapter);
2021
2022         err = igb_request_irq(adapter);
2023         if (err)
2024                 goto err_req_irq;
2025
2026         /* From here on the code is the same as igb_up() */
2027         clear_bit(__IGB_DOWN, &adapter->state);
2028
2029         for (i = 0; i < adapter->num_q_vectors; i++) {
2030                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2031                 napi_enable(&q_vector->napi);
2032         }
2033
2034         /* Clear any pending interrupts. */
2035         rd32(E1000_ICR);
2036
2037         igb_irq_enable(adapter);
2038
2039         /* notify VFs that reset has been completed */
2040         if (adapter->vfs_allocated_count) {
2041                 u32 reg_data = rd32(E1000_CTRL_EXT);
2042                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2043                 wr32(E1000_CTRL_EXT, reg_data);
2044         }
2045
2046         netif_tx_start_all_queues(netdev);
2047
2048         /* start the watchdog. */
2049         hw->mac.get_link_status = 1;
2050         schedule_work(&adapter->watchdog_task);
2051
2052         return 0;
2053
2054 err_req_irq:
2055         igb_release_hw_control(adapter);
2056         igb_power_down_link(adapter);
2057         igb_free_all_rx_resources(adapter);
2058 err_setup_rx:
2059         igb_free_all_tx_resources(adapter);
2060 err_setup_tx:
2061         igb_reset(adapter);
2062
2063         return err;
2064 }
2065
2066 /**
2067  * igb_close - Disables a network interface
2068  * @netdev: network interface device structure
2069  *
2070  * Returns 0, this is not allowed to fail
2071  *
2072  * The close entry point is called when an interface is de-activated
2073  * by the OS.  The hardware is still under the driver's control, but
2074  * needs to be disabled.  A global MAC reset is issued to stop the
2075  * hardware, and all transmit and receive resources are freed.
2076  **/
2077 static int igb_close(struct net_device *netdev)
2078 {
2079         struct igb_adapter *adapter = netdev_priv(netdev);
2080
2081         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2082         igb_down(adapter);
2083
2084         igb_free_irq(adapter);
2085
2086         igb_free_all_tx_resources(adapter);
2087         igb_free_all_rx_resources(adapter);
2088
2089         return 0;
2090 }
2091
2092 /**
2093  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2094  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2095  *
2096  * Return 0 on success, negative on failure
2097  **/
2098 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2099 {
2100         struct pci_dev *pdev = tx_ring->pdev;
2101         int size;
2102
2103         size = sizeof(struct igb_buffer) * tx_ring->count;
2104         tx_ring->buffer_info = vmalloc(size);
2105         if (!tx_ring->buffer_info)
2106                 goto err;
2107         memset(tx_ring->buffer_info, 0, size);
2108
2109         /* round up to nearest 4K */
2110         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2111         tx_ring->size = ALIGN(tx_ring->size, 4096);
2112
2113         tx_ring->desc = pci_alloc_consistent(pdev,
2114                                              tx_ring->size,
2115                                              &tx_ring->dma);
2116
2117         if (!tx_ring->desc)
2118                 goto err;
2119
2120         tx_ring->next_to_use = 0;
2121         tx_ring->next_to_clean = 0;
2122         return 0;
2123
2124 err:
2125         vfree(tx_ring->buffer_info);
2126         dev_err(&pdev->dev,
2127                 "Unable to allocate memory for the transmit descriptor ring\n");
2128         return -ENOMEM;
2129 }
2130
2131 /**
2132  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2133  *                                (Descriptors) for all queues
2134  * @adapter: board private structure
2135  *
2136  * Return 0 on success, negative on failure
2137  **/
2138 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2139 {
2140         struct pci_dev *pdev = adapter->pdev;
2141         int i, err = 0;
2142
2143         for (i = 0; i < adapter->num_tx_queues; i++) {
2144                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2145                 if (err) {
2146                         dev_err(&pdev->dev,
2147                                 "Allocation for Tx Queue %u failed\n", i);
2148                         for (i--; i >= 0; i--)
2149                                 igb_free_tx_resources(adapter->tx_ring[i]);
2150                         break;
2151                 }
2152         }
2153
2154         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2155                 int r_idx = i % adapter->num_tx_queues;
2156                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2157         }
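             /*
              * multi_tx_table spreads the fixed IGB_ABS_MAX_TX_QUEUES slots
              * over the rings that were actually allocated.  For example, with
              * four Tx queues the slots map 0,1,2,3,0,1,2,3,... so any queue
              * index selected by the stack always lands on a valid ring.
              */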
2158         return err;
2159 }
2160
2161 /**
2162  * igb_setup_tctl - configure the transmit control registers
2163  * @adapter: Board private structure
2164  **/
2165 void igb_setup_tctl(struct igb_adapter *adapter)
2166 {
2167         struct e1000_hw *hw = &adapter->hw;
2168         u32 tctl;
2169
2170         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2171         wr32(E1000_TXDCTL(0), 0);
2172
2173         /* Program the Transmit Control Register */
2174         tctl = rd32(E1000_TCTL);
2175         tctl &= ~E1000_TCTL_CT;
2176         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2177                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2178
2179         igb_config_collision_dist(hw);
2180
2181         /* Enable transmits */
2182         tctl |= E1000_TCTL_EN;
2183
2184         wr32(E1000_TCTL, tctl);
2185 }
2186
2187 /**
2188  * igb_configure_tx_ring - Configure transmit ring after Reset
2189  * @adapter: board private structure
2190  * @ring: tx ring to configure
2191  *
2192  * Configure a transmit ring after a reset.
2193  **/
2194 void igb_configure_tx_ring(struct igb_adapter *adapter,
2195                            struct igb_ring *ring)
2196 {
2197         struct e1000_hw *hw = &adapter->hw;
2198         u32 txdctl;
2199         u64 tdba = ring->dma;
2200         int reg_idx = ring->reg_idx;
2201
2202         /* disable the queue */
2203         txdctl = rd32(E1000_TXDCTL(reg_idx));
2204         wr32(E1000_TXDCTL(reg_idx),
2205                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2206         wrfl();
2207         mdelay(10);
2208
2209         wr32(E1000_TDLEN(reg_idx),
2210                         ring->count * sizeof(union e1000_adv_tx_desc));
2211         wr32(E1000_TDBAL(reg_idx),
2212                         tdba & 0x00000000ffffffffULL);
2213         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2214
2215         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2216         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2217         writel(0, ring->head);
2218         writel(0, ring->tail);
2219
2220         txdctl |= IGB_TX_PTHRESH;
2221         txdctl |= IGB_TX_HTHRESH << 8;
2222         txdctl |= IGB_TX_WTHRESH << 16;
2223
2224         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2225         wr32(E1000_TXDCTL(reg_idx), txdctl);
2226 }
2227
2228 /**
2229  * igb_configure_tx - Configure transmit Unit after Reset
2230  * @adapter: board private structure
2231  *
2232  * Configure the Tx unit of the MAC after a reset.
2233  **/
2234 static void igb_configure_tx(struct igb_adapter *adapter)
2235 {
2236         int i;
2237
2238         for (i = 0; i < adapter->num_tx_queues; i++)
2239                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2240 }
2241
2242 /**
2243  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2244  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2245  *
2246  * Returns 0 on success, negative on failure
2247  **/
2248 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2249 {
2250         struct pci_dev *pdev = rx_ring->pdev;
2251         int size, desc_len;
2252
2253         size = sizeof(struct igb_buffer) * rx_ring->count;
2254         rx_ring->buffer_info = vmalloc(size);
2255         if (!rx_ring->buffer_info)
2256                 goto err;
2257         memset(rx_ring->buffer_info, 0, size);
2258
2259         desc_len = sizeof(union e1000_adv_rx_desc);
2260
2261         /* Round up to nearest 4K */
2262         rx_ring->size = rx_ring->count * desc_len;
2263         rx_ring->size = ALIGN(rx_ring->size, 4096);
2264
2265         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2266                                              &rx_ring->dma);
2267
2268         if (!rx_ring->desc)
2269                 goto err;
2270
2271         rx_ring->next_to_clean = 0;
2272         rx_ring->next_to_use = 0;
2273
2274         return 0;
2275
2276 err:
2277         vfree(rx_ring->buffer_info);
2278         rx_ring->buffer_info = NULL;
2279         dev_err(&pdev->dev, "Unable to allocate memory for "
2280                 "the receive descriptor ring\n");
2281         return -ENOMEM;
2282 }
2283
2284 /**
2285  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2286  *                                (Descriptors) for all queues
2287  * @adapter: board private structure
2288  *
2289  * Return 0 on success, negative on failure
2290  **/
2291 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2292 {
2293         struct pci_dev *pdev = adapter->pdev;
2294         int i, err = 0;
2295
2296         for (i = 0; i < adapter->num_rx_queues; i++) {
2297                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2298                 if (err) {
2299                         dev_err(&pdev->dev,
2300                                 "Allocation for Rx Queue %u failed\n", i);
2301                         for (i--; i >= 0; i--)
2302                                 igb_free_rx_resources(adapter->rx_ring[i]);
2303                         break;
2304                 }
2305         }
2306
2307         return err;
2308 }
2309
2310 /**
2311  * igb_setup_mrqc - configure the multiple receive queue control registers
2312  * @adapter: Board private structure
2313  **/
2314 static void igb_setup_mrqc(struct igb_adapter *adapter)
2315 {
2316         struct e1000_hw *hw = &adapter->hw;
2317         u32 mrqc, rxcsum;
2318         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2319         union e1000_reta {
2320                 u32 dword;
2321                 u8  bytes[4];
2322         } reta;
2323         static const u8 rsshash[40] = {
2324                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2325                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2326                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2327                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2328
2329         /* Fill out hash function seeds */
2330         for (j = 0; j < 10; j++) {
2331                 u32 rsskey = rsshash[(j * 4)];
2332                 rsskey |= rsshash[(j * 4) + 1] << 8;
2333                 rsskey |= rsshash[(j * 4) + 2] << 16;
2334                 rsskey |= rsshash[(j * 4) + 3] << 24;
2335                 array_wr32(E1000_RSSRK(0), j, rsskey);
2336         }
2337
2338         num_rx_queues = adapter->rss_queues;
2339
2340         if (adapter->vfs_allocated_count) {
2341                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2342                 switch (hw->mac.type) {
2343                 case e1000_82580:
2344                         num_rx_queues = 1;
2345                         shift = 0;
2346                         break;
2347                 case e1000_82576:
2348                         shift = 3;
2349                         num_rx_queues = 2;
2350                         break;
2351                 case e1000_82575:
2352                         shift = 2;
2353                         shift2 = 6;
2354                 default:
2355                         break;
2356                 }
2357         } else {
2358                 if (hw->mac.type == e1000_82575)
2359                         shift = 6;
2360         }
2361
2362         for (j = 0; j < (32 * 4); j++) {
2363                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2364                 if (shift2)
2365                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2366                 if ((j & 3) == 3)
2367                         wr32(E1000_RETA(j >> 2), reta.dword);
2368         }
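             /*
              * Example of the resulting redirection table (little-endian
              * host): with rss_queues = 4 and no VFs (shift = 0) the bytes
              * repeat 0,1,2,3, so every RETA dword reads 0x03020100; with VFs
              * on 82576 (shift = 3, two queues) they alternate 0x00/0x08,
              * giving 0x08000800 per dword.
              */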
2369
2370         /*
2371          * Disable raw packet checksumming so that RSS hash is placed in
2372          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2373          * offloads as they are enabled by default
2374          */
2375         rxcsum = rd32(E1000_RXCSUM);
2376         rxcsum |= E1000_RXCSUM_PCSD;
2377
2378         if (adapter->hw.mac.type >= e1000_82576)
2379                 /* Enable Receive Checksum Offload for SCTP */
2380                 rxcsum |= E1000_RXCSUM_CRCOFL;
2381
2382         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2383         wr32(E1000_RXCSUM, rxcsum);
2384
2385         /* If VMDq is enabled then we set the appropriate mode for that, else
2386          * we default to RSS so that an RSS hash is calculated per packet even
2387          * if we are only using one queue */
2388         if (adapter->vfs_allocated_count) {
2389                 if (hw->mac.type > e1000_82575) {
2390                         /* Set the default pool for the PF's first queue */
2391                         u32 vtctl = rd32(E1000_VT_CTL);
2392                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2393                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2394                         vtctl |= adapter->vfs_allocated_count <<
2395                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2396                         wr32(E1000_VT_CTL, vtctl);
2397                 }
2398                 if (adapter->rss_queues > 1)
2399                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2400                 else
2401                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2402         } else {
2403                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2404         }
2405         igb_vmm_control(adapter);
2406
2407         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2408                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2409         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2410                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2411         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2412                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2413         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2414                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2415
2416         wr32(E1000_MRQC, mrqc);
2417 }
2418
2419 /**
2420  * igb_setup_rctl - configure the receive control registers
2421  * @adapter: Board private structure
2422  **/
2423 void igb_setup_rctl(struct igb_adapter *adapter)
2424 {
2425         struct e1000_hw *hw = &adapter->hw;
2426         u32 rctl;
2427
2428         rctl = rd32(E1000_RCTL);
2429
2430         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2431         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2432
2433         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2434                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2435
2436         /*
2437          * enable stripping of CRC. It's unlikely this will break BMC
2438          * redirection as it did with e1000. Newer features require
2439          * that the HW strips the CRC.
2440          */
2441         rctl |= E1000_RCTL_SECRC;
2442
2443         /* disable store bad packets and clear size bits. */
2444         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2445
2446         /* enable LPE to prevent packets larger than max_frame_size */
2447         rctl |= E1000_RCTL_LPE;
2448
2449         /* disable queue 0 to prevent tail write w/o re-config */
2450         wr32(E1000_RXDCTL(0), 0);
2451
2452         /* Attention!!!  For SR-IOV PF driver operations you must enable
2453          * queue drop for all VF and PF queues to prevent head-of-line blocking
2454          * if an untrusted VF does not provide descriptors to hardware.
2455          */
2456         if (adapter->vfs_allocated_count) {
2457                 /* set all queue drop enable bits */
2458                 wr32(E1000_QDE, ALL_QUEUES);
2459         }
2460
2461         wr32(E1000_RCTL, rctl);
2462 }
2463
2464 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2465                                    int vfn)
2466 {
2467         struct e1000_hw *hw = &adapter->hw;
2468         u32 vmolr;
2469
2470         /* if it isn't the PF, check whether the VF has VLANs enabled and
2471          * increase the size to support VLAN tags */
2472         if (vfn < adapter->vfs_allocated_count &&
2473             adapter->vf_data[vfn].vlans_enabled)
2474                 size += VLAN_TAG_SIZE;
2475
2476         vmolr = rd32(E1000_VMOLR(vfn));
2477         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2478         vmolr |= size | E1000_VMOLR_LPE;
2479         wr32(E1000_VMOLR(vfn), vmolr);
2480
2481         return 0;
2482 }
2483
2484 /**
2485  * igb_rlpml_set - set maximum receive packet size
2486  * @adapter: board private structure
2487  *
2488  * Configure maximum receivable packet size.
2489  **/
2490 static void igb_rlpml_set(struct igb_adapter *adapter)
2491 {
2492         u32 max_frame_size = adapter->max_frame_size;
2493         struct e1000_hw *hw = &adapter->hw;
2494         u16 pf_id = adapter->vfs_allocated_count;
2495
2496         if (adapter->vlgrp)
2497                 max_frame_size += VLAN_TAG_SIZE;
2498
2499         /* if vfs are enabled we set RLPML to the largest possible request
2500          * size and set the VMOLR RLPML to the size we need */
2501         if (pf_id) {
2502                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2503                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2504         }
2505
2506         wr32(E1000_RLPML, max_frame_size);
2507 }
2508
2509 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2510                                  int vfn, bool aupe)
2511 {
2512         struct e1000_hw *hw = &adapter->hw;
2513         u32 vmolr;
2514
2515         /*
2516          * This register exists only on 82576 and newer, so on older
2517          * hardware we should exit and do nothing
2518          */
2519         if (hw->mac.type < e1000_82576)
2520                 return;
2521
2522         vmolr = rd32(E1000_VMOLR(vfn));
2523         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2524         if (aupe)
2525                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2526         else
2527                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2528
2529         /* clear all bits that might not be set */
2530         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2531
2532         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2533                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2534         /*
2535          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2536          * multicast packets
2537          */
2538         if (vfn <= adapter->vfs_allocated_count)
2539                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2540
2541         wr32(E1000_VMOLR(vfn), vmolr);
2542 }
2543
2544 /**
2545  * igb_configure_rx_ring - Configure a receive ring after Reset
2546  * @adapter: board private structure
2547  * @ring: receive ring to be configured
2548  *
2549  * Configure the Rx unit of the MAC after a reset.
2550  **/
2551 void igb_configure_rx_ring(struct igb_adapter *adapter,
2552                            struct igb_ring *ring)
2553 {
2554         struct e1000_hw *hw = &adapter->hw;
2555         u64 rdba = ring->dma;
2556         int reg_idx = ring->reg_idx;
2557         u32 srrctl, rxdctl;
2558
2559         /* disable the queue */
2560         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2561         wr32(E1000_RXDCTL(reg_idx),
2562                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2563
2564         /* Set DMA base address registers */
2565         wr32(E1000_RDBAL(reg_idx),
2566              rdba & 0x00000000ffffffffULL);
2567         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2568         wr32(E1000_RDLEN(reg_idx),
2569                        ring->count * sizeof(union e1000_adv_rx_desc));
2570
2571         /* initialize head and tail */
2572         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2573         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2574         writel(0, ring->head);
2575         writel(0, ring->tail);
2576
2577         /* set descriptor configuration */
2578         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2579                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2580                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2581 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2582                 srrctl |= IGB_RXBUFFER_16384 >>
2583                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2584 #else
2585                 srrctl |= (PAGE_SIZE / 2) >>
2586                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2587 #endif
2588                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2589         } else {
2590                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2591                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2592                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2593         }
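             /*
              * SRRCTL's BSIZEPKT field is in 1 KB units, so for example a
              * 2048-byte rx_buffer_len in the one-buffer branch above encodes
              * as 2.  The header-split branch instead programs a 64-byte
              * granular header buffer plus a half-page (capped at 16 KB)
              * packet buffer.
              */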
2594         /* Only set Drop Enable if we are supporting multiple queues */
2595         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2596                 srrctl |= E1000_SRRCTL_DROP_EN;
2597
2598         wr32(E1000_SRRCTL(reg_idx), srrctl);
2599
2600         /* set filtering for VMDQ pools */
2601         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2602
2603         /* enable receive descriptor fetching */
2604         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2605         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2606         rxdctl &= 0xFFF00000;
2607         rxdctl |= IGB_RX_PTHRESH;
2608         rxdctl |= IGB_RX_HTHRESH << 8;
2609         rxdctl |= IGB_RX_WTHRESH << 16;
2610         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2611 }
2612
2613 /**
2614  * igb_configure_rx - Configure receive Unit after Reset
2615  * @adapter: board private structure
2616  *
2617  * Configure the Rx unit of the MAC after a reset.
2618  **/
2619 static void igb_configure_rx(struct igb_adapter *adapter)
2620 {
2621         int i;
2622
2623         /* set UTA to appropriate mode */
2624         igb_set_uta(adapter);
2625
2626         /* set the correct pool for the PF default MAC address in entry 0 */
2627         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2628                          adapter->vfs_allocated_count);
2629
2630         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2631          * the Base and Length of the Rx Descriptor Ring */
2632         for (i = 0; i < adapter->num_rx_queues; i++)
2633                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2634 }
2635
2636 /**
2637  * igb_free_tx_resources - Free Tx Resources per Queue
2638  * @tx_ring: Tx descriptor ring for a specific queue
2639  *
2640  * Free all transmit software resources
2641  **/
2642 void igb_free_tx_resources(struct igb_ring *tx_ring)
2643 {
2644         igb_clean_tx_ring(tx_ring);
2645
2646         vfree(tx_ring->buffer_info);
2647         tx_ring->buffer_info = NULL;
2648
2649         /* if not set, then don't free */
2650         if (!tx_ring->desc)
2651                 return;
2652
2653         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2654                             tx_ring->desc, tx_ring->dma);
2655
2656         tx_ring->desc = NULL;
2657 }
2658
2659 /**
2660  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2661  * @adapter: board private structure
2662  *
2663  * Free all transmit software resources
2664  **/
2665 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2666 {
2667         int i;
2668
2669         for (i = 0; i < adapter->num_tx_queues; i++)
2670                 igb_free_tx_resources(adapter->tx_ring[i]);
2671 }
2672
2673 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2674                                     struct igb_buffer *buffer_info)
2675 {
2676         if (buffer_info->dma) {
2677                 if (buffer_info->mapped_as_page)
2678                         pci_unmap_page(tx_ring->pdev,
2679                                         buffer_info->dma,
2680                                         buffer_info->length,
2681                                         PCI_DMA_TODEVICE);
2682                 else
2683                         pci_unmap_single(tx_ring->pdev,
2684                                         buffer_info->dma,
2685                                         buffer_info->length,
2686                                         PCI_DMA_TODEVICE);
2687                 buffer_info->dma = 0;
2688         }
2689         if (buffer_info->skb) {
2690                 dev_kfree_skb_any(buffer_info->skb);
2691                 buffer_info->skb = NULL;
2692         }
2693         buffer_info->time_stamp = 0;
2694         buffer_info->length = 0;
2695         buffer_info->next_to_watch = 0;
2696         buffer_info->mapped_as_page = false;
2697 }
2698
2699 /**
2700  * igb_clean_tx_ring - Free Tx Buffers
2701  * @tx_ring: ring to be cleaned
2702  **/
2703 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2704 {
2705         struct igb_buffer *buffer_info;
2706         unsigned long size;
2707         unsigned int i;
2708
2709         if (!tx_ring->buffer_info)
2710                 return;
2711         /* Free all the Tx ring sk_buffs */
2712
2713         for (i = 0; i < tx_ring->count; i++) {
2714                 buffer_info = &tx_ring->buffer_info[i];
2715                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2716         }
2717
2718         size = sizeof(struct igb_buffer) * tx_ring->count;
2719         memset(tx_ring->buffer_info, 0, size);
2720
2721         /* Zero out the descriptor ring */
2722         memset(tx_ring->desc, 0, tx_ring->size);
2723
2724         tx_ring->next_to_use = 0;
2725         tx_ring->next_to_clean = 0;
2726 }
2727
2728 /**
2729  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2730  * @adapter: board private structure
2731  **/
2732 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2733 {
2734         int i;
2735
2736         for (i = 0; i < adapter->num_tx_queues; i++)
2737                 igb_clean_tx_ring(adapter->tx_ring[i]);
2738 }
2739
2740 /**
2741  * igb_free_rx_resources - Free Rx Resources
2742  * @rx_ring: ring to clean the resources from
2743  *
2744  * Free all receive software resources
2745  **/
2746 void igb_free_rx_resources(struct igb_ring *rx_ring)
2747 {
2748         igb_clean_rx_ring(rx_ring);
2749
2750         vfree(rx_ring->buffer_info);
2751         rx_ring->buffer_info = NULL;
2752
2753         /* if not set, then don't free */
2754         if (!rx_ring->desc)
2755                 return;
2756
2757         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2758                             rx_ring->desc, rx_ring->dma);
2759
2760         rx_ring->desc = NULL;
2761 }
2762
2763 /**
2764  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2765  * @adapter: board private structure
2766  *
2767  * Free all receive software resources
2768  **/
2769 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2770 {
2771         int i;
2772
2773         for (i = 0; i < adapter->num_rx_queues; i++)
2774                 igb_free_rx_resources(adapter->rx_ring[i]);
2775 }
2776
2777 /**
2778  * igb_clean_rx_ring - Free Rx Buffers per Queue
2779  * @rx_ring: ring to free buffers from
2780  **/
2781 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2782 {
2783         struct igb_buffer *buffer_info;
2784         unsigned long size;
2785         unsigned int i;
2786
2787         if (!rx_ring->buffer_info)
2788                 return;
2789
2790         /* Free all the Rx ring sk_buffs */
2791         for (i = 0; i < rx_ring->count; i++) {
2792                 buffer_info = &rx_ring->buffer_info[i];
2793                 if (buffer_info->dma) {
2794                         pci_unmap_single(rx_ring->pdev,
2795                                          buffer_info->dma,
2796                                          rx_ring->rx_buffer_len,
2797                                          PCI_DMA_FROMDEVICE);
2798                         buffer_info->dma = 0;
2799                 }
2800
2801                 if (buffer_info->skb) {
2802                         dev_kfree_skb(buffer_info->skb);
2803                         buffer_info->skb = NULL;
2804                 }
2805                 if (buffer_info->page_dma) {
2806                         pci_unmap_page(rx_ring->pdev,
2807                                        buffer_info->page_dma,
2808                                        PAGE_SIZE / 2,
2809                                        PCI_DMA_FROMDEVICE);
2810                         buffer_info->page_dma = 0;
2811                 }
2812                 if (buffer_info->page) {
2813                         put_page(buffer_info->page);
2814                         buffer_info->page = NULL;
2815                         buffer_info->page_offset = 0;
2816                 }
2817         }
2818
2819         size = sizeof(struct igb_buffer) * rx_ring->count;
2820         memset(rx_ring->buffer_info, 0, size);
2821
2822         /* Zero out the descriptor ring */
2823         memset(rx_ring->desc, 0, rx_ring->size);
2824
2825         rx_ring->next_to_clean = 0;
2826         rx_ring->next_to_use = 0;
2827 }
2828
2829 /**
2830  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2831  * @adapter: board private structure
2832  **/
2833 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2834 {
2835         int i;
2836
2837         for (i = 0; i < adapter->num_rx_queues; i++)
2838                 igb_clean_rx_ring(adapter->rx_ring[i]);
2839 }
2840
2841 /**
2842  * igb_set_mac - Change the Ethernet Address of the NIC
2843  * @netdev: network interface device structure
2844  * @p: pointer to an address structure
2845  *
2846  * Returns 0 on success, negative on failure
2847  **/
2848 static int igb_set_mac(struct net_device *netdev, void *p)
2849 {
2850         struct igb_adapter *adapter = netdev_priv(netdev);
2851         struct e1000_hw *hw = &adapter->hw;
2852         struct sockaddr *addr = p;
2853
2854         if (!is_valid_ether_addr(addr->sa_data))
2855                 return -EADDRNOTAVAIL;
2856
2857         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2858         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2859
2860         /* set the correct pool for the new PF MAC address in entry 0 */
2861         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2862                          adapter->vfs_allocated_count);
2863
2864         return 0;
2865 }
2866
2867 /**
2868  * igb_write_mc_addr_list - write multicast addresses to MTA
2869  * @netdev: network interface device structure
2870  *
2871  * Writes multicast address list to the MTA hash table.
2872  * Returns: -ENOMEM on failure
2873  *                0 on no addresses written
2874  *                X on writing X addresses to MTA
2875  **/
2876 static int igb_write_mc_addr_list(struct net_device *netdev)
2877 {
2878         struct igb_adapter *adapter = netdev_priv(netdev);
2879         struct e1000_hw *hw = &adapter->hw;
2880         struct dev_mc_list *mc_ptr = netdev->mc_list;
2881         u8  *mta_list;
2882         int i;
2883
2884         if (netdev_mc_empty(netdev)) {
2885                 /* nothing to program, so clear mc list */
2886                 igb_update_mc_addr_list(hw, NULL, 0);
2887                 igb_restore_vf_multicasts(adapter);
2888                 return 0;
2889         }
2890
2891         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
2892         if (!mta_list)
2893                 return -ENOMEM;
2894
2895         /* The shared function expects a packed array of only addresses. */
2896         mc_ptr = netdev->mc_list;
2897
2898         for (i = 0; i < netdev_mc_count(netdev); i++) {
2899                 if (!mc_ptr)
2900                         break;
2901                 memcpy(mta_list + (i*ETH_ALEN), mc_ptr->dmi_addr, ETH_ALEN);
2902                 mc_ptr = mc_ptr->next;
2903         }
2904         igb_update_mc_addr_list(hw, mta_list, i);
2905         kfree(mta_list);
2906
2907         return netdev_mc_count(netdev);
2908 }
2909
2910 /**
2911  * igb_write_uc_addr_list - write unicast addresses to RAR table
2912  * @netdev: network interface device structure
2913  *
2914  * Writes unicast address list to the RAR table.
2915  * Returns: -ENOMEM on failure/insufficient address space
2916  *                0 on no addresses written
2917  *                X on writing X addresses to the RAR table
2918  **/
2919 static int igb_write_uc_addr_list(struct net_device *netdev)
2920 {
2921         struct igb_adapter *adapter = netdev_priv(netdev);
2922         struct e1000_hw *hw = &adapter->hw;
2923         unsigned int vfn = adapter->vfs_allocated_count;
2924         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
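              /* RAR entry 0 holds the PF MAC address (set in igb_set_mac) and one
               * entry per allocated VF is reserved at the top of the table, which
               * is why only rar_entry_count - (vfn + 1) slots are usable here */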
2925         int count = 0;
2926
2927         /* return ENOMEM indicating insufficient memory for addresses */
2928         if (netdev_uc_count(netdev) > rar_entries)
2929                 return -ENOMEM;
2930
2931         if (!netdev_uc_empty(netdev) && rar_entries) {
2932                 struct netdev_hw_addr *ha;
2933
2934                 netdev_for_each_uc_addr(ha, netdev) {
2935                         if (!rar_entries)
2936                                 break;
2937                         igb_rar_set_qsel(adapter, ha->addr,
2938                                          rar_entries--,
2939                                          vfn);
2940                         count++;
2941                 }
2942         }
2943         /* write the addresses in reverse order to avoid write combining */
2944         for (; rar_entries > 0 ; rar_entries--) {
2945                 wr32(E1000_RAH(rar_entries), 0);
2946                 wr32(E1000_RAL(rar_entries), 0);
2947         }
2948         wrfl();
2949
2950         return count;
2951 }
2952
2953 /**
2954  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2955  * @netdev: network interface device structure
2956  *
2957  * The set_rx_mode entry point is called whenever the unicast or multicast
2958  * address lists or the network interface flags are updated.  This routine is
2959  * responsible for configuring the hardware for proper unicast, multicast,
2960  * promiscuous mode, and all-multi behavior.
2961  **/
2962 static void igb_set_rx_mode(struct net_device *netdev)
2963 {
2964         struct igb_adapter *adapter = netdev_priv(netdev);
2965         struct e1000_hw *hw = &adapter->hw;
2966         unsigned int vfn = adapter->vfs_allocated_count;
2967         u32 rctl, vmolr = 0;
2968         int count;
2969
2970         /* Check for Promiscuous and All Multicast modes */
2971         rctl = rd32(E1000_RCTL);
2972
2973         /* clear the affected bits */
2974         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2975
2976         if (netdev->flags & IFF_PROMISC) {
2977                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2978                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2979         } else {
2980                 if (netdev->flags & IFF_ALLMULTI) {
2981                         rctl |= E1000_RCTL_MPE;
2982                         vmolr |= E1000_VMOLR_MPME;
2983                 } else {
2984                         /*
2985                          * Write addresses to the MTA, if the attempt fails
2986                          * then we should just turn on promiscous mode so
2987                          * that we can at least receive multicast traffic
2988                          */
2989                         count = igb_write_mc_addr_list(netdev);
2990                         if (count < 0) {
2991                                 rctl |= E1000_RCTL_MPE;
2992                                 vmolr |= E1000_VMOLR_MPME;
2993                         } else if (count) {
2994                                 vmolr |= E1000_VMOLR_ROMPE;
2995                         }
2996                 }
2997                 /*
2998                  * Write addresses to the available RAR registers; if there is
2999                  * not sufficient space to store all of them, then enable
3000                  * unicast promiscuous mode
3001                  */
3002                 count = igb_write_uc_addr_list(netdev);
3003                 if (count < 0) {
3004                         rctl |= E1000_RCTL_UPE;
3005                         vmolr |= E1000_VMOLR_ROPE;
3006                 }
3007                 rctl |= E1000_RCTL_VFE;
3008         }
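              /* Illustrative summary of the result: IFF_PROMISC sets both UPE and
               * MPE in RCTL, IFF_ALLMULTI alone sets only MPE, and otherwise both
               * stay clear unless the MTA/RAR updates above reported a failure */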
3009         wr32(E1000_RCTL, rctl);
3010
3011         /*
3012          * In order to support SR-IOV and eventually VMDq it is necessary to set
3013          * the VMOLR to enable the appropriate modes.  Without this workaround
3014          * we will have issues with VLAN tag stripping not being done for frames
3015          * that are only arriving because we are the default pool
3016          */
3017         if (hw->mac.type < e1000_82576)
3018                 return;
3019
3020         vmolr |= rd32(E1000_VMOLR(vfn)) &
3021                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3022         wr32(E1000_VMOLR(vfn), vmolr);
3023         igb_restore_vf_multicasts(adapter);
3024 }
3025
3026 /* Need to wait a few seconds after link up to get diagnostic information from
3027  * the phy */
3028 static void igb_update_phy_info(unsigned long data)
3029 {
3030         struct igb_adapter *adapter = (struct igb_adapter *) data;
3031         igb_get_phy_info(&adapter->hw);
3032 }
3033
3034 /**
3035  * igb_has_link - check shared code for link and determine up/down
3036  * @adapter: pointer to driver private info
3037  **/
3038 bool igb_has_link(struct igb_adapter *adapter)
3039 {
3040         struct e1000_hw *hw = &adapter->hw;
3041         bool link_active = false;
3042         s32 ret_val = 0;
3043
3044         /* get_link_status is set on LSC (link status) interrupt or
3045          * rx sequence error interrupt.  Link is reported as down until
3046          * e1000_check_for_link establishes link; this applies to
3047          * copper adapters ONLY
3048          */
3049         switch (hw->phy.media_type) {
3050         case e1000_media_type_copper:
3051                 if (hw->mac.get_link_status) {
3052                         ret_val = hw->mac.ops.check_for_link(hw);
3053                         link_active = !hw->mac.get_link_status;
3054                 } else {
3055                         link_active = true;
3056                 }
3057                 break;
3058         case e1000_media_type_internal_serdes:
3059                 ret_val = hw->mac.ops.check_for_link(hw);
3060                 link_active = hw->mac.serdes_has_link;
3061                 break;
3062         default:
3063         case e1000_media_type_unknown:
3064                 break;
3065         }
3066
3067         return link_active;
3068 }
3069
3070 /**
3071  * igb_watchdog - Timer Call-back
3072  * @data: pointer to adapter cast into an unsigned long
3073  **/
3074 static void igb_watchdog(unsigned long data)
3075 {
3076         struct igb_adapter *adapter = (struct igb_adapter *)data;
3077         /* Do the rest outside of interrupt context */
3078         schedule_work(&adapter->watchdog_task);
3079 }
3080
3081 static void igb_watchdog_task(struct work_struct *work)
3082 {
3083         struct igb_adapter *adapter = container_of(work,
3084                                                    struct igb_adapter,
3085                                                    watchdog_task);
3086         struct e1000_hw *hw = &adapter->hw;
3087         struct net_device *netdev = adapter->netdev;
3088         u32 link;
3089         int i;
3090
3091         link = igb_has_link(adapter);
3092         if (link) {
3093                 if (!netif_carrier_ok(netdev)) {
3094                         u32 ctrl;
3095                         hw->mac.ops.get_speed_and_duplex(hw,
3096                                                          &adapter->link_speed,
3097                                                          &adapter->link_duplex);
3098
3099                         ctrl = rd32(E1000_CTRL);
3100                         /* Link status message must follow this format */
3101                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3102                                  "Flow Control: %s\n",
3103                                netdev->name,
3104                                adapter->link_speed,
3105                                adapter->link_duplex == FULL_DUPLEX ?
3106                                  "Full Duplex" : "Half Duplex",
3107                                ((ctrl & E1000_CTRL_TFCE) &&
3108                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3109                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3110                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3111
3112                         /* tweak tx_queue_len according to speed/duplex and
3113                          * adjust the timeout factor */
3114                         netdev->tx_queue_len = adapter->tx_queue_len;
3115                         adapter->tx_timeout_factor = 1;
3116                         switch (adapter->link_speed) {
3117                         case SPEED_10:
3118                                 netdev->tx_queue_len = 10;
3119                                 adapter->tx_timeout_factor = 14;
3120                                 break;
3121                         case SPEED_100:
3122                                 netdev->tx_queue_len = 100;
3123                                 /* maybe add some timeout factor ? */
3124                                 break;
3125                         }
3126
3127                         netif_carrier_on(netdev);
3128
3129                         igb_ping_all_vfs(adapter);
3130
3131                         /* link state has changed, schedule phy info update */
3132                         if (!test_bit(__IGB_DOWN, &adapter->state))
3133                                 mod_timer(&adapter->phy_info_timer,
3134                                           round_jiffies(jiffies + 2 * HZ));
3135                 }
3136         } else {
3137                 if (netif_carrier_ok(netdev)) {
3138                         adapter->link_speed = 0;
3139                         adapter->link_duplex = 0;
3140                         /* Link status message must follow this format */
3141                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3142                                netdev->name);
3143                         netif_carrier_off(netdev);
3144
3145                         igb_ping_all_vfs(adapter);
3146
3147                         /* link state has changed, schedule phy info update */
3148                         if (!test_bit(__IGB_DOWN, &adapter->state))
3149                                 mod_timer(&adapter->phy_info_timer,
3150                                           round_jiffies(jiffies + 2 * HZ));
3151                 }
3152         }
3153
3154         igb_update_stats(adapter);
3155
3156         for (i = 0; i < adapter->num_tx_queues; i++) {
3157                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3158                 if (!netif_carrier_ok(netdev)) {
3159                         /* We've lost link, so the controller stops DMA,
3160                          * but we've got queued Tx work that's never going
3161                          * to get done, so reset controller to flush Tx.
3162                          * (Do the reset outside of interrupt context). */
3163                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3164                                 adapter->tx_timeout_count++;
3165                                 schedule_work(&adapter->reset_task);
3166                                 /* return immediately since reset is imminent */
3167                                 return;
3168                         }
3169                 }
3170
3171                 /* Force detection of hung controller every watchdog period */
3172                 tx_ring->detect_tx_hung = true;
3173         }
3174
3175         /* Cause software interrupt to ensure rx ring is cleaned */
3176         if (adapter->msix_entries) {
3177                 u32 eics = 0;
3178                 for (i = 0; i < adapter->num_q_vectors; i++) {
3179                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3180                         eics |= q_vector->eims_value;
3181                 }
3182                 wr32(E1000_EICS, eics);
3183         } else {
3184                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3185         }
3186
3187         /* Reset the timer */
3188         if (!test_bit(__IGB_DOWN, &adapter->state))
3189                 mod_timer(&adapter->watchdog_timer,
3190                           round_jiffies(jiffies + 2 * HZ));
3191 }
3192
3193 enum latency_range {
3194         lowest_latency = 0,
3195         low_latency = 1,
3196         bulk_latency = 2,
3197         latency_invalid = 255
3198 };
3199
3200 /**
3201  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3202  *
3203  *      Stores a new ITR value based strictly on packet size.  This
3204  *      algorithm is less sophisticated than that used in igb_update_itr,
3205  *      due to the difficulty of synchronizing statistics across multiple
3206  *      receive rings.  The divisors and thresholds used by this function
3207  *      were determined based on theoretical maximum wire speed and testing
3208  *      data, in order to minimize response time while increasing bulk
3209  *      throughput.
3210  *      This functionality is controlled by the InterruptThrottleRate module
3211  *      parameter (see igb_param.c)
3212  *      NOTE:  This function is called only when operating in a multiqueue
3213  *             receive environment.
3214  * @q_vector: pointer to q_vector
3215  **/
3216 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3217 {
3218         int new_val = q_vector->itr_val;
3219         int avg_wire_size = 0;
3220         struct igb_adapter *adapter = q_vector->adapter;
3221
3222         /* For non-gigabit speeds, just fix the interrupt rate at roughly
3223          * 4000 ints/sec - an ITR value of 976.
3224          */
3225         if (adapter->link_speed != SPEED_1000) {
3226                 new_val = 976;
3227                 goto set_itr_val;
3228         }
3229
3230         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3231                 struct igb_ring *ring = q_vector->rx_ring;
3232                 avg_wire_size = ring->total_bytes / ring->total_packets;
3233         }
3234
3235         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3236                 struct igb_ring *ring = q_vector->tx_ring;
3237                 avg_wire_size = max_t(u32, avg_wire_size,
3238                                       (ring->total_bytes /
3239                                        ring->total_packets));
3240         }
3241
3242         /* if avg_wire_size isn't set no work was done */
3243         if (!avg_wire_size)
3244                 goto clear_counts;
3245
3246         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3247         avg_wire_size += 24;
3248
3249         /* Don't starve jumbo frames */
3250         avg_wire_size = min(avg_wire_size, 3000);
3251
3252         /* Give a little boost to mid-size frames */
3253         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3254                 new_val = avg_wire_size / 3;
3255         else
3256                 new_val = avg_wire_size / 2;
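                      /* Illustrative example: back-to-back 1500 byte frames give an
                       * avg_wire_size of 1524 after the +24 adjustment, which falls
                       * outside the 300-1200 window, so new_val = 1524 / 2 = 762;
                       * a 576 byte average becomes 600, lands inside the window,
                       * and yields new_val = 600 / 3 = 200 */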
3257
3258         /* when in itr mode 3 do not exceed 20K ints/sec */
3259         if (adapter->rx_itr_setting == 3 && new_val < 196)
3260                 new_val = 196;
3261
3262 set_itr_val:
3263         if (new_val != q_vector->itr_val) {
3264                 q_vector->itr_val = new_val;
3265                 q_vector->set_itr = 1;
3266         }
3267 clear_counts:
3268         if (q_vector->rx_ring) {
3269                 q_vector->rx_ring->total_bytes = 0;
3270                 q_vector->rx_ring->total_packets = 0;
3271         }
3272         if (q_vector->tx_ring) {
3273                 q_vector->tx_ring->total_bytes = 0;
3274                 q_vector->tx_ring->total_packets = 0;
3275         }
3276 }
3277
3278 /**
3279  * igb_update_itr - update the dynamic ITR value based on statistics
3280  *      Stores a new ITR value based on packet and byte
3281  *      counts during the last interrupt.  The advantage of per-interrupt
3282  *      computation is faster updates and more accurate ITR for the current
3283  *      traffic pattern.  Constants in this function were computed
3284  *      based on theoretical maximum wire speed and thresholds were set based
3285  *      on testing data as well as attempting to minimize response time
3286  *      while increasing bulk throughput.
3287  *      This functionality is controlled by the InterruptThrottleRate module
3288  *      parameter (see igb_param.c)
3289  *      NOTE:  These calculations are only valid when operating in a single-
3290  *             queue environment.
3291  * @adapter: pointer to adapter
3292  * @itr_setting: current q_vector->itr_val
3293  * @packets: the number of packets during this measurement interval
3294  * @bytes: the number of bytes during this measurement interval
3295  **/
3296 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3297                                    int packets, int bytes)
3298 {
3299         unsigned int retval = itr_setting;
3300
3301         if (packets == 0)
3302                 goto update_itr_done;
3303
3304         switch (itr_setting) {
3305         case lowest_latency:
3306                 /* handle TSO and jumbo frames */
3307                 if (bytes/packets > 8000)
3308                         retval = bulk_latency;
3309                 else if ((packets < 5) && (bytes > 512))
3310                         retval = low_latency;
3311                 break;
3312         case low_latency:  /* 50 usec aka 20000 ints/s */
3313                 if (bytes > 10000) {
3314                         /* this if handles the TSO accounting */
3315                         if (bytes/packets > 8000) {
3316                                 retval = bulk_latency;
3317                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3318                                 retval = bulk_latency;
3319                         } else if ((packets > 35)) {
3320                                 retval = lowest_latency;
3321                         }
3322                 } else if (bytes/packets > 2000) {
3323                         retval = bulk_latency;
3324                 } else if (packets <= 2 && bytes < 512) {
3325                         retval = lowest_latency;
3326                 }
3327                 break;
3328         case bulk_latency: /* 250 usec aka 4000 ints/s */
3329                 if (bytes > 25000) {
3330                         if (packets > 35)
3331                                 retval = low_latency;
3332                 } else if (bytes < 1500) {
3333                         retval = low_latency;
3334                 }
3335                 break;
3336         }
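              /* Worked example (illustrative): in the low_latency state, an
               * interval with 40 packets totalling 48000 bytes has bytes > 10000
               * and bytes/packets = 1200, so none of the bulk conditions match
               * and packets > 35 promotes the ring to lowest_latency */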
3337
3338 update_itr_done:
3339         return retval;
3340 }
3341
3342 static void igb_set_itr(struct igb_adapter *adapter)
3343 {
3344         struct igb_q_vector *q_vector = adapter->q_vector[0];
3345         u16 current_itr;
3346         u32 new_itr = q_vector->itr_val;
3347
3348         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3349         if (adapter->link_speed != SPEED_1000) {
3350                 current_itr = 0;
3351                 new_itr = 4000;
3352                 goto set_itr_now;
3353         }
3354
3355         adapter->rx_itr = igb_update_itr(adapter,
3356                                     adapter->rx_itr,
3357                                     q_vector->rx_ring->total_packets,
3358                                     q_vector->rx_ring->total_bytes);
3359
3360         adapter->tx_itr = igb_update_itr(adapter,
3361                                     adapter->tx_itr,
3362                                     q_vector->tx_ring->total_packets,
3363                                     q_vector->tx_ring->total_bytes);
3364         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3365
3366         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3367         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3368                 current_itr = low_latency;
3369
3370         switch (current_itr) {
3371         /* counts and packets in update_itr are dependent on these numbers */
3372         case lowest_latency:
3373                 new_itr = 56;  /* aka 70,000 ints/sec */
3374                 break;
3375         case low_latency:
3376                 new_itr = 196; /* aka 20,000 ints/sec */
3377                 break;
3378         case bulk_latency:
3379                 new_itr = 980; /* aka 4,000 ints/sec */
3380                 break;
3381         default:
3382                 break;
3383         }
3384
3385 set_itr_now:
3386         q_vector->rx_ring->total_bytes = 0;
3387         q_vector->rx_ring->total_packets = 0;
3388         q_vector->tx_ring->total_bytes = 0;
3389         q_vector->tx_ring->total_packets = 0;
3390
3391         if (new_itr != q_vector->itr_val) {
3392                 /* this attempts to bias the interrupt rate towards Bulk
3393                  * by adding intermediate steps when interrupt rate is
3394                  * increasing */
3395                 new_itr = new_itr > q_vector->itr_val ?
3396                              max((new_itr * q_vector->itr_val) /
3397                                  (new_itr + (q_vector->itr_val >> 2)),
3398                                  new_itr) :
3399                              new_itr;
3400                 /* Don't write the value here; it resets the adapter's
3401                  * internal timer, and causes us to delay far longer than
3402                  * we should between interrupts.  Instead, we write the ITR
3403                  * value at the beginning of the next interrupt so the timing
3404                  * ends up being correct.
3405                  */
3406                 q_vector->itr_val = new_itr;
3407                 q_vector->set_itr = 1;
3408         }
3409
3410         return;
3411 }
3412
3413 #define IGB_TX_FLAGS_CSUM               0x00000001
3414 #define IGB_TX_FLAGS_VLAN               0x00000002
3415 #define IGB_TX_FLAGS_TSO                0x00000004
3416 #define IGB_TX_FLAGS_IPV4               0x00000008
3417 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3418 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3419 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3420
3421 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3422                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3423 {
3424         struct e1000_adv_tx_context_desc *context_desc;
3425         unsigned int i;
3426         int err;
3427         struct igb_buffer *buffer_info;
3428         u32 info = 0, tu_cmd = 0;
3429         u32 mss_l4len_idx;
3430         u8 l4len;
3431
3432         if (skb_header_cloned(skb)) {
3433                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3434                 if (err)
3435                         return err;
3436         }
3437
3438         l4len = tcp_hdrlen(skb);
3439         *hdr_len += l4len;
3440
3441         if (skb->protocol == htons(ETH_P_IP)) {
3442                 struct iphdr *iph = ip_hdr(skb);
3443                 iph->tot_len = 0;
3444                 iph->check = 0;
3445                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3446                                                          iph->daddr, 0,
3447                                                          IPPROTO_TCP,
3448                                                          0);
3449         } else if (skb_is_gso_v6(skb)) {
3450                 ipv6_hdr(skb)->payload_len = 0;
3451                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3452                                                        &ipv6_hdr(skb)->daddr,
3453                                                        0, IPPROTO_TCP, 0);
3454         }
3455
3456         i = tx_ring->next_to_use;
3457
3458         buffer_info = &tx_ring->buffer_info[i];
3459         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3460         /* VLAN MACLEN IPLEN */
3461         if (tx_flags & IGB_TX_FLAGS_VLAN)
3462                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3463         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3464         *hdr_len += skb_network_offset(skb);
3465         info |= skb_network_header_len(skb);
3466         *hdr_len += skb_network_header_len(skb);
3467         context_desc->vlan_macip_lens = cpu_to_le32(info);
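              /* info now packs, from low bits to high: the IP header length, the
               * MAC header length (shifted by E1000_ADVTXD_MACLEN_SHIFT) and, in
               * the upper 16 bits, the VLAN tag carried in tx_flags */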
3468
3469         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3470         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3471
3472         if (skb->protocol == htons(ETH_P_IP))
3473                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3474         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3475
3476         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3477
3478         /* MSS L4LEN IDX */
3479         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3480         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3481
3482         /* For 82575, context index must be unique per ring. */
3483         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3484                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3485
3486         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3487         context_desc->seqnum_seed = 0;
3488
3489         buffer_info->time_stamp = jiffies;
3490         buffer_info->next_to_watch = i;
3491         buffer_info->dma = 0;
3492         i++;
3493         if (i == tx_ring->count)
3494                 i = 0;
3495
3496         tx_ring->next_to_use = i;
3497
3498         return true;
3499 }
3500
3501 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3502                                    struct sk_buff *skb, u32 tx_flags)
3503 {
3504         struct e1000_adv_tx_context_desc *context_desc;
3505         struct pci_dev *pdev = tx_ring->pdev;
3506         struct igb_buffer *buffer_info;
3507         u32 info = 0, tu_cmd = 0;
3508         unsigned int i;
3509
3510         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3511             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3512                 i = tx_ring->next_to_use;
3513                 buffer_info = &tx_ring->buffer_info[i];
3514                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3515
3516                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3517                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3518
3519                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3520                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3521                         info |= skb_network_header_len(skb);
3522
3523                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3524
3525                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3526
3527                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3528                         __be16 protocol;
3529
3530                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3531                                 const struct vlan_ethhdr *vhdr =
3532                                           (const struct vlan_ethhdr*)skb->data;
3533
3534                                 protocol = vhdr->h_vlan_encapsulated_proto;
3535                         } else {
3536                                 protocol = skb->protocol;
3537                         }
3538
3539                         switch (protocol) {
3540                         case cpu_to_be16(ETH_P_IP):
3541                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3542                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3543                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3544                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3545                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3546                                 break;
3547                         case cpu_to_be16(ETH_P_IPV6):
3548                                 /* XXX what about other V6 headers?? */
3549                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3550                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3551                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3552                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3553                                 break;
3554                         default:
3555                                 if (unlikely(net_ratelimit()))
3556                                         dev_warn(&pdev->dev,
3557                                             "partial checksum but proto=%x!\n",
3558                                             skb->protocol);
3559                                 break;
3560                         }
3561                 }
3562
3563                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3564                 context_desc->seqnum_seed = 0;
3565                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3566                         context_desc->mss_l4len_idx =
3567                                 cpu_to_le32(tx_ring->reg_idx << 4);
3568
3569                 buffer_info->time_stamp = jiffies;
3570                 buffer_info->next_to_watch = i;
3571                 buffer_info->dma = 0;
3572
3573                 i++;
3574                 if (i == tx_ring->count)
3575                         i = 0;
3576                 tx_ring->next_to_use = i;
3577
3578                 return true;
3579         }
3580         return false;
3581 }
3582
3583 #define IGB_MAX_TXD_PWR 16
3584 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3585
3586 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3587                                  unsigned int first)
3588 {
3589         struct igb_buffer *buffer_info;
3590         struct pci_dev *pdev = tx_ring->pdev;
3591         unsigned int len = skb_headlen(skb);
3592         unsigned int count = 0, i;
3593         unsigned int f;
3594
3595         i = tx_ring->next_to_use;
3596
3597         buffer_info = &tx_ring->buffer_info[i];
3598         BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3599         buffer_info->length = len;
3600         /* set time_stamp *before* dma to help avoid a possible race */
3601         buffer_info->time_stamp = jiffies;
3602         buffer_info->next_to_watch = i;
3603         buffer_info->dma = pci_map_single(pdev, skb->data, len,
3604                                           PCI_DMA_TODEVICE);
3605         if (pci_dma_mapping_error(pdev, buffer_info->dma))
3606                 goto dma_error;
3607
3608         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3609                 struct skb_frag_struct *frag;
3610
3611                 count++;
3612                 i++;
3613                 if (i == tx_ring->count)
3614                         i = 0;
3615
3616                 frag = &skb_shinfo(skb)->frags[f];
3617                 len = frag->size;
3618
3619                 buffer_info = &tx_ring->buffer_info[i];
3620                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3621                 buffer_info->length = len;
3622                 buffer_info->time_stamp = jiffies;
3623                 buffer_info->next_to_watch = i;
3624                 buffer_info->mapped_as_page = true;
3625                 buffer_info->dma = pci_map_page(pdev,
3626                                                 frag->page,
3627                                                 frag->page_offset,
3628                                                 len,
3629                                                 PCI_DMA_TODEVICE);
3630                 if (pci_dma_mapping_error(pdev, buffer_info->dma))
3631                         goto dma_error;
3632
3633         }
3634
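              /* the descriptor for skb->data set up before the loop is not yet
               * counted, hence the ++count on return; returning 0 tells the
               * caller that a DMA mapping failed and the ring must be rewound */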
3635         tx_ring->buffer_info[i].skb = skb;
3636         tx_ring->buffer_info[i].gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3637         tx_ring->buffer_info[first].next_to_watch = i;
3638
3639         return ++count;
3640
3641 dma_error:
3642         dev_err(&pdev->dev, "TX DMA map failed\n");
3643
3644         /* clear timestamp and dma mappings for failed buffer_info mapping */
3645         buffer_info->dma = 0;
3646         buffer_info->time_stamp = 0;
3647         buffer_info->length = 0;
3648         buffer_info->next_to_watch = 0;
3649         buffer_info->mapped_as_page = false;
3650
3651         /* clear timestamp and dma mappings for remaining portion of packet */
3652         while (count--) {
3653                 if (i == 0)
3654                         i = tx_ring->count;
3655                 i--;
3656                 buffer_info = &tx_ring->buffer_info[i];
3657                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3658         }
3659
3660         return 0;
3661 }
3662
3663 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3664                                     u32 tx_flags, int count, u32 paylen,
3665                                     u8 hdr_len)
3666 {
3667         union e1000_adv_tx_desc *tx_desc;
3668         struct igb_buffer *buffer_info;
3669         u32 olinfo_status = 0, cmd_type_len;
3670         unsigned int i = tx_ring->next_to_use;
3671
3672         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3673                         E1000_ADVTXD_DCMD_DEXT);
3674
3675         if (tx_flags & IGB_TX_FLAGS_VLAN)
3676                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3677
3678         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3679                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3680
3681         if (tx_flags & IGB_TX_FLAGS_TSO) {
3682                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3683
3684                 /* insert tcp checksum */
3685                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3686
3687                 /* insert ip checksum */
3688                 if (tx_flags & IGB_TX_FLAGS_IPV4)
3689                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3690
3691         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3692                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3693         }
3694
3695         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3696             (tx_flags & (IGB_TX_FLAGS_CSUM |
3697                          IGB_TX_FLAGS_TSO |
3698                          IGB_TX_FLAGS_VLAN)))
3699                 olinfo_status |= tx_ring->reg_idx << 4;
3700
3701         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
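              /* hdr_len is only non-zero for TSO frames (accumulated in
               * igb_tso_adv), so PAYLEN is the full skb length for normal frames
               * and the payload excluding the replicated headers for TSO */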
3702
3703         do {
3704                 buffer_info = &tx_ring->buffer_info[i];
3705                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3706                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3707                 tx_desc->read.cmd_type_len =
3708                         cpu_to_le32(cmd_type_len | buffer_info->length);
3709                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3710                 count--;
3711                 i++;
3712                 if (i == tx_ring->count)
3713                         i = 0;
3714         } while (count > 0);
3715
3716         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3717         /* Force memory writes to complete before letting h/w
3718          * know there are new descriptors to fetch.  (Only
3719          * applicable for weak-ordered memory model archs,
3720          * such as IA-64). */
3721         wmb();
3722
3723         tx_ring->next_to_use = i;
3724         writel(i, tx_ring->tail);
3725         /* we need this if more than one processor can write to our tail
3726          * at a time; it synchronizes IO on IA64/Altix systems */
3727         mmiowb();
3728 }
3729
3730 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3731 {
3732         struct net_device *netdev = tx_ring->netdev;
3733
3734         netif_stop_subqueue(netdev, tx_ring->queue_index);
3735
3736         /* Herbert's original patch had:
3737          *  smp_mb__after_netif_stop_queue();
3738          * but since that doesn't exist yet, just open code it. */
3739         smp_mb();
3740
3741         /* We need to check again in a case another CPU has just
3742          * made room available. */
3743         if (igb_desc_unused(tx_ring) < size)
3744                 return -EBUSY;
3745
3746         /* A reprieve! */
3747         netif_wake_subqueue(netdev, tx_ring->queue_index);
3748         tx_ring->tx_stats.restart_queue++;
3749         return 0;
3750 }
3751
3752 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3753 {
3754         if (igb_desc_unused(tx_ring) >= size)
3755                 return 0;
3756         return __igb_maybe_stop_tx(tx_ring, size);
3757 }
3758
3759 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3760                                     struct igb_ring *tx_ring)
3761 {
3762         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3763         int tso = 0, count;
3764         u32 tx_flags = 0;
3765         u16 first;
3766         u8 hdr_len = 0;
3767         union skb_shared_tx *shtx = skb_tx(skb);
3768
3769         /* need: 1 descriptor per page,
3770          *       + 2 desc gap to keep tail from touching head,
3771          *       + 1 desc for skb->data,
3772          *       + 1 desc for context descriptor,
3773          * otherwise try next time */
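              /* e.g. an skb with 3 page fragments must see at least 3 + 4 = 7 free
               * descriptors before the transmit is attempted */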
3774         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3775                 /* this is a hard error */
3776                 return NETDEV_TX_BUSY;
3777         }
3778
3779         if (unlikely(shtx->hardware)) {
3780                 shtx->in_progress = 1;
3781                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3782         }
3783
3784         if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3785                 tx_flags |= IGB_TX_FLAGS_VLAN;
3786                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3787         }
3788
3789         if (skb->protocol == htons(ETH_P_IP))
3790                 tx_flags |= IGB_TX_FLAGS_IPV4;
3791
3792         first = tx_ring->next_to_use;
3793         if (skb_is_gso(skb)) {
3794                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3795
3796                 if (tso < 0) {
3797                         dev_kfree_skb_any(skb);
3798                         return NETDEV_TX_OK;
3799                 }
3800         }
3801
3802         if (tso)
3803                 tx_flags |= IGB_TX_FLAGS_TSO;
3804         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3805                  (skb->ip_summed == CHECKSUM_PARTIAL))
3806                 tx_flags |= IGB_TX_FLAGS_CSUM;
3807
3808         /*
3809          * count reflects the descriptors mapped; if 0 or less, a mapping
3810          * error has occurred and we need to rewind the descriptor queue
3811          */
3812         count = igb_tx_map_adv(tx_ring, skb, first);
3813         if (!count) {
3814                 dev_kfree_skb_any(skb);
3815                 tx_ring->buffer_info[first].time_stamp = 0;
3816                 tx_ring->next_to_use = first;
3817                 return NETDEV_TX_OK;
3818         }
3819
3820         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3821
3822         /* Make sure there is space in the ring for the next send. */
3823         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3824
3825         return NETDEV_TX_OK;
3826 }
3827
3828 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3829                                       struct net_device *netdev)
3830 {
3831         struct igb_adapter *adapter = netdev_priv(netdev);
3832         struct igb_ring *tx_ring;
3833         int r_idx = 0;
3834
3835         if (test_bit(__IGB_DOWN, &adapter->state)) {
3836                 dev_kfree_skb_any(skb);
3837                 return NETDEV_TX_OK;
3838         }
3839
3840         if (skb->len <= 0) {
3841                 dev_kfree_skb_any(skb);
3842                 return NETDEV_TX_OK;
3843         }
3844
3845         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3846         tx_ring = adapter->multi_tx_table[r_idx];
3847
3848         /* This goes back to the question of how to logically map a tx queue
3849          * to a flow.  Right now, performance is slightly degraded when
3850          * using multiple tx queues.  If the stack breaks away from a
3851          * single qdisc implementation, we can look at this again. */
3852         return igb_xmit_frame_ring_adv(skb, tx_ring);
3853 }
3854
3855 /**
3856  * igb_tx_timeout - Respond to a Tx Hang
3857  * @netdev: network interface device structure
3858  **/
3859 static void igb_tx_timeout(struct net_device *netdev)
3860 {
3861         struct igb_adapter *adapter = netdev_priv(netdev);
3862         struct e1000_hw *hw = &adapter->hw;
3863
3864         /* Do the reset outside of interrupt context */
3865         adapter->tx_timeout_count++;
3866
3867         if (hw->mac.type == e1000_82580)
3868                 hw->dev_spec._82575.global_device_reset = true;
3869
3870         schedule_work(&adapter->reset_task);
3871         wr32(E1000_EICS,
3872              (adapter->eims_enable_mask & ~adapter->eims_other));
3873 }
3874
3875 static void igb_reset_task(struct work_struct *work)
3876 {
3877         struct igb_adapter *adapter;
3878         adapter = container_of(work, struct igb_adapter, reset_task);
3879
3880         igb_reinit_locked(adapter);
3881 }
3882
3883 /**
3884  * igb_get_stats - Get System Network Statistics
3885  * @netdev: network interface device structure
3886  *
3887  * Returns the address of the device statistics structure.
3888  * The statistics are actually updated from the timer callback.
3889  **/
3890 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3891 {
3892         /* only return the current stats */
3893         return &netdev->stats;
3894 }
3895
3896 /**
3897  * igb_change_mtu - Change the Maximum Transfer Unit
3898  * @netdev: network interface device structure
3899  * @new_mtu: new value for maximum frame size
3900  *
3901  * Returns 0 on success, negative on failure
3902  **/
3903 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3904 {
3905         struct igb_adapter *adapter = netdev_priv(netdev);
3906         struct pci_dev *pdev = adapter->pdev;
3907         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3908         u32 rx_buffer_len, i;
3909
3910         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3911                 dev_err(&pdev->dev, "Invalid MTU setting\n");
3912                 return -EINVAL;
3913         }
3914
3915         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3916                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
3917                 return -EINVAL;
3918         }
3919
3920         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3921                 msleep(1);
3922
3923         /* igb_down has a dependency on max_frame_size */
3924         adapter->max_frame_size = max_frame;
3925
3926         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3927          * means we reserve 2 more; this pushes us to allocate from the next
3928          * larger slab size.
3929          * i.e. RXBUFFER_2048 --> size-4096 slab
3930          */
3931
3932         if (max_frame <= IGB_RXBUFFER_1024)
3933                 rx_buffer_len = IGB_RXBUFFER_1024;
3934         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3935                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3936         else
3937                 rx_buffer_len = IGB_RXBUFFER_128;
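              /* in the jumbo case only packet headers are expected to land in the
               * small 128 byte buffer; the payload goes into the half-page buffers
               * that igb_clean_rx_ring unmaps via page_dma */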
3938
3939         if (netif_running(netdev))
3940                 igb_down(adapter);
3941
3942         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3943                  netdev->mtu, new_mtu);
3944         netdev->mtu = new_mtu;
3945
3946         for (i = 0; i < adapter->num_rx_queues; i++)
3947                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
3948
3949         if (netif_running(netdev))
3950                 igb_up(adapter);
3951         else
3952                 igb_reset(adapter);
3953
3954         clear_bit(__IGB_RESETTING, &adapter->state);
3955
3956         return 0;
3957 }
3958
3959 /**
3960  * igb_update_stats - Update the board statistics counters
3961  * @adapter: board private structure
3962  **/
3963
3964 void igb_update_stats(struct igb_adapter *adapter)
3965 {
3966         struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
3967         struct e1000_hw *hw = &adapter->hw;
3968         struct pci_dev *pdev = adapter->pdev;
3969         u32 rnbc, reg;
3970         u16 phy_tmp;
3971         int i;
3972         u64 bytes, packets;
3973
3974 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3975
3976         /*
3977          * Prevent stats update while adapter is being reset, or if the pci
3978          * connection is down.
3979          */
3980         if (adapter->link_speed == 0)
3981                 return;
3982         if (pci_channel_offline(pdev))
3983                 return;
3984
3985         bytes = 0;
3986         packets = 0;
3987         for (i = 0; i < adapter->num_rx_queues; i++) {
3988                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
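                      /* keep only the low 12 bits of the per-queue receive drop counter */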
3989                 struct igb_ring *ring = adapter->rx_ring[i];
3990                 ring->rx_stats.drops += rqdpc_tmp;
3991                 net_stats->rx_fifo_errors += rqdpc_tmp;
3992                 bytes += ring->rx_stats.bytes;
3993                 packets += ring->rx_stats.packets;
3994         }
3995
3996         net_stats->rx_bytes = bytes;
3997         net_stats->rx_packets = packets;
3998
3999         bytes = 0;
4000         packets = 0;
4001         for (i = 0; i < adapter->num_tx_queues; i++) {
4002                 struct igb_ring *ring = adapter->tx_ring[i];
4003                 bytes += ring->tx_stats.bytes;
4004                 packets += ring->tx_stats.packets;
4005         }
4006         net_stats->tx_bytes = bytes;
4007         net_stats->tx_packets = packets;
4008
4009         /* read stats registers */
4010         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4011         adapter->stats.gprc += rd32(E1000_GPRC);
4012         adapter->stats.gorc += rd32(E1000_GORCL);
4013         rd32(E1000_GORCH); /* clear GORCL */
4014         adapter->stats.bprc += rd32(E1000_BPRC);
4015         adapter->stats.mprc += rd32(E1000_MPRC);
4016         adapter->stats.roc += rd32(E1000_ROC);
4017
4018         adapter->stats.prc64 += rd32(E1000_PRC64);
4019         adapter->stats.prc127 += rd32(E1000_PRC127);
4020         adapter->stats.prc255 += rd32(E1000_PRC255);
4021         adapter->stats.prc511 += rd32(E1000_PRC511);
4022         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4023         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4024         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4025         adapter->stats.sec += rd32(E1000_SEC);
4026
4027         adapter->stats.mpc += rd32(E1000_MPC);
4028         adapter->stats.scc += rd32(E1000_SCC);
4029         adapter->stats.ecol += rd32(E1000_ECOL);
4030         adapter->stats.mcc += rd32(E1000_MCC);
4031         adapter->stats.latecol += rd32(E1000_LATECOL);
4032         adapter->stats.dc += rd32(E1000_DC);
4033         adapter->stats.rlec += rd32(E1000_RLEC);
4034         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4035         adapter->stats.xontxc += rd32(E1000_XONTXC);
4036         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4037         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4038         adapter->stats.fcruc += rd32(E1000_FCRUC);
4039         adapter->stats.gptc += rd32(E1000_GPTC);
4040         adapter->stats.gotc += rd32(E1000_GOTCL);
4041         rd32(E1000_GOTCH); /* clear GOTCL */
4042         rnbc = rd32(E1000_RNBC);
4043         adapter->stats.rnbc += rnbc;
4044         net_stats->rx_fifo_errors += rnbc;
4045         adapter->stats.ruc += rd32(E1000_RUC);
4046         adapter->stats.rfc += rd32(E1000_RFC);
4047         adapter->stats.rjc += rd32(E1000_RJC);
4048         adapter->stats.tor += rd32(E1000_TORH);
4049         adapter->stats.tot += rd32(E1000_TOTH);
4050         adapter->stats.tpr += rd32(E1000_TPR);
4051
4052         adapter->stats.ptc64 += rd32(E1000_PTC64);
4053         adapter->stats.ptc127 += rd32(E1000_PTC127);
4054         adapter->stats.ptc255 += rd32(E1000_PTC255);
4055         adapter->stats.ptc511 += rd32(E1000_PTC511);
4056         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4057         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4058
4059         adapter->stats.mptc += rd32(E1000_MPTC);
4060         adapter->stats.bptc += rd32(E1000_BPTC);
4061
4062         adapter->stats.tpt += rd32(E1000_TPT);
4063         adapter->stats.colc += rd32(E1000_COLC);
4064
4065         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4066         /* read internal phy specific stats */
4067         reg = rd32(E1000_CTRL_EXT);
4068         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4069                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4070                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4071         }
4072
4073         adapter->stats.tsctc += rd32(E1000_TSCTC);
4074         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4075
4076         adapter->stats.iac += rd32(E1000_IAC);
4077         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4078         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4079         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4080         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4081         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4082         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4083         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4084         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4085
4086         /* Fill out the OS statistics structure */
4087         net_stats->multicast = adapter->stats.mprc;
4088         net_stats->collisions = adapter->stats.colc;
4089
4090         /* Rx Errors */
4091
4092         /* RLEC on some newer hardware can be incorrect so build
4093          * our own version based on RUC and ROC */
4094         net_stats->rx_errors = adapter->stats.rxerrc +
4095                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4096                 adapter->stats.ruc + adapter->stats.roc +
4097                 adapter->stats.cexterr;
4098         net_stats->rx_length_errors = adapter->stats.ruc +
4099                                       adapter->stats.roc;
4100         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4101         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4102         net_stats->rx_missed_errors = adapter->stats.mpc;
4103
4104         /* Tx Errors */
4105         net_stats->tx_errors = adapter->stats.ecol +
4106                                adapter->stats.latecol;
4107         net_stats->tx_aborted_errors = adapter->stats.ecol;
4108         net_stats->tx_window_errors = adapter->stats.latecol;
4109         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4110
4111         /* Tx Dropped needs to be maintained elsewhere */
4112
4113         /* Phy Stats */
4114         if (hw->phy.media_type == e1000_media_type_copper) {
4115                 if ((adapter->link_speed == SPEED_1000) &&
4116                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4117                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4118                         adapter->phy_stats.idle_errors += phy_tmp;
4119                 }
4120         }
4121
4122         /* Management Stats */
4123         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4124         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4125         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4126 }
4127
4128 static irqreturn_t igb_msix_other(int irq, void *data)
4129 {
4130         struct igb_adapter *adapter = data;
4131         struct e1000_hw *hw = &adapter->hw;
4132         u32 icr = rd32(E1000_ICR);
4133         /* reading ICR causes bit 31 of EICR to be cleared */
4134
4135         if (icr & E1000_ICR_DRSTA)
4136                 schedule_work(&adapter->reset_task);
4137
4138         if (icr & E1000_ICR_DOUTSYNC) {
4139                 /* HW is reporting DMA is out of sync */
4140                 adapter->stats.doosync++;
4141         }
4142
4143         /* Check for a mailbox event */
4144         if (icr & E1000_ICR_VMMB)
4145                 igb_msg_task(adapter);
4146
4147         if (icr & E1000_ICR_LSC) {
4148                 hw->mac.get_link_status = 1;
4149                 /* guard against interrupt when we're going down */
4150                 if (!test_bit(__IGB_DOWN, &adapter->state))
4151                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4152         }
4153
4154         if (adapter->vfs_allocated_count)
4155                 wr32(E1000_IMS, E1000_IMS_LSC |
4156                                 E1000_IMS_VMMB |
4157                                 E1000_IMS_DOUTSYNC);
4158         else
4159                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4160         wr32(E1000_EIMS, adapter->eims_other);
4161
4162         return IRQ_HANDLED;
4163 }
4164
4165 static void igb_write_itr(struct igb_q_vector *q_vector)
4166 {
4167         struct igb_adapter *adapter = q_vector->adapter;
4168         u32 itr_val = q_vector->itr_val & 0x7FFC;
4169
4170         if (!q_vector->set_itr)
4171                 return;
4172
4173         if (!itr_val)
4174                 itr_val = 0x4;
4175
4176         if (adapter->hw.mac.type == e1000_82575)
4177                 itr_val |= itr_val << 16;
4178         else
4179                 itr_val |= 0x8000000;
4180
4181         writel(itr_val, q_vector->itr_register);
4182         q_vector->set_itr = 0;
4183 }
4184
4185 static irqreturn_t igb_msix_ring(int irq, void *data)
4186 {
4187         struct igb_q_vector *q_vector = data;
4188
4189         /* Write the ITR value calculated from the previous interrupt. */
4190         igb_write_itr(q_vector);
4191
4192         napi_schedule(&q_vector->napi);
4193
4194         return IRQ_HANDLED;
4195 }
4196
4197 #ifdef CONFIG_IGB_DCA
4198 static void igb_update_dca(struct igb_q_vector *q_vector)
4199 {
4200         struct igb_adapter *adapter = q_vector->adapter;
4201         struct e1000_hw *hw = &adapter->hw;
4202         int cpu = get_cpu();
4203
4204         if (q_vector->cpu == cpu)
4205                 goto out_no_update;
4206
4207         if (q_vector->tx_ring) {
4208                 int q = q_vector->tx_ring->reg_idx;
4209                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4210                 if (hw->mac.type == e1000_82575) {
4211                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4212                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4213                 } else {
4214                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4215                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4216                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4217                 }
4218                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4219                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4220         }
4221         if (q_vector->rx_ring) {
4222                 int q = q_vector->rx_ring->reg_idx;
4223                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4224                 if (hw->mac.type == e1000_82575) {
4225                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4226                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4227                 } else {
4228                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4229                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4230                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4231                 }
4232                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4233                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4234                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4235                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4236         }
4237         q_vector->cpu = cpu;
4238 out_no_update:
4239         put_cpu();
4240 }
4241
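/**
 * igb_setup_dca - enable DCA and tag every queue vector
 * @adapter: board private structure
 **/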
4242 static void igb_setup_dca(struct igb_adapter *adapter)
4243 {
4244         struct e1000_hw *hw = &adapter->hw;
4245         int i;
4246
4247         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4248                 return;
4249
4250         /* Always use CB2 mode, difference is masked in the CB driver. */
4251         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4252
4253         for (i = 0; i < adapter->num_q_vectors; i++) {
4254                 adapter->q_vector[i]->cpu = -1;
4255                 igb_update_dca(adapter->q_vector[i]);
4256         }
4257 }
4258
4259 static int __igb_notify_dca(struct device *dev, void *data)
4260 {
4261         struct net_device *netdev = dev_get_drvdata(dev);
4262         struct igb_adapter *adapter = netdev_priv(netdev);
4263         struct pci_dev *pdev = adapter->pdev;
4264         struct e1000_hw *hw = &adapter->hw;
4265         unsigned long event = *(unsigned long *)data;
4266
4267         switch (event) {
4268         case DCA_PROVIDER_ADD:
4269                 /* if already enabled, don't do it again */
4270                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4271                         break;
4272                 if (dca_add_requester(dev) == 0) {
4273                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4274                         dev_info(&pdev->dev, "DCA enabled\n");
4275                         igb_setup_dca(adapter);
4276                         break;
4277                 }
4278                 /* Fall Through since DCA is disabled. */
4279         case DCA_PROVIDER_REMOVE:
4280                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4281                         /* without this a class_device is left
4282                          * hanging around in the sysfs model */
4283                         dca_remove_requester(dev);
4284                         dev_info(&pdev->dev, "DCA disabled\n");
4285                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4286                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4287                 }
4288                 break;
4289         }
4290
4291         return 0;
4292 }
4293
4294 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4295                           void *p)
4296 {
4297         int ret_val;
4298
4299         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4300                                          __igb_notify_dca);
4301
4302         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4303 }
4304 #endif /* CONFIG_IGB_DCA */
4305
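/**
 * igb_ping_all_vfs - send a control ("ping") message to every VF
 * @adapter: board private structure
 *
 * The clear-to-send (CTS) bit is only set for VFs that have completed
 * their reset handshake.
 **/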
4306 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4307 {
4308         struct e1000_hw *hw = &adapter->hw;
4309         u32 ping;
4310         int i;
4311
4312         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4313                 ping = E1000_PF_CONTROL_MSG;
4314                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4315                         ping |= E1000_VT_MSGTYPE_CTS;
4316                 igb_write_mbx(hw, &ping, 1, i);
4317         }
4318 }
4319
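/**
 * igb_set_vf_promisc - handle a VF request to change promiscuous mode
 * @adapter: board private structure
 * @msgbuf: mailbox message from the VF
 * @vf: VF the message came from
 *
 * Enables multicast promiscuous mode when requested; otherwise falls
 * back to the stored multicast hashes (or promiscuous mode if more
 * than 30 hashes are in use) and updates the VF's VMOLR register.
 **/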
4320 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4321 {
4322         struct e1000_hw *hw = &adapter->hw;
4323         u32 vmolr = rd32(E1000_VMOLR(vf));
4324         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4325
4326         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4327                             IGB_VF_FLAG_MULTI_PROMISC);
4328         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4329
4330         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4331                 vmolr |= E1000_VMOLR_MPME;
4332                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4333         } else {
4334                 /*
4335                  * if we have hashes and we are clearing a multicast promisc
4336                  * flag we need to write the hashes to the MTA as this step
4337                  * was previously skipped
4338                  */
4339                 if (vf_data->num_vf_mc_hashes > 30) {
4340                         vmolr |= E1000_VMOLR_MPME;
4341                 } else if (vf_data->num_vf_mc_hashes) {
4342                         int j;
4343                         vmolr |= E1000_VMOLR_ROMPE;
4344                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4345                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4346                 }
4347         }
4348
4349         wr32(E1000_VMOLR(vf), vmolr);
4350
4351         /* there are flags left unprocessed, likely not supported */
4352         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4353                 return -EINVAL;
4354
4355         return 0;
4356
4357 }
4358
4359 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4360                                   u32 *msgbuf, u32 vf)
4361 {
4362         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4363         u16 *hash_list = (u16 *)&msgbuf[1];
4364         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4365         int i;
4366
4367         /* salt away the number of multicast addresses assigned
4368          * to this VF for later use to restore when the PF multicast
4369          * list changes
4370          */
4371         vf_data->num_vf_mc_hashes = n;
4372
4373         /* only up to 30 hash values supported */
4374         if (n > 30)
4375                 n = 30;
4376
4377         /* store the hashes for later use */
4378         for (i = 0; i < n; i++)
4379                 vf_data->vf_mc_hashes[i] = hash_list[i];
4380
4381         /* Flush and reset the mta with the new values */
4382         igb_set_rx_mode(adapter->netdev);
4383
4384         return 0;
4385 }
4386
4387 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4388 {
4389         struct e1000_hw *hw = &adapter->hw;
4390         struct vf_data_storage *vf_data;
4391         int i, j;
4392
4393         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4394                 u32 vmolr = rd32(E1000_VMOLR(i));
4395                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4396
4397                 vf_data = &adapter->vf_data[i];
4398
4399                 if ((vf_data->num_vf_mc_hashes > 30) ||
4400                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4401                         vmolr |= E1000_VMOLR_MPME;
4402                 } else if (vf_data->num_vf_mc_hashes) {
4403                         vmolr |= E1000_VMOLR_ROMPE;
4404                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4405                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4406                 }
4407                 wr32(E1000_VMOLR(i), vmolr);
4408         }
4409 }
4410
4411 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4412 {
4413         struct e1000_hw *hw = &adapter->hw;
4414         u32 pool_mask, reg, vid;
4415         int i;
4416
4417         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4418
4419         /* Find the vlan filter for this id */
4420         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4421                 reg = rd32(E1000_VLVF(i));
4422
4423                 /* remove the vf from the pool */
4424                 reg &= ~pool_mask;
4425
4426                 /* if pool is empty then remove entry from vfta */
4427                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4428                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4429                         vid = reg & E1000_VLVF_VLANID_MASK;
4430                         reg = 0;
4431                         igb_vfta_set(hw, vid, false);
4432                 }
4433
4434                 wr32(E1000_VLVF(i), reg);
4435         }
4436
4437         adapter->vf_data[vf].vlans_enabled = 0;
4438 }
4439
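/**
 * igb_vlvf_set - add or remove a pool from a VLVF VLAN filter entry
 * @adapter: board private structure
 * @vid: VLAN id to add or remove
 * @add: true to add the pool, false to remove it
 * @vf: pool/VF number to update
 *
 * Shares VLVF entries between pools, maintains the VFTA bit for the
 * VLAN and adjusts the VF's maximum packet size (RLPML) to account
 * for the VLAN tag.
 **/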
4440 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4441 {
4442         struct e1000_hw *hw = &adapter->hw;
4443         u32 reg, i;
4444
4445         /* The vlvf table only exists on 82576 hardware and newer */
4446         if (hw->mac.type < e1000_82576)
4447                 return -1;
4448
4449         /* we only need to do this if VMDq is enabled */
4450         if (!adapter->vfs_allocated_count)
4451                 return -1;
4452
4453         /* Find the vlan filter for this id */
4454         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4455                 reg = rd32(E1000_VLVF(i));
4456                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4457                     vid == (reg & E1000_VLVF_VLANID_MASK))
4458                         break;
4459         }
4460
4461         if (add) {
4462                 if (i == E1000_VLVF_ARRAY_SIZE) {
4463                         /* Did not find a matching VLAN ID entry that was
4464                          * enabled.  Search for a free filter entry, i.e.
4465                          * one without the enable bit set
4466                          */
4467                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4468                                 reg = rd32(E1000_VLVF(i));
4469                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4470                                         break;
4471                         }
4472                 }
4473                 if (i < E1000_VLVF_ARRAY_SIZE) {
4474                         /* Found an enabled/available entry */
4475                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4476
4477                         /* if !enabled we need to set this up in vfta */
4478                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4479                                 /* add VID to filter table */
4480                                 igb_vfta_set(hw, vid, true);
4481                                 reg |= E1000_VLVF_VLANID_ENABLE;
4482                         }
4483                         reg &= ~E1000_VLVF_VLANID_MASK;
4484                         reg |= vid;
4485                         wr32(E1000_VLVF(i), reg);
4486
4487                         /* do not modify RLPML for PF devices */
4488                         if (vf >= adapter->vfs_allocated_count)
4489                                 return 0;
4490
4491                         if (!adapter->vf_data[vf].vlans_enabled) {
4492                                 u32 size;
4493                                 reg = rd32(E1000_VMOLR(vf));
4494                                 size = reg & E1000_VMOLR_RLPML_MASK;
4495                                 size += 4;
4496                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4497                                 reg |= size;
4498                                 wr32(E1000_VMOLR(vf), reg);
4499                         }
4500
4501                         adapter->vf_data[vf].vlans_enabled++;
4502                         return 0;
4503                 }
4504         } else {
4505                 if (i < E1000_VLVF_ARRAY_SIZE) {
4506                         /* remove vf from the pool */
4507                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4508                         /* if pool is empty then remove entry from vfta */
4509                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4510                                 reg = 0;
4511                                 igb_vfta_set(hw, vid, false);
4512                         }
4513                         wr32(E1000_VLVF(i), reg);
4514
4515                         /* do not modify RLPML for PF devices */
4516                         if (vf >= adapter->vfs_allocated_count)
4517                                 return 0;
4518
4519                         adapter->vf_data[vf].vlans_enabled--;
4520                         if (!adapter->vf_data[vf].vlans_enabled) {
4521                                 u32 size;
4522                                 reg = rd32(E1000_VMOLR(vf));
4523                                 size = reg & E1000_VMOLR_RLPML_MASK;
4524                                 size -= 4;
4525                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4526                                 reg |= size;
4527                                 wr32(E1000_VMOLR(vf), reg);
4528                         }
4529                 }
4530         }
4531         return 0;
4532 }
4533
4534 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4535 {
4536         struct e1000_hw *hw = &adapter->hw;
4537
4538         if (vid)
4539                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4540         else
4541                 wr32(E1000_VMVIR(vf), 0);
4542 }
4543
4544 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4545                                int vf, u16 vlan, u8 qos)
4546 {
4547         int err = 0;
4548         struct igb_adapter *adapter = netdev_priv(netdev);
4549
4550         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4551                 return -EINVAL;
4552         if (vlan || qos) {
4553                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4554                 if (err)
4555                         goto out;
4556                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4557                 igb_set_vmolr(adapter, vf, !vlan);
4558                 adapter->vf_data[vf].pf_vlan = vlan;
4559                 adapter->vf_data[vf].pf_qos = qos;
4560                 dev_info(&adapter->pdev->dev,
4561                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4562                 if (test_bit(__IGB_DOWN, &adapter->state)) {
4563                         dev_warn(&adapter->pdev->dev,
4564                                  "The VF VLAN has been set,"
4565                                  " but the PF device is not up.\n");
4566                         dev_warn(&adapter->pdev->dev,
4567                                  "Bring the PF device up before"
4568                                  " attempting to use the VF device.\n");
4569                 }
4570         } else {
4571                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4572                                    false, vf);
4573                 igb_set_vmvir(adapter, vlan, vf);
4574                 igb_set_vmolr(adapter, vf, true);
4575                 adapter->vf_data[vf].pf_vlan = 0;
4576                 adapter->vf_data[vf].pf_qos = 0;
4577         }
4578 out:
4579         return err;
4580 }
4581
4582 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4583 {
4584         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4585         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4586
4587         return igb_vlvf_set(adapter, vid, add, vf);
4588 }
4589
4590 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4591 {
4592         /* clear flags */
4593         adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4594         adapter->vf_data[vf].last_nack = jiffies;
4595
4596         /* reset offloads to defaults */
4597         igb_set_vmolr(adapter, vf, true);
4598
4599         /* reset vlans for device */
4600         igb_clear_vf_vfta(adapter, vf);
4601         if (adapter->vf_data[vf].pf_vlan)
4602                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4603                                     adapter->vf_data[vf].pf_vlan,
4604                                     adapter->vf_data[vf].pf_qos);
4605         else
4606                 igb_clear_vf_vfta(adapter, vf);
4607
4608         /* reset multicast table array for vf */
4609         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4610
4611         /* Flush and reset the mta with the new values */
4612         igb_set_rx_mode(adapter->netdev);
4613 }
4614
4615 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4616 {
4617         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4618
4619         /* generate a new mac address as we were hotplug removed/added */
4620         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4621                 random_ether_addr(vf_mac);
4622
4623         /* process remaining reset events */
4624         igb_vf_reset(adapter, vf);
4625 }
4626
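/**
 * igb_vf_reset_msg - respond to a VF reset mailbox message
 * @adapter: board private structure
 * @vf: VF requesting the reset
 *
 * Resets the VF's state, programs its MAC address into a RAR entry,
 * enables its transmit and receive queues and replies with an ACK
 * containing the MAC address.
 **/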
4627 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4628 {
4629         struct e1000_hw *hw = &adapter->hw;
4630         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4631         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4632         u32 reg, msgbuf[3];
4633         u8 *addr = (u8 *)(&msgbuf[1]);
4634
4635         /* process all the same items cleared in a function level reset */
4636         igb_vf_reset(adapter, vf);
4637
4638         /* set vf mac address */
4639         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4640
4641         /* enable transmit and receive for vf */
4642         reg = rd32(E1000_VFTE);
4643         wr32(E1000_VFTE, reg | (1 << vf));
4644         reg = rd32(E1000_VFRE);
4645         wr32(E1000_VFRE, reg | (1 << vf));
4646
4647         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4648
4649         /* reply to reset with ack and vf mac address */
4650         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4651         memcpy(addr, vf_mac, 6);
4652         igb_write_mbx(hw, msgbuf, 3, vf);
4653 }
4654
4655 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4656 {
4657         unsigned char *addr = (char *)&msg[1];
4658         int err = -1;
4659
4660         if (is_valid_ether_addr(addr))
4661                 err = igb_set_vf_mac(adapter, vf, addr);
4662
4663         return err;
4664 }
4665
4666 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4667 {
4668         struct e1000_hw *hw = &adapter->hw;
4669         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4670         u32 msg = E1000_VT_MSGTYPE_NACK;
4671
4672         /* if device isn't clear to send it shouldn't be reading either */
4673         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4674             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4675                 igb_write_mbx(hw, &msg, 1, vf);
4676                 vf_data->last_nack = jiffies;
4677         }
4678 }
4679
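/**
 * igb_rcv_msg_from_vf - read and dispatch one mailbox message from a VF
 * @adapter: board private structure
 * @vf: VF the message came from
 *
 * Until a VF has completed a reset it is only allowed to request one;
 * all other requests are NACKed.  Valid requests are dispatched to the
 * appropriate handler and the result is returned as an ACK or NACK.
 **/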
4680 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4681 {
4682         struct pci_dev *pdev = adapter->pdev;
4683         u32 msgbuf[E1000_VFMAILBOX_SIZE];
4684         struct e1000_hw *hw = &adapter->hw;
4685         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4686         s32 retval;
4687
4688         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4689
4690         if (retval) {
4691                 /* if receive failed revoke VF CTS stats and restart init */
4692                 dev_err(&pdev->dev, "Error receiving message from VF\n");
4693                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
4694                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4695                         return;
4696                 goto out;
4697         }
4698
4699         /* this is a message we already processed, do nothing */
4700         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4701                 return;
4702
4703         /*
4704          * until the vf completes a reset it should not be
4705          * allowed to start any configuration.
4706          */
4707
4708         if (msgbuf[0] == E1000_VF_RESET) {
4709                 igb_vf_reset_msg(adapter, vf);
4710                 return;
4711         }
4712
4713         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4714                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4715                         return;
4716                 retval = -1;
4717                 goto out;
4718         }
4719
4720         switch ((msgbuf[0] & 0xFFFF)) {
4721         case E1000_VF_SET_MAC_ADDR:
4722                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4723                 break;
4724         case E1000_VF_SET_PROMISC:
4725                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4726                 break;
4727         case E1000_VF_SET_MULTICAST:
4728                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4729                 break;
4730         case E1000_VF_SET_LPE:
4731                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4732                 break;
4733         case E1000_VF_SET_VLAN:
4734                 if (adapter->vf_data[vf].pf_vlan)
4735                         retval = -1;
4736                 else
4737                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4738                 break;
4739         default:
4740                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4741                 retval = -1;
4742                 break;
4743         }
4744
4745         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4746 out:
4747         /* notify the VF of the results of what it sent us */
4748         if (retval)
4749                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4750         else
4751                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4752
4753         igb_write_mbx(hw, msgbuf, 1, vf);
4754 }
4755
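/**
 * igb_msg_task - service pending VF mailbox events
 * @adapter: board private structure
 *
 * Called when a mailbox interrupt is signalled; checks every allocated
 * VF for pending reset requests, messages and acknowledgements.
 **/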
4756 static void igb_msg_task(struct igb_adapter *adapter)
4757 {
4758         struct e1000_hw *hw = &adapter->hw;
4759         u32 vf;
4760
4761         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4762                 /* process any reset requests */
4763                 if (!igb_check_for_rst(hw, vf))
4764                         igb_vf_reset_event(adapter, vf);
4765
4766                 /* process any messages pending */
4767                 if (!igb_check_for_msg(hw, vf))
4768                         igb_rcv_msg_from_vf(adapter, vf);
4769
4770                 /* process any acks */
4771                 if (!igb_check_for_ack(hw, vf))
4772                         igb_rcv_ack_from_vf(adapter, vf);
4773         }
4774 }
4775
4776 /**
4777  *  igb_set_uta - Set unicast filter table address
4778  *  @adapter: board private structure
4779  *
4780  *  The unicast table address is a register array of 32-bit registers.
4781  *  The table is meant to be used in a way similar to how the MTA is used,
4782  *  however, due to certain limitations in the hardware it is necessary to
4783  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4784  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
4785  **/
4786 static void igb_set_uta(struct igb_adapter *adapter)
4787 {
4788         struct e1000_hw *hw = &adapter->hw;
4789         int i;
4790
4791         /* The UTA table only exists on 82576 hardware and newer */
4792         if (hw->mac.type < e1000_82576)
4793                 return;
4794
4795         /* we only need to do this if VMDq is enabled */
4796         if (!adapter->vfs_allocated_count)
4797                 return;
4798
4799         for (i = 0; i < hw->mac.uta_reg_count; i++)
4800                 array_wr32(E1000_UTA, i, ~0);
4801 }
4802
4803 /**
4804  * igb_intr_msi - Interrupt Handler
4805  * @irq: interrupt number
4806  * @data: pointer to a network interface device structure
4807  **/
4808 static irqreturn_t igb_intr_msi(int irq, void *data)
4809 {
4810         struct igb_adapter *adapter = data;
4811         struct igb_q_vector *q_vector = adapter->q_vector[0];
4812         struct e1000_hw *hw = &adapter->hw;
4813         /* read ICR disables interrupts using IAM */
4814         u32 icr = rd32(E1000_ICR);
4815
4816         igb_write_itr(q_vector);
4817
4818         if (icr & E1000_ICR_DRSTA)
4819                 schedule_work(&adapter->reset_task);
4820
4821         if (icr & E1000_ICR_DOUTSYNC) {
4822                 /* HW is reporting DMA is out of sync */
4823                 adapter->stats.doosync++;
4824         }
4825
4826         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4827                 hw->mac.get_link_status = 1;
4828                 if (!test_bit(__IGB_DOWN, &adapter->state))
4829                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4830         }
4831
4832         napi_schedule(&q_vector->napi);
4833
4834         return IRQ_HANDLED;
4835 }
4836
4837 /**
4838  * igb_intr - Legacy Interrupt Handler
4839  * @irq: interrupt number
4840  * @data: pointer to a network interface device structure
4841  **/
4842 static irqreturn_t igb_intr(int irq, void *data)
4843 {
4844         struct igb_adapter *adapter = data;
4845         struct igb_q_vector *q_vector = adapter->q_vector[0];
4846         struct e1000_hw *hw = &adapter->hw;
4847         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4848          * need for the IMC write */
4849         u32 icr = rd32(E1000_ICR);
4850         if (!icr)
4851                 return IRQ_NONE;  /* Not our interrupt */
4852
4853         igb_write_itr(q_vector);
4854
4855         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4856          * not set, then the adapter didn't send an interrupt */
4857         if (!(icr & E1000_ICR_INT_ASSERTED))
4858                 return IRQ_NONE;
4859
4860         if (icr & E1000_ICR_DRSTA)
4861                 schedule_work(&adapter->reset_task);
4862
4863         if (icr & E1000_ICR_DOUTSYNC) {
4864                 /* HW is reporting DMA is out of sync */
4865                 adapter->stats.doosync++;
4866         }
4867
4868         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4869                 hw->mac.get_link_status = 1;
4870                 /* guard against interrupt when we're going down */
4871                 if (!test_bit(__IGB_DOWN, &adapter->state))
4872                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4873         }
4874
4875         napi_schedule(&q_vector->napi);
4876
4877         return IRQ_HANDLED;
4878 }
4879
4880 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4881 {
4882         struct igb_adapter *adapter = q_vector->adapter;
4883         struct e1000_hw *hw = &adapter->hw;
4884
4885         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4886             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4887                 if (!adapter->msix_entries)
4888                         igb_set_itr(adapter);
4889                 else
4890                         igb_update_ring_itr(q_vector);
4891         }
4892
4893         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4894                 if (adapter->msix_entries)
4895                         wr32(E1000_EIMS, q_vector->eims_value);
4896                 else
4897                         igb_irq_enable(adapter);
4898         }
4899 }
4900
4901 /**
4902  * igb_poll - NAPI Rx polling callback
4903  * @napi: napi polling structure
4904  * @budget: count of how many packets we should handle
4905  **/
4906 static int igb_poll(struct napi_struct *napi, int budget)
4907 {
4908         struct igb_q_vector *q_vector = container_of(napi,
4909                                                      struct igb_q_vector,
4910                                                      napi);
4911         int tx_clean_complete = 1, work_done = 0;
4912
4913 #ifdef CONFIG_IGB_DCA
4914         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4915                 igb_update_dca(q_vector);
4916 #endif
4917         if (q_vector->tx_ring)
4918                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4919
4920         if (q_vector->rx_ring)
4921                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4922
4923         if (!tx_clean_complete)
4924                 work_done = budget;
4925
4926         /* If not enough Rx work done, exit the polling mode */
4927         if (work_done < budget) {
4928                 napi_complete(napi);
4929                 igb_ring_irq_enable(q_vector);
4930         }
4931
4932         return work_done;
4933 }
4934
4935 /**
4936  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4937  * @adapter: board private structure
4938  * @shhwtstamps: timestamp structure to update
4939  * @regval: unsigned 64bit system time value.
4940  *
4941  * We need to convert the system time value stored in the RX/TXSTMP registers
4942  * into a hwtstamp which can be used by the upper level timestamping functions
4943  */
4944 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4945                                    struct skb_shared_hwtstamps *shhwtstamps,
4946                                    u64 regval)
4947 {
4948         u64 ns;
4949
4950         /*
4951          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
4952          * 24 to match clock shift we setup earlier.
4953          */
4954         if (adapter->hw.mac.type == e1000_82580)
4955                 regval <<= IGB_82580_TSYNC_SHIFT;
4956
4957         ns = timecounter_cyc2time(&adapter->clock, regval);
4958         timecompare_update(&adapter->compare, ns);
4959         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4960         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4961         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4962 }
4963
4964 /**
4965  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4966  * @q_vector: pointer to q_vector containing needed info
4967  * @skb: packet that was just sent
4968  *
4969  * If we were asked to do hardware stamping and such a time stamp is
4970  * available, then it must have been for this skb here because we only
4971  * allow one such packet into the queue.
4972  */
4973 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4974 {
4975         struct igb_adapter *adapter = q_vector->adapter;
4976         union skb_shared_tx *shtx = skb_tx(skb);
4977         struct e1000_hw *hw = &adapter->hw;
4978         struct skb_shared_hwtstamps shhwtstamps;
4979         u64 regval;
4980
4981         /* if skb does not support hw timestamp or TX stamp not valid exit */
4982         if (likely(!shtx->hardware) ||
4983             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4984                 return;
4985
4986         regval = rd32(E1000_TXSTMPL);
4987         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4988
4989         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4990         skb_tstamp_tx(skb, &shhwtstamps);
4991 }
4992
4993 /**
4994  * igb_clean_tx_irq - Reclaim resources after transmit completes
4995  * @q_vector: pointer to q_vector containing needed info
4996  * returns true if ring is completely cleaned
4997  **/
4998 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4999 {
5000         struct igb_adapter *adapter = q_vector->adapter;
5001         struct igb_ring *tx_ring = q_vector->tx_ring;
5002         struct net_device *netdev = tx_ring->netdev;
5003         struct e1000_hw *hw = &adapter->hw;
5004         struct igb_buffer *buffer_info;
5005         struct sk_buff *skb;
5006         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5007         unsigned int total_bytes = 0, total_packets = 0;
5008         unsigned int i, eop, count = 0;
5009         bool cleaned = false;
5010
5011         i = tx_ring->next_to_clean;
5012         eop = tx_ring->buffer_info[i].next_to_watch;
5013         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5014
5015         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5016                (count < tx_ring->count)) {
5017                 for (cleaned = false; !cleaned; count++) {
5018                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5019                         buffer_info = &tx_ring->buffer_info[i];
5020                         cleaned = (i == eop);
5021                         skb = buffer_info->skb;
5022
5023                         if (skb) {
5024                                 unsigned int segs, bytecount;
5025                                 /* gso_segs is currently only valid for tcp */
5026                                 segs = buffer_info->gso_segs;
5027                                 /* multiply data chunks by size of headers */
5028                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
5029                                             skb->len;
5030                                 total_packets += segs;
5031                                 total_bytes += bytecount;
5032
5033                                 igb_tx_hwtstamp(q_vector, skb);
5034                         }
5035
5036                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5037                         tx_desc->wb.status = 0;
5038
5039                         i++;
5040                         if (i == tx_ring->count)
5041                                 i = 0;
5042                 }
5043                 eop = tx_ring->buffer_info[i].next_to_watch;
5044                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5045         }
5046
5047         tx_ring->next_to_clean = i;
5048
5049         if (unlikely(count &&
5050                      netif_carrier_ok(netdev) &&
5051                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5052                 /* Make sure that anybody stopping the queue after this
5053                  * sees the new next_to_clean.
5054                  */
5055                 smp_mb();
5056                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5057                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5058                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5059                         tx_ring->tx_stats.restart_queue++;
5060                 }
5061         }
5062
5063         if (tx_ring->detect_tx_hung) {
5064                 /* Detect a transmit hang in hardware, this serializes the
5065                  * check with the clearing of time_stamp and movement of i */
5066                 tx_ring->detect_tx_hung = false;
5067                 if (tx_ring->buffer_info[i].time_stamp &&
5068                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5069                                (adapter->tx_timeout_factor * HZ)) &&
5070                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5071
5072                         /* detected Tx unit hang */
5073                         dev_err(&tx_ring->pdev->dev,
5074                                 "Detected Tx Unit Hang\n"
5075                                 "  Tx Queue             <%d>\n"
5076                                 "  TDH                  <%x>\n"
5077                                 "  TDT                  <%x>\n"
5078                                 "  next_to_use          <%x>\n"
5079                                 "  next_to_clean        <%x>\n"
5080                                 "buffer_info[next_to_clean]\n"
5081                                 "  time_stamp           <%lx>\n"
5082                                 "  next_to_watch        <%x>\n"
5083                                 "  jiffies              <%lx>\n"
5084                                 "  desc.status          <%x>\n",
5085                                 tx_ring->queue_index,
5086                                 readl(tx_ring->head),
5087                                 readl(tx_ring->tail),
5088                                 tx_ring->next_to_use,
5089                                 tx_ring->next_to_clean,
5090                                 tx_ring->buffer_info[eop].time_stamp,
5091                                 eop,
5092                                 jiffies,
5093                                 eop_desc->wb.status);
5094                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5095                 }
5096         }
5097         tx_ring->total_bytes += total_bytes;
5098         tx_ring->total_packets += total_packets;
5099         tx_ring->tx_stats.bytes += total_bytes;
5100         tx_ring->tx_stats.packets += total_packets;
5101         return (count < tx_ring->count);
5102 }
5103
5104 /**
5105  * igb_receive_skb - helper function to handle rx indications
5106  * @q_vector: structure containing interrupt and ring information
5107  * @skb: packet to send up
5108  * @vlan_tag: vlan tag for packet
5109  **/
5110 static void igb_receive_skb(struct igb_q_vector *q_vector,
5111                             struct sk_buff *skb,
5112                             u16 vlan_tag)
5113 {
5114         struct igb_adapter *adapter = q_vector->adapter;
5115
5116         if (vlan_tag)
5117                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5118                                  vlan_tag, skb);
5119         else
5120                 napi_gro_receive(&q_vector->napi, skb);
5121 }
5122
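/**
 * igb_rx_checksum_adv - indicate hardware checksum status to the stack
 * @ring: ring the packet was received on
 * @status_err: status/error bits from the receive descriptor
 * @skb: packet being passed up
 *
 * Sets skb->ip_summed based on the hardware checksum result, working
 * around an errata where the L4E bit is set incorrectly for 60-byte
 * SCTP packets.
 **/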
5123 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5124                                        u32 status_err, struct sk_buff *skb)
5125 {
5126         skb->ip_summed = CHECKSUM_NONE;
5127
5128         /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5129         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5130              (status_err & E1000_RXD_STAT_IXSM))
5131                 return;
5132
5133         /* TCP/UDP checksum error bit is set */
5134         if (status_err &
5135             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5136                 /*
5137                  * work around errata with sctp packets where the TCPE aka
5138                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5139                  * packets, (aka let the stack check the crc32c)
5140                  */
5141                 if ((skb->len == 60) &&
5142                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
5143                         ring->rx_stats.csum_err++;
5144
5145                 /* let the stack verify checksum errors */
5146                 return;
5147         }
5148         /* It must be a TCP or UDP packet with a valid checksum */
5149         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5150                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5151
5152         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
5153 }
5154
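/**
 * igb_rx_hwtstamp - check for an Rx hardware time stamp
 * @q_vector: vector the packet arrived on
 * @staterr: status/error bits from the receive descriptor
 * @skb: packet to attach the time stamp to
 *
 * If the descriptor indicates the packet was time stamped, read the
 * RXSTMP registers and fill in the skb's hardware time stamp.
 **/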
5155 static inline void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5156                                    struct sk_buff *skb)
5157 {
5158         struct igb_adapter *adapter = q_vector->adapter;
5159         struct e1000_hw *hw = &adapter->hw;
5160         u64 regval;
5161
5162         /*
5163          * If this bit is set, then the RX registers contain the time stamp. No
5164          * other packet will be time stamped until we read these registers, so
5165          * read the registers to make them available again. Because only one
5166          * packet can be time stamped at a time, we know that the register
5167          * values must belong to this one here and therefore we don't need to
5168          * compare any of the additional attributes stored for it.
5169          *
5170          * If nothing went wrong, then it should have a skb_shared_tx that we
5171          * can turn into a skb_shared_hwtstamps.
5172          */
5173         if (likely(!(staterr & E1000_RXDADV_STAT_TS)))
5174                 return;
5175         if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5176                 return;
5177
5178         regval = rd32(E1000_RXSTMPL);
5179         regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5180
5181         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5182 }
5183 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5184                                union e1000_adv_rx_desc *rx_desc)
5185 {
5186         /* HW will not DMA in data larger than the given buffer, even if it
5187          * parses the (NFS, of course) header to be larger.  In that case, it
5188          * fills the header buffer and spills the rest into the page.
5189          */
5190         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5191                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5192         if (hlen > rx_ring->rx_buffer_len)
5193                 hlen = rx_ring->rx_buffer_len;
5194         return hlen;
5195 }
5196
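/**
 * igb_clean_rx_irq_adv - clean completed receive descriptors
 * @q_vector: vector owning the Rx ring to clean
 * @work_done: incremented for every packet processed
 * @budget: maximum number of packets to process
 *
 * Unmaps completed buffers (header-split aware), applies checksum,
 * time stamp and VLAN handling, hands packets to the stack and
 * refills the ring with fresh buffers.
 **/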
5197 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5198                                  int *work_done, int budget)
5199 {
5200         struct igb_ring *rx_ring = q_vector->rx_ring;
5201         struct net_device *netdev = rx_ring->netdev;
5202         struct pci_dev *pdev = rx_ring->pdev;
5203         union e1000_adv_rx_desc *rx_desc, *next_rxd;
5204         struct igb_buffer *buffer_info, *next_buffer;
5205         struct sk_buff *skb;
5206         bool cleaned = false;
5207         int cleaned_count = 0;
5208         int current_node = numa_node_id();
5209         unsigned int total_bytes = 0, total_packets = 0;
5210         unsigned int i;
5211         u32 staterr;
5212         u16 length;
5213         u16 vlan_tag;
5214
5215         i = rx_ring->next_to_clean;
5216         buffer_info = &rx_ring->buffer_info[i];
5217         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5218         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5219
5220         while (staterr & E1000_RXD_STAT_DD) {
5221                 if (*work_done >= budget)
5222                         break;
5223                 (*work_done)++;
5224
5225                 skb = buffer_info->skb;
5226                 prefetch(skb->data - NET_IP_ALIGN);
5227                 buffer_info->skb = NULL;
5228
5229                 i++;
5230                 if (i == rx_ring->count)
5231                         i = 0;
5232
5233                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5234                 prefetch(next_rxd);
5235                 next_buffer = &rx_ring->buffer_info[i];
5236
5237                 length = le16_to_cpu(rx_desc->wb.upper.length);
5238                 cleaned = true;
5239                 cleaned_count++;
5240
5241                 if (buffer_info->dma) {
5242                         pci_unmap_single(pdev, buffer_info->dma,
5243                                          rx_ring->rx_buffer_len,
5244                                          PCI_DMA_FROMDEVICE);
5245                         buffer_info->dma = 0;
5246                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5247                                 skb_put(skb, length);
5248                                 goto send_up;
5249                         }
5250                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5251                 }
5252
5253                 if (length) {
5254                         pci_unmap_page(pdev, buffer_info->page_dma,
5255                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
5256                         buffer_info->page_dma = 0;
5257
5258                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
5259                                                 buffer_info->page,
5260                                                 buffer_info->page_offset,
5261                                                 length);
5262
5263                         if ((page_count(buffer_info->page) != 1) ||
5264                             (page_to_nid(buffer_info->page) != current_node))
5265                                 buffer_info->page = NULL;
5266                         else
5267                                 get_page(buffer_info->page);
5268
5269                         skb->len += length;
5270                         skb->data_len += length;
5271                         skb->truesize += length;
5272                 }
5273
5274                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5275                         buffer_info->skb = next_buffer->skb;
5276                         buffer_info->dma = next_buffer->dma;
5277                         next_buffer->skb = skb;
5278                         next_buffer->dma = 0;
5279                         goto next_desc;
5280                 }
5281 send_up:
5282                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5283                         dev_kfree_skb_irq(skb);
5284                         goto next_desc;
5285                 }
5286
5287                 igb_rx_hwtstamp(q_vector, staterr, skb);
5288                 total_bytes += skb->len;
5289                 total_packets++;
5290
5291                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5292
5293                 skb->protocol = eth_type_trans(skb, netdev);
5294                 skb_record_rx_queue(skb, rx_ring->queue_index);
5295
5296                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5297                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5298
5299                 igb_receive_skb(q_vector, skb, vlan_tag);
5300
5301 next_desc:
5302                 rx_desc->wb.upper.status_error = 0;
5303
5304                 /* return some buffers to hardware, one at a time is too slow */
5305                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5306                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5307                         cleaned_count = 0;
5308                 }
5309
5310                 /* use prefetched values */
5311                 rx_desc = next_rxd;
5312                 buffer_info = next_buffer;
5313                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5314         }
5315
5316         rx_ring->next_to_clean = i;
5317         cleaned_count = igb_desc_unused(rx_ring);
5318
5319         if (cleaned_count)
5320                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5321
5322         rx_ring->total_packets += total_packets;
5323         rx_ring->total_bytes += total_bytes;
5324         rx_ring->rx_stats.packets += total_packets;
5325         rx_ring->rx_stats.bytes += total_bytes;
5326         return cleaned;
5327 }
5328
5329 /**
5330  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5331  * @rx_ring: ring to refill with receive buffers
5332  **/
5333 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5334 {
5335         struct net_device *netdev = rx_ring->netdev;
5336         union e1000_adv_rx_desc *rx_desc;
5337         struct igb_buffer *buffer_info;
5338         struct sk_buff *skb;
5339         unsigned int i;
5340         int bufsz;
5341
5342         i = rx_ring->next_to_use;
5343         buffer_info = &rx_ring->buffer_info[i];
5344
5345         bufsz = rx_ring->rx_buffer_len;
5346
5347         while (cleaned_count--) {
5348                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5349
5350                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5351                         if (!buffer_info->page) {
5352                                 buffer_info->page = netdev_alloc_page(netdev);
5353                                 if (!buffer_info->page) {
5354                                         rx_ring->rx_stats.alloc_failed++;
5355                                         goto no_buffers;
5356                                 }
5357                                 buffer_info->page_offset = 0;
5358                         } else {
5359                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5360                         }
5361                         buffer_info->page_dma =
5362                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5363                                              buffer_info->page_offset,
5364                                              PAGE_SIZE / 2,
5365                                              PCI_DMA_FROMDEVICE);
5366                         if (pci_dma_mapping_error(rx_ring->pdev,
5367                                                   buffer_info->page_dma)) {
5368                                 buffer_info->page_dma = 0;
5369                                 rx_ring->rx_stats.alloc_failed++;
5370                                 goto no_buffers;
5371                         }
5372                 }
5373
5374                 skb = buffer_info->skb;
5375                 if (!skb) {
5376                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5377                         if (!skb) {
5378                                 rx_ring->rx_stats.alloc_failed++;
5379                                 goto no_buffers;
5380                         }
5381
5382                         buffer_info->skb = skb;
5383                 }
5384                 if (!buffer_info->dma) {
5385                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5386                                                           skb->data,
5387                                                           bufsz,
5388                                                           PCI_DMA_FROMDEVICE);
5389                         if (pci_dma_mapping_error(rx_ring->pdev,
5390                                                   buffer_info->dma)) {
5391                                 buffer_info->dma = 0;
5392                                 rx_ring->rx_stats.alloc_failed++;
5393                                 goto no_buffers;
5394                         }
5395                 }
5396                 /* Refresh the desc even if buffer_addrs didn't change because
5397                  * each write-back erases this info. */
5398                 if (bufsz < IGB_RXBUFFER_1024) {
5399                         rx_desc->read.pkt_addr =
5400                              cpu_to_le64(buffer_info->page_dma);
5401                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5402                 } else {
5403                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5404                         rx_desc->read.hdr_addr = 0;
5405                 }
5406
5407                 i++;
5408                 if (i == rx_ring->count)
5409                         i = 0;
5410                 buffer_info = &rx_ring->buffer_info[i];
5411         }
5412
5413 no_buffers:
5414         if (rx_ring->next_to_use != i) {
5415                 rx_ring->next_to_use = i;
5416                 if (i == 0)
5417                         i = (rx_ring->count - 1);
5418                 else
5419                         i--;
5420
5421                 /* Force memory writes to complete before letting h/w
5422                  * know there are new descriptors to fetch.  (Only
5423                  * applicable for weak-ordered memory model archs,
5424                  * such as IA-64). */
5425                 wmb();
5426                 writel(i, rx_ring->tail);
5427         }
5428 }
5429
5430 /**
5431  * igb_mii_ioctl - handle MII ioctls on copper PHY devices
5432  * @netdev: network interface device structure
5433  * @ifr: pointer to the ifreq containing the MII ioctl data
5434  * @cmd: MII ioctl command
5435  **/
5436 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5437 {
5438         struct igb_adapter *adapter = netdev_priv(netdev);
5439         struct mii_ioctl_data *data = if_mii(ifr);
5440
5441         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5442                 return -EOPNOTSUPP;
5443
5444         switch (cmd) {
5445         case SIOCGMIIPHY:
5446                 data->phy_id = adapter->hw.phy.addr;
5447                 break;
5448         case SIOCGMIIREG:
5449                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5450                                      &data->val_out))
5451                         return -EIO;
5452                 break;
5453         case SIOCSMIIREG:
5454         default:
5455                 return -EOPNOTSUPP;
5456         }
5457         return 0;
5458 }
5459
5460 /**
5461  * igb_hwtstamp_ioctl - control hardware time stamping
5462  * @netdev: network interface device structure
5463  * @ifr: pointer to the ifreq containing the hwtstamp_config
5464  * @cmd: ioctl command
5465  *
5466  * Outgoing time stamping can be enabled and disabled. Play nice and
5467  * disable it when requested, although it shouldn't cause any overhead
5468  * when no packet needs it. At most one packet in the queue may be
5469  * marked for time stamping, otherwise it would be impossible to tell
5470  * for sure to which packet the hardware time stamp belongs.
5471  *
5472  * Incoming time stamping has to be configured via the hardware
5473  * filters. Not all combinations are supported, in particular event
5474  * type has to be specified. Matching the kind of event packet is
5475  * not supported, with the exception of "all V2 events regardless of
5476  * level 2 or 4".
5477  *
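 * A minimal user-space sketch of how this handler is reached via the
 * SIOCSHWTSTAMP ioctl (assuming the usual <sys/ioctl.h>, <string.h>,
 * <net/if.h> and <linux/net_tstamp.h> includes, an interface named
 * "eth0" and an already-open socket descriptor "fd"):
 *
 *	struct hwtstamp_config cfg = {
 *		.tx_type   = HWTSTAMP_TX_ON,
 *		.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
 *	};
 *	struct ifreq ifr;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *	ifr.ifr_data = (void *)&cfg;
 *	ioctl(fd, SIOCSHWTSTAMP, &ifr);
 *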
5478  **/
5479 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5480                               struct ifreq *ifr, int cmd)
5481 {
5482         struct igb_adapter *adapter = netdev_priv(netdev);
5483         struct e1000_hw *hw = &adapter->hw;
5484         struct hwtstamp_config config;
5485         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5486         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5487         u32 tsync_rx_cfg = 0;
5488         bool is_l4 = false;
5489         bool is_l2 = false;
5490         u32 regval;
5491
5492         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5493                 return -EFAULT;
5494
5495         /* reserved for future extensions */
5496         if (config.flags)
5497                 return -EINVAL;
5498
5499         switch (config.tx_type) {
5500         case HWTSTAMP_TX_OFF:
5501                 tsync_tx_ctl = 0;
5502         case HWTSTAMP_TX_ON:
5503                 break;
5504         default:
5505                 return -ERANGE;
5506         }
5507
5508         switch (config.rx_filter) {
5509         case HWTSTAMP_FILTER_NONE:
5510                 tsync_rx_ctl = 0;
5511                 break;
5512         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5513         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5514         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5515         case HWTSTAMP_FILTER_ALL:
5516                 /*
5517                  * register TSYNCRXCFG must be set, therefore it is not
5518                  * possible to time stamp both Sync and Delay_Req messages
5519                  * => fall back to time stamping all packets
5520                  */
5521                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5522                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5523                 break;
5524         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5525                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5526                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5527                 is_l4 = true;
5528                 break;
5529         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5530                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5531                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5532                 is_l4 = true;
5533                 break;
5534         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5535         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5536                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5537                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5538                 is_l2 = true;
5539                 is_l4 = true;
5540                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5541                 break;
5542         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5543         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5544                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5545                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5546                 is_l2 = true;
5547                 is_l4 = true;
5548                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5549                 break;
5550         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5551         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5552         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5553                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5554                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5555                 is_l2 = true;
5556                 break;
5557         default:
5558                 return -ERANGE;
5559         }
5560
5561         if (hw->mac.type == e1000_82575) {
5562                 if (tsync_rx_ctl | tsync_tx_ctl)
5563                         return -EINVAL;
5564                 return 0;
5565         }
5566
5567         /* enable/disable TX */
5568         regval = rd32(E1000_TSYNCTXCTL);
5569         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5570         regval |= tsync_tx_ctl;
5571         wr32(E1000_TSYNCTXCTL, regval);
5572
5573         /* enable/disable RX */
5574         regval = rd32(E1000_TSYNCRXCTL);
5575         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5576         regval |= tsync_rx_ctl;
5577         wr32(E1000_TSYNCRXCTL, regval);
5578
5579         /* define which PTP packets are time stamped */
5580         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5581
5582         /* define ethertype filter for timestamped packets */
5583         if (is_l2)
5584                 wr32(E1000_ETQF(3),
5585                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5586                                  E1000_ETQF_1588 | /* enable timestamping */
5587                                  ETH_P_1588));     /* 1588 eth protocol type */
5588         else
5589                 wr32(E1000_ETQF(3), 0);
5590
5591 #define PTP_PORT 319 /* UDP port for PTP event messages */
5592         /* L4 Queue Filter[3]: filter by destination port and protocol */
5593         if (is_l4) {
5594                 u32 ftqf = (IPPROTO_UDP /* UDP */
5595                         | E1000_FTQF_VF_BP /* VF not compared */
5596                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5597                         | E1000_FTQF_MASK); /* mask all inputs */
5598                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5599
5600                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5601                 wr32(E1000_IMIREXT(3),
5602                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5603                 if (hw->mac.type == e1000_82576) {
5604                         /* enable source port check */
5605                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5606                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5607                 }
5608                 wr32(E1000_FTQF(3), ftqf);
5609         } else {
5610                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5611         }
5612         wrfl();
5613
5614         adapter->hwtstamp_config = config;
5615
5616         /* clear TX/RX time stamp registers, just to be sure */
5617         regval = rd32(E1000_TXSTMPH);
5618         regval = rd32(E1000_RXSTMPH);
5619
5620         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5621                 -EFAULT : 0;
5622 }
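/*
 * Illustrative userspace sketch (not part of the driver): a minimal
 * SIOCSHWTSTAMP request that turns on TX time stamping and asks for all
 * PTP V2 event packets on RX. The interface name "eth0" and the
 * already-opened AF_INET datagram socket "sock" are placeholders; the
 * structures come from <linux/net_tstamp.h> and <net/if.h>.
 *
 *   struct hwtstamp_config cfg = {
 *           .flags     = 0,
 *           .tx_type   = HWTSTAMP_TX_ON,
 *           .rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
 *   };
 *   struct ifreq ifr;
 *
 *   memset(&ifr, 0, sizeof(ifr));
 *   strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *   ifr.ifr_data = (void *)&cfg;
 *   if (ioctl(sock, SIOCSHWTSTAMP, &ifr) < 0)
 *           perror("SIOCSHWTSTAMP");
 *
 * On success cfg.rx_filter is written back with what the hardware will
 * actually time stamp (e.g. HWTSTAMP_FILTER_ALL or _SOME above).
 */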
5623
5624 /**
5625  * igb_ioctl - handle MII and hardware time stamping ioctls
5626  * @netdev: network interface device structure
5627  * @ifr: interface request structure
5628  * @cmd: ioctl command to execute
5629  **/
5630 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5631 {
5632         switch (cmd) {
5633         case SIOCGMIIPHY:
5634         case SIOCGMIIREG:
5635         case SIOCSMIIREG:
5636                 return igb_mii_ioctl(netdev, ifr, cmd);
5637         case SIOCSHWTSTAMP:
5638                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5639         default:
5640                 return -EOPNOTSUPP;
5641         }
5642 }
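/*
 * Illustrative userspace sketch (not part of the driver): reading a PHY
 * register through the MII ioctls dispatched above. "eth0" and the
 * AF_INET socket "sock" are placeholders; struct mii_ioctl_data and
 * MII_BMSR come from <linux/mii.h>.
 *
 *   struct ifreq ifr;
 *   struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&ifr.ifr_data;
 *
 *   memset(&ifr, 0, sizeof(ifr));
 *   strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *   if (ioctl(sock, SIOCGMIIPHY, &ifr) == 0) {      // fills mii->phy_id
 *           mii->reg_num = MII_BMSR;                 // basic mode status register
 *           if (ioctl(sock, SIOCGMIIREG, &ifr) == 0)
 *                   printf("BMSR = 0x%04x\n", mii->val_out);
 *   }
 */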
5643
5644 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5645 {
5646         struct igb_adapter *adapter = hw->back;
5647         u16 cap_offset;
5648
5649         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5650         if (!cap_offset)
5651                 return -E1000_ERR_CONFIG;
5652
5653         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5654
5655         return 0;
5656 }
5657
5658 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5659 {
5660         struct igb_adapter *adapter = hw->back;
5661         u16 cap_offset;
5662
5663         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5664         if (!cap_offset)
5665                 return -E1000_ERR_CONFIG;
5666
5667         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5668
5669         return 0;
5670 }
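/*
 * Illustrative example (not part of the driver): the reg argument to the
 * two helpers above is an offset within the PCI Express capability
 * structure, not an absolute config-space offset. Reading the standard
 * Device Control register (PCI_EXP_DEVCTL from <linux/pci_regs.h>) might
 * look like:
 *
 *   u16 devctl;
 *
 *   if (!igb_read_pcie_cap_reg(hw, PCI_EXP_DEVCTL, &devctl))
 *           dev_dbg(&adapter->pdev->dev, "PCIe DEVCTL = 0x%04x\n", devctl);
 */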
5671
5672 static void igb_vlan_rx_register(struct net_device *netdev,
5673                                  struct vlan_group *grp)
5674 {
5675         struct igb_adapter *adapter = netdev_priv(netdev);
5676         struct e1000_hw *hw = &adapter->hw;
5677         u32 ctrl, rctl;
5678
5679         igb_irq_disable(adapter);
5680         adapter->vlgrp = grp;
5681
5682         if (grp) {
5683                 /* enable VLAN tag insert/strip */
5684                 ctrl = rd32(E1000_CTRL);
5685                 ctrl |= E1000_CTRL_VME;
5686                 wr32(E1000_CTRL, ctrl);
5687
5688                 /* Disable CFI check */
5689                 rctl = rd32(E1000_RCTL);
5690                 rctl &= ~E1000_RCTL_CFIEN;
5691                 wr32(E1000_RCTL, rctl);
5692         } else {
5693                 /* disable VLAN tag insert/strip */
5694                 ctrl = rd32(E1000_CTRL);
5695                 ctrl &= ~E1000_CTRL_VME;
5696                 wr32(E1000_CTRL, ctrl);
5697         }
5698
5699         igb_rlpml_set(adapter);
5700
5701         if (!test_bit(__IGB_DOWN, &adapter->state))
5702                 igb_irq_enable(adapter);
5703 }
5704
5705 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5706 {
5707         struct igb_adapter *adapter = netdev_priv(netdev);
5708         struct e1000_hw *hw = &adapter->hw;
5709         int pf_id = adapter->vfs_allocated_count;
5710
5711         /* attempt to add filter to vlvf array */
5712         igb_vlvf_set(adapter, vid, true, pf_id);
5713
5714         /* add the filter since PF can receive vlans w/o entry in vlvf */
5715         igb_vfta_set(hw, vid, true);
5716 }
5717
5718 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5719 {
5720         struct igb_adapter *adapter = netdev_priv(netdev);
5721         struct e1000_hw *hw = &adapter->hw;
5722         int pf_id = adapter->vfs_allocated_count;
5723         s32 err;
5724
5725         igb_irq_disable(adapter);
5726         vlan_group_set_device(adapter->vlgrp, vid, NULL);
5727
5728         if (!test_bit(__IGB_DOWN, &adapter->state))
5729                 igb_irq_enable(adapter);
5730
5731         /* remove vlan from VLVF table array */
5732         err = igb_vlvf_set(adapter, vid, false, pf_id);
5733
5734         /* if vid was not present in VLVF just remove it from table */
5735         if (err)
5736                 igb_vfta_set(hw, vid, false);
5737 }
5738
5739 static void igb_restore_vlan(struct igb_adapter *adapter)
5740 {
5741         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5742
5743         if (adapter->vlgrp) {
5744                 u16 vid;
5745                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5746                         if (!vlan_group_get_device(adapter->vlgrp, vid))
5747                                 continue;
5748                         igb_vlan_rx_add_vid(adapter->netdev, vid);
5749                 }
5750         }
5751 }
5752
5753 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5754 {
5755         struct pci_dev *pdev = adapter->pdev;
5756         struct e1000_mac_info *mac = &adapter->hw.mac;
5757
5758         mac->autoneg = 0;
5759
5760         switch (spddplx) {
5761         case SPEED_10 + DUPLEX_HALF:
5762                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5763                 break;
5764         case SPEED_10 + DUPLEX_FULL:
5765                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5766                 break;
5767         case SPEED_100 + DUPLEX_HALF:
5768                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5769                 break;
5770         case SPEED_100 + DUPLEX_FULL:
5771                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5772                 break;
5773         case SPEED_1000 + DUPLEX_FULL:
5774                 mac->autoneg = 1;
5775                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5776                 break;
5777         case SPEED_1000 + DUPLEX_HALF: /* not supported */
5778         default:
5779                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
5780                 return -EINVAL;
5781         }
5782         return 0;
5783 }
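/*
 * Illustrative example (not part of the driver): callers such as the
 * ethtool set_settings path encode a forced link mode as a simple sum of
 * the SPEED_* and DUPLEX_* constants, e.g.
 *
 *   err = igb_set_spd_dplx(adapter, SPEED_100 + DUPLEX_FULL);
 *
 * forces 100 Mb/s full duplex, while SPEED_1000 + DUPLEX_FULL keeps
 * autonegotiation enabled and only restricts the advertised modes, since
 * gigabit copper links may not be forced.
 */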
5784
5785 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5786 {
5787         struct net_device *netdev = pci_get_drvdata(pdev);
5788         struct igb_adapter *adapter = netdev_priv(netdev);
5789         struct e1000_hw *hw = &adapter->hw;
5790         u32 ctrl, rctl, status;
5791         u32 wufc = adapter->wol;
5792 #ifdef CONFIG_PM
5793         int retval = 0;
5794 #endif
5795
5796         netif_device_detach(netdev);
5797
5798         if (netif_running(netdev))
5799                 igb_close(netdev);
5800
5801         igb_clear_interrupt_scheme(adapter);
5802
5803 #ifdef CONFIG_PM
5804         retval = pci_save_state(pdev);
5805         if (retval)
5806                 return retval;
5807 #endif
5808
5809         status = rd32(E1000_STATUS);
5810         if (status & E1000_STATUS_LU)
5811                 wufc &= ~E1000_WUFC_LNKC;
5812
5813         if (wufc) {
5814                 igb_setup_rctl(adapter);
5815                 igb_set_rx_mode(netdev);
5816
5817                 /* turn on all-multi mode if wake on multicast is enabled */
5818                 if (wufc & E1000_WUFC_MC) {
5819                         rctl = rd32(E1000_RCTL);
5820                         rctl |= E1000_RCTL_MPE;
5821                         wr32(E1000_RCTL, rctl);
5822                 }
5823
5824                 ctrl = rd32(E1000_CTRL);
5825                 /* advertise wake from D3Cold */
5826                 #define E1000_CTRL_ADVD3WUC 0x00100000
5827                 /* phy power management enable */
5828                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5829                 ctrl |= E1000_CTRL_ADVD3WUC;
5830                 wr32(E1000_CTRL, ctrl);
5831
5832                 /* Allow time for pending master requests to run */
5833                 igb_disable_pcie_master(hw);
5834
5835                 wr32(E1000_WUC, E1000_WUC_PME_EN);
5836                 wr32(E1000_WUFC, wufc);
5837         } else {
5838                 wr32(E1000_WUC, 0);
5839                 wr32(E1000_WUFC, 0);
5840         }
5841
5842         *enable_wake = wufc || adapter->en_mng_pt;
5843         if (!*enable_wake)
5844                 igb_power_down_link(adapter);
5845         else
5846                 igb_power_up_link(adapter);
5847
5848         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
5849          * would have already happened in close and is redundant. */
5850         igb_release_hw_control(adapter);
5851
5852         pci_disable_device(pdev);
5853
5854         return 0;
5855 }
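/*
 * Illustrative userspace sketch (not part of the driver): the wake
 * filters programmed above come from adapter->wol, which is normally set
 * through the ethtool wake-on-LAN interface. Requesting magic-packet
 * wake might look like the following, with "eth0" and "sock" as
 * placeholders and the structures from <linux/ethtool.h>:
 *
 *   struct ethtool_wolinfo wol = {
 *           .cmd     = ETHTOOL_SWOL,
 *           .wolopts = WAKE_MAGIC,
 *   };
 *   struct ifreq ifr;
 *
 *   memset(&ifr, 0, sizeof(ifr));
 *   strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *   ifr.ifr_data = (void *)&wol;
 *   if (ioctl(sock, SIOCETHTOOL, &ifr) < 0)
 *           perror("ETHTOOL_SWOL");
 */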
5856
5857 #ifdef CONFIG_PM
5858 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5859 {
5860         int retval;
5861         bool wake;
5862
5863         retval = __igb_shutdown(pdev, &wake);
5864         if (retval)
5865                 return retval;
5866
5867         if (wake) {
5868                 pci_prepare_to_sleep(pdev);
5869         } else {
5870                 pci_wake_from_d3(pdev, false);
5871                 pci_set_power_state(pdev, PCI_D3hot);
5872         }
5873
5874         return 0;
5875 }
5876
5877 static int igb_resume(struct pci_dev *pdev)
5878 {
5879         struct net_device *netdev = pci_get_drvdata(pdev);
5880         struct igb_adapter *adapter = netdev_priv(netdev);
5881         struct e1000_hw *hw = &adapter->hw;
5882         u32 err;
5883
5884         pci_set_power_state(pdev, PCI_D0);
5885         pci_restore_state(pdev);
5886         pci_save_state(pdev);
5887
5888         err = pci_enable_device_mem(pdev);
5889         if (err) {
5890                 dev_err(&pdev->dev,
5891                         "igb: Cannot enable PCI device from suspend\n");
5892                 return err;
5893         }
5894         pci_set_master(pdev);
5895
5896         pci_enable_wake(pdev, PCI_D3hot, 0);
5897         pci_enable_wake(pdev, PCI_D3cold, 0);
5898
5899         if (igb_init_interrupt_scheme(adapter)) {
5900                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5901                 return -ENOMEM;
5902         }
5903
5904         igb_reset(adapter);
5905
5906         /* let the f/w know that the h/w is now under the control of the
5907          * driver. */
5908         igb_get_hw_control(adapter);
5909
5910         wr32(E1000_WUS, ~0);
5911
5912         if (netif_running(netdev)) {
5913                 err = igb_open(netdev);
5914                 if (err)
5915                         return err;
5916         }
5917
5918         netif_device_attach(netdev);
5919
5920         return 0;
5921 }
5922 #endif
5923
5924 static void igb_shutdown(struct pci_dev *pdev)
5925 {
5926         bool wake;
5927
5928         __igb_shutdown(pdev, &wake);
5929
5930         if (system_state == SYSTEM_POWER_OFF) {
5931                 pci_wake_from_d3(pdev, wake);
5932                 pci_set_power_state(pdev, PCI_D3hot);
5933         }
5934 }
5935
5936 #ifdef CONFIG_NET_POLL_CONTROLLER
5937 /*
5938  * Polling 'interrupt' - used by things like netconsole to send skbs
5939  * without having to re-enable interrupts. It's not called while
5940  * the interrupt routine is executing.
5941  */
5942 static void igb_netpoll(struct net_device *netdev)
5943 {
5944         struct igb_adapter *adapter = netdev_priv(netdev);
5945         struct e1000_hw *hw = &adapter->hw;
5946         int i;
5947
5948         if (!adapter->msix_entries) {
5949                 struct igb_q_vector *q_vector = adapter->q_vector[0];
5950                 igb_irq_disable(adapter);
5951                 napi_schedule(&q_vector->napi);
5952                 return;
5953         }
5954
5955         for (i = 0; i < adapter->num_q_vectors; i++) {
5956                 struct igb_q_vector *q_vector = adapter->q_vector[i];
5957                 wr32(E1000_EIMC, q_vector->eims_value);
5958                 napi_schedule(&q_vector->napi);
5959         }
5960 }
5961 #endif /* CONFIG_NET_POLL_CONTROLLER */
5962
5963 /**
5964  * igb_io_error_detected - called when PCI error is detected
5965  * @pdev: Pointer to PCI device
5966  * @state: The current pci connection state
5967  *
5968  * This function is called after a PCI bus error affecting
5969  * this device has been detected.
5970  */
5971 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5972                                               pci_channel_state_t state)
5973 {
5974         struct net_device *netdev = pci_get_drvdata(pdev);
5975         struct igb_adapter *adapter = netdev_priv(netdev);
5976
5977         netif_device_detach(netdev);
5978
5979         if (state == pci_channel_io_perm_failure)
5980                 return PCI_ERS_RESULT_DISCONNECT;
5981
5982         if (netif_running(netdev))
5983                 igb_down(adapter);
5984         pci_disable_device(pdev);
5985
5986         /* Request a slot reset. */
5987         return PCI_ERS_RESULT_NEED_RESET;
5988 }
5989
5990 /**
5991  * igb_io_slot_reset - called after the pci bus has been reset.
5992  * @pdev: Pointer to PCI device
5993  *
5994  * Restart the card from scratch, as if from a cold boot. Implementation
5995  * resembles the first half of the igb_resume routine.
5996  */
5997 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
5998 {
5999         struct net_device *netdev = pci_get_drvdata(pdev);
6000         struct igb_adapter *adapter = netdev_priv(netdev);
6001         struct e1000_hw *hw = &adapter->hw;
6002         pci_ers_result_t result;
6003         int err;
6004
6005         if (pci_enable_device_mem(pdev)) {
6006                 dev_err(&pdev->dev,
6007                         "Cannot re-enable PCI device after reset.\n");
6008                 result = PCI_ERS_RESULT_DISCONNECT;
6009         } else {
6010                 pci_set_master(pdev);
6011                 pci_restore_state(pdev);
6012                 pci_save_state(pdev);
6013
6014                 pci_enable_wake(pdev, PCI_D3hot, 0);
6015                 pci_enable_wake(pdev, PCI_D3cold, 0);
6016
6017                 igb_reset(adapter);
6018                 wr32(E1000_WUS, ~0);
6019                 result = PCI_ERS_RESULT_RECOVERED;
6020         }
6021
6022         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6023         if (err) {
6024                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6025                         "failed 0x%0x\n", err);
6026                 /* non-fatal, continue */
6027         }
6028
6029         return result;
6030 }
6031
6032 /**
6033  * igb_io_resume - called when traffic can start flowing again.
6034  * @pdev: Pointer to PCI device
6035  *
6036  * This callback is called when the error recovery driver tells us that
6037  * it's OK to resume normal operation. Implementation resembles the
6038  * second half of the igb_resume routine.
6039  */
6040 static void igb_io_resume(struct pci_dev *pdev)
6041 {
6042         struct net_device *netdev = pci_get_drvdata(pdev);
6043         struct igb_adapter *adapter = netdev_priv(netdev);
6044
6045         if (netif_running(netdev)) {
6046                 if (igb_up(adapter)) {
6047                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6048                         return;
6049                 }
6050         }
6051
6052         netif_device_attach(netdev);
6053
6054         /* let the f/w know that the h/w is now under the control of the
6055          * driver. */
6056         igb_get_hw_control(adapter);
6057 }
6058
6059 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6060                              u8 qsel)
6061 {
6062         u32 rar_low, rar_high;
6063         struct e1000_hw *hw = &adapter->hw;
6064
6065         /* HW expects these in little endian so we reverse the byte order
6066          * from network order (big endian) to little endian
6067          */
6068         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6069                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6070         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6071
6072         /* Indicate to hardware the Address is Valid. */
6073         rar_high |= E1000_RAH_AV;
6074
6075         if (hw->mac.type == e1000_82575)
6076                 rar_high |= E1000_RAH_POOL_1 * qsel;
6077         else
6078                 rar_high |= E1000_RAH_POOL_1 << qsel;
6079
6080         wr32(E1000_RAL(index), rar_low);
6081         wrfl();
6082         wr32(E1000_RAH(index), rar_high);
6083         wrfl();
6084 }
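/*
 * Worked example (illustrative): for the MAC address 00:1b:21:aa:bb:cc
 * the bytes are packed least-significant first, so
 *
 *   rar_low  = 0x00 | (0x1b << 8) | (0x21 << 16) | (0xaa << 24) = 0xaa211b00
 *   rar_high = 0xbb | (0xcc << 8)                               = 0x0000ccbb
 *
 * before the address-valid bit (E1000_RAH_AV) and the pool selection bits
 * derived from qsel are OR'd into rar_high.
 */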
6085
6086 static int igb_set_vf_mac(struct igb_adapter *adapter,
6087                           int vf, unsigned char *mac_addr)
6088 {
6089         struct e1000_hw *hw = &adapter->hw;
6090         /* VF MAC addresses start at the end of the receive address registers
6091          * and move towards the first, so a collision should not be possible */
6092         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6093
6094         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6095
6096         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6097
6098         return 0;
6099 }
6100
6101 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6102 {
6103         struct igb_adapter *adapter = netdev_priv(netdev);
6104         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6105                 return -EINVAL;
6106         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6107         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6108         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6109                                       " change effective.\n");
6110         if (test_bit(__IGB_DOWN, &adapter->state)) {
6111                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6112                          " but the PF device is not up.\n");
6113                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6114                          " attempting to use the VF device.\n");
6115         }
6116         return igb_set_vf_mac(adapter, vf, mac);
6117 }
6118
6119 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6120 {
6121         return -EOPNOTSUPP;
6122 }
6123
6124 static int igb_ndo_get_vf_config(struct net_device *netdev,
6125                                  int vf, struct ifla_vf_info *ivi)
6126 {
6127         struct igb_adapter *adapter = netdev_priv(netdev);
6128         if (vf >= adapter->vfs_allocated_count)
6129                 return -EINVAL;
6130         ivi->vf = vf;
6131         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6132         ivi->tx_rate = 0;
6133         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6134         ivi->qos = adapter->vf_data[vf].pf_qos;
6135         return 0;
6136 }
6137
6138 static void igb_vmm_control(struct igb_adapter *adapter)
6139 {
6140         struct e1000_hw *hw = &adapter->hw;
6141         u32 reg;
6142
6143         /* replication is not supported for 82575 */
6144         if (hw->mac.type == e1000_82575)
6145                 return;
6146
6147         /* enable replication vlan tag stripping */
6148         reg = rd32(E1000_RPLOLR);
6149         reg |= E1000_RPLOLR_STRVLAN;
6150         wr32(E1000_RPLOLR, reg);
6151
6152         /* notify HW that the MAC is adding vlan tags */
6153         reg = rd32(E1000_DTXCTL);
6154         reg |= E1000_DTXCTL_VLAN_ADDED;
6155         wr32(E1000_DTXCTL, reg);
6156
6157         if (adapter->vfs_allocated_count) {
6158                 igb_vmdq_set_loopback_pf(hw, true);
6159                 igb_vmdq_set_replication_pf(hw, true);
6160         } else {
6161                 igb_vmdq_set_loopback_pf(hw, false);
6162                 igb_vmdq_set_replication_pf(hw, false);
6163         }
6164 }
6165
6166 /* igb_main.c */