]> bbs.cooldavid.org Git - net-next-2.6.git/blob - drivers/net/igb/igb_main.c
Merge branch 'master' of master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6
[net-next-2.6.git] / drivers / net / igb / igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
48 #ifdef CONFIG_IGB_DCA
49 #include <linux/dca.h>
50 #endif
51 #include "igb.h"
52
53 #define DRV_VERSION "2.1.0-k2"
54 char igb_driver_name[] = "igb";
55 char igb_driver_version[] = DRV_VERSION;
56 static const char igb_driver_string[] =
57                                 "Intel(R) Gigabit Ethernet Network Driver";
58 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
59
60 static const struct e1000_info *igb_info_tbl[] = {
61         [board_82575] = &e1000_82575_info,
62 };
63
64 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
85         /* required last entry */
86         {0, }
87 };
88
89 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
90
91 void igb_reset(struct igb_adapter *);
92 static int igb_setup_all_tx_resources(struct igb_adapter *);
93 static int igb_setup_all_rx_resources(struct igb_adapter *);
94 static void igb_free_all_tx_resources(struct igb_adapter *);
95 static void igb_free_all_rx_resources(struct igb_adapter *);
96 static void igb_setup_mrqc(struct igb_adapter *);
97 void igb_update_stats(struct igb_adapter *);
98 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
99 static void __devexit igb_remove(struct pci_dev *pdev);
100 static int igb_sw_init(struct igb_adapter *);
101 static int igb_open(struct net_device *);
102 static int igb_close(struct net_device *);
103 static void igb_configure_tx(struct igb_adapter *);
104 static void igb_configure_rx(struct igb_adapter *);
105 static void igb_clean_all_tx_rings(struct igb_adapter *);
106 static void igb_clean_all_rx_rings(struct igb_adapter *);
107 static void igb_clean_tx_ring(struct igb_ring *);
108 static void igb_clean_rx_ring(struct igb_ring *);
109 static void igb_set_rx_mode(struct net_device *);
110 static void igb_update_phy_info(unsigned long);
111 static void igb_watchdog(unsigned long);
112 static void igb_watchdog_task(struct work_struct *);
113 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
114 static struct net_device_stats *igb_get_stats(struct net_device *);
115 static int igb_change_mtu(struct net_device *, int);
116 static int igb_set_mac(struct net_device *, void *);
117 static void igb_set_uta(struct igb_adapter *adapter);
118 static irqreturn_t igb_intr(int irq, void *);
119 static irqreturn_t igb_intr_msi(int irq, void *);
120 static irqreturn_t igb_msix_other(int irq, void *);
121 static irqreturn_t igb_msix_ring(int irq, void *);
122 #ifdef CONFIG_IGB_DCA
123 static void igb_update_dca(struct igb_q_vector *);
124 static void igb_setup_dca(struct igb_adapter *);
125 #endif /* CONFIG_IGB_DCA */
126 static bool igb_clean_tx_irq(struct igb_q_vector *);
127 static int igb_poll(struct napi_struct *, int);
128 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
129 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
130 static void igb_tx_timeout(struct net_device *);
131 static void igb_reset_task(struct work_struct *);
132 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
133 static void igb_vlan_rx_add_vid(struct net_device *, u16);
134 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
135 static void igb_restore_vlan(struct igb_adapter *);
136 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
137 static void igb_ping_all_vfs(struct igb_adapter *);
138 static void igb_msg_task(struct igb_adapter *);
139 static void igb_vmm_control(struct igb_adapter *);
140 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
141 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
142 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
143 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
144                                int vf, u16 vlan, u8 qos);
145 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
146 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
147                                  struct ifla_vf_info *ivi);
148
149 #ifdef CONFIG_PM
150 static int igb_suspend(struct pci_dev *, pm_message_t);
151 static int igb_resume(struct pci_dev *);
152 #endif
153 static void igb_shutdown(struct pci_dev *);
154 #ifdef CONFIG_IGB_DCA
155 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
156 static struct notifier_block dca_notifier = {
157         .notifier_call  = igb_notify_dca,
158         .next           = NULL,
159         .priority       = 0
160 };
161 #endif
162 #ifdef CONFIG_NET_POLL_CONTROLLER
163 /* for netdump / net console */
164 static void igb_netpoll(struct net_device *);
165 #endif
166 #ifdef CONFIG_PCI_IOV
167 static unsigned int max_vfs = 0;
168 module_param(max_vfs, uint, 0);
169 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
170                  "per physical function");
171 #endif /* CONFIG_PCI_IOV */
172
173 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
174                      pci_channel_state_t);
175 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
176 static void igb_io_resume(struct pci_dev *);
177
178 static struct pci_error_handlers igb_err_handler = {
179         .error_detected = igb_io_error_detected,
180         .slot_reset = igb_io_slot_reset,
181         .resume = igb_io_resume,
182 };
183
184
185 static struct pci_driver igb_driver = {
186         .name     = igb_driver_name,
187         .id_table = igb_pci_tbl,
188         .probe    = igb_probe,
189         .remove   = __devexit_p(igb_remove),
190 #ifdef CONFIG_PM
191         /* Power Managment Hooks */
192         .suspend  = igb_suspend,
193         .resume   = igb_resume,
194 #endif
195         .shutdown = igb_shutdown,
196         .err_handler = &igb_err_handler
197 };
198
199 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
200 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
201 MODULE_LICENSE("GPL");
202 MODULE_VERSION(DRV_VERSION);
203
204 /**
205  * igb_read_clock - read raw cycle counter (to be used by time counter)
206  */
207 static cycle_t igb_read_clock(const struct cyclecounter *tc)
208 {
209         struct igb_adapter *adapter =
210                 container_of(tc, struct igb_adapter, cycles);
211         struct e1000_hw *hw = &adapter->hw;
212         u64 stamp = 0;
213         int shift = 0;
214
215         /*
216          * The timestamp latches on lowest register read. For the 82580
217          * the lowest register is SYSTIMR instead of SYSTIML.  However we never
218          * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
219          */
220         if (hw->mac.type == e1000_82580) {
221                 stamp = rd32(E1000_SYSTIMR) >> 8;
222                 shift = IGB_82580_TSYNC_SHIFT;
223         }
224
225         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
226         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
227         return stamp;
228 }
229
230 /**
231  * igb_get_hw_dev - return device
232  * used by hardware layer to print debugging information
233  **/
234 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
235 {
236         struct igb_adapter *adapter = hw->back;
237         return adapter->netdev;
238 }
239
240 /**
241  * igb_init_module - Driver Registration Routine
242  *
243  * igb_init_module is the first routine called when the driver is
244  * loaded. All it does is register with the PCI subsystem.
245  **/
246 static int __init igb_init_module(void)
247 {
248         int ret;
249         printk(KERN_INFO "%s - version %s\n",
250                igb_driver_string, igb_driver_version);
251
252         printk(KERN_INFO "%s\n", igb_copyright);
253
254 #ifdef CONFIG_IGB_DCA
255         dca_register_notify(&dca_notifier);
256 #endif
257         ret = pci_register_driver(&igb_driver);
258         return ret;
259 }
260
261 module_init(igb_init_module);
262
263 /**
264  * igb_exit_module - Driver Exit Cleanup Routine
265  *
266  * igb_exit_module is called just before the driver is removed
267  * from memory.
268  **/
269 static void __exit igb_exit_module(void)
270 {
271 #ifdef CONFIG_IGB_DCA
272         dca_unregister_notify(&dca_notifier);
273 #endif
274         pci_unregister_driver(&igb_driver);
275 }
276
277 module_exit(igb_exit_module);
278
279 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
280 /**
281  * igb_cache_ring_register - Descriptor ring to register mapping
282  * @adapter: board private structure to initialize
283  *
284  * Once we know the feature-set enabled for the device, we'll cache
285  * the register offset the descriptor ring is assigned to.
286  **/
287 static void igb_cache_ring_register(struct igb_adapter *adapter)
288 {
289         int i = 0, j = 0;
290         u32 rbase_offset = adapter->vfs_allocated_count;
291
292         switch (adapter->hw.mac.type) {
293         case e1000_82576:
294                 /* The queues are allocated for virtualization such that VF 0
295                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
296                  * In order to avoid collision we start at the first free queue
297                  * and continue consuming queues in the same sequence
298                  */
299                 if (adapter->vfs_allocated_count) {
300                         for (; i < adapter->rss_queues; i++)
301                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
302                                                                Q_IDX_82576(i);
303                         for (; j < adapter->rss_queues; j++)
304                                 adapter->tx_ring[j]->reg_idx = rbase_offset +
305                                                                Q_IDX_82576(j);
306                 }
307         case e1000_82575:
308         case e1000_82580:
309         case e1000_i350:
310         default:
311                 for (; i < adapter->num_rx_queues; i++)
312                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
313                 for (; j < adapter->num_tx_queues; j++)
314                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
315                 break;
316         }
317 }
318
319 static void igb_free_queues(struct igb_adapter *adapter)
320 {
321         int i;
322
323         for (i = 0; i < adapter->num_tx_queues; i++) {
324                 kfree(adapter->tx_ring[i]);
325                 adapter->tx_ring[i] = NULL;
326         }
327         for (i = 0; i < adapter->num_rx_queues; i++) {
328                 kfree(adapter->rx_ring[i]);
329                 adapter->rx_ring[i] = NULL;
330         }
331         adapter->num_rx_queues = 0;
332         adapter->num_tx_queues = 0;
333 }
334
335 /**
336  * igb_alloc_queues - Allocate memory for all rings
337  * @adapter: board private structure to initialize
338  *
339  * We allocate one ring per queue at run-time since we don't know the
340  * number of queues at compile-time.
341  **/
342 static int igb_alloc_queues(struct igb_adapter *adapter)
343 {
344         struct igb_ring *ring;
345         int i;
346
347         for (i = 0; i < adapter->num_tx_queues; i++) {
348                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
349                 if (!ring)
350                         goto err;
351                 ring->count = adapter->tx_ring_count;
352                 ring->queue_index = i;
353                 ring->pdev = adapter->pdev;
354                 ring->netdev = adapter->netdev;
355                 /* For 82575, context index must be unique per ring. */
356                 if (adapter->hw.mac.type == e1000_82575)
357                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
358                 adapter->tx_ring[i] = ring;
359         }
360
361         for (i = 0; i < adapter->num_rx_queues; i++) {
362                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
363                 if (!ring)
364                         goto err;
365                 ring->count = adapter->rx_ring_count;
366                 ring->queue_index = i;
367                 ring->pdev = adapter->pdev;
368                 ring->netdev = adapter->netdev;
369                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
370                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
371                 /* set flag indicating ring supports SCTP checksum offload */
372                 if (adapter->hw.mac.type >= e1000_82576)
373                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
374                 adapter->rx_ring[i] = ring;
375         }
376
377         igb_cache_ring_register(adapter);
378
379         return 0;
380
381 err:
382         igb_free_queues(adapter);
383
384         return -ENOMEM;
385 }
386
387 #define IGB_N0_QUEUE -1
388 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
389 {
390         u32 msixbm = 0;
391         struct igb_adapter *adapter = q_vector->adapter;
392         struct e1000_hw *hw = &adapter->hw;
393         u32 ivar, index;
394         int rx_queue = IGB_N0_QUEUE;
395         int tx_queue = IGB_N0_QUEUE;
396
397         if (q_vector->rx_ring)
398                 rx_queue = q_vector->rx_ring->reg_idx;
399         if (q_vector->tx_ring)
400                 tx_queue = q_vector->tx_ring->reg_idx;
401
402         switch (hw->mac.type) {
403         case e1000_82575:
404                 /* The 82575 assigns vectors using a bitmask, which matches the
405                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
406                    or more queues to a vector, we write the appropriate bits
407                    into the MSIXBM register for that vector. */
408                 if (rx_queue > IGB_N0_QUEUE)
409                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
410                 if (tx_queue > IGB_N0_QUEUE)
411                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
412                 if (!adapter->msix_entries && msix_vector == 0)
413                         msixbm |= E1000_EIMS_OTHER;
414                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
415                 q_vector->eims_value = msixbm;
416                 break;
417         case e1000_82576:
418                 /* 82576 uses a table-based method for assigning vectors.
419                    Each queue has a single entry in the table to which we write
420                    a vector number along with a "valid" bit.  Sadly, the layout
421                    of the table is somewhat counterintuitive. */
422                 if (rx_queue > IGB_N0_QUEUE) {
423                         index = (rx_queue & 0x7);
424                         ivar = array_rd32(E1000_IVAR0, index);
425                         if (rx_queue < 8) {
426                                 /* vector goes into low byte of register */
427                                 ivar = ivar & 0xFFFFFF00;
428                                 ivar |= msix_vector | E1000_IVAR_VALID;
429                         } else {
430                                 /* vector goes into third byte of register */
431                                 ivar = ivar & 0xFF00FFFF;
432                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
433                         }
434                         array_wr32(E1000_IVAR0, index, ivar);
435                 }
436                 if (tx_queue > IGB_N0_QUEUE) {
437                         index = (tx_queue & 0x7);
438                         ivar = array_rd32(E1000_IVAR0, index);
439                         if (tx_queue < 8) {
440                                 /* vector goes into second byte of register */
441                                 ivar = ivar & 0xFFFF00FF;
442                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
443                         } else {
444                                 /* vector goes into high byte of register */
445                                 ivar = ivar & 0x00FFFFFF;
446                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
447                         }
448                         array_wr32(E1000_IVAR0, index, ivar);
449                 }
450                 q_vector->eims_value = 1 << msix_vector;
451                 break;
452         case e1000_82580:
453         case e1000_i350:
454                 /* 82580 uses the same table-based approach as 82576 but has fewer
455                    entries as a result we carry over for queues greater than 4. */
456                 if (rx_queue > IGB_N0_QUEUE) {
457                         index = (rx_queue >> 1);
458                         ivar = array_rd32(E1000_IVAR0, index);
459                         if (rx_queue & 0x1) {
460                                 /* vector goes into third byte of register */
461                                 ivar = ivar & 0xFF00FFFF;
462                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
463                         } else {
464                                 /* vector goes into low byte of register */
465                                 ivar = ivar & 0xFFFFFF00;
466                                 ivar |= msix_vector | E1000_IVAR_VALID;
467                         }
468                         array_wr32(E1000_IVAR0, index, ivar);
469                 }
470                 if (tx_queue > IGB_N0_QUEUE) {
471                         index = (tx_queue >> 1);
472                         ivar = array_rd32(E1000_IVAR0, index);
473                         if (tx_queue & 0x1) {
474                                 /* vector goes into high byte of register */
475                                 ivar = ivar & 0x00FFFFFF;
476                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
477                         } else {
478                                 /* vector goes into second byte of register */
479                                 ivar = ivar & 0xFFFF00FF;
480                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
481                         }
482                         array_wr32(E1000_IVAR0, index, ivar);
483                 }
484                 q_vector->eims_value = 1 << msix_vector;
485                 break;
486         default:
487                 BUG();
488                 break;
489         }
490
491         /* add q_vector eims value to global eims_enable_mask */
492         adapter->eims_enable_mask |= q_vector->eims_value;
493
494         /* configure q_vector to set itr on first interrupt */
495         q_vector->set_itr = 1;
496 }
497
498 /**
499  * igb_configure_msix - Configure MSI-X hardware
500  *
501  * igb_configure_msix sets up the hardware to properly
502  * generate MSI-X interrupts.
503  **/
504 static void igb_configure_msix(struct igb_adapter *adapter)
505 {
506         u32 tmp;
507         int i, vector = 0;
508         struct e1000_hw *hw = &adapter->hw;
509
510         adapter->eims_enable_mask = 0;
511
512         /* set vector for other causes, i.e. link changes */
513         switch (hw->mac.type) {
514         case e1000_82575:
515                 tmp = rd32(E1000_CTRL_EXT);
516                 /* enable MSI-X PBA support*/
517                 tmp |= E1000_CTRL_EXT_PBA_CLR;
518
519                 /* Auto-Mask interrupts upon ICR read. */
520                 tmp |= E1000_CTRL_EXT_EIAME;
521                 tmp |= E1000_CTRL_EXT_IRCA;
522
523                 wr32(E1000_CTRL_EXT, tmp);
524
525                 /* enable msix_other interrupt */
526                 array_wr32(E1000_MSIXBM(0), vector++,
527                                       E1000_EIMS_OTHER);
528                 adapter->eims_other = E1000_EIMS_OTHER;
529
530                 break;
531
532         case e1000_82576:
533         case e1000_82580:
534         case e1000_i350:
535                 /* Turn on MSI-X capability first, or our settings
536                  * won't stick.  And it will take days to debug. */
537                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
538                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
539                                 E1000_GPIE_NSICR);
540
541                 /* enable msix_other interrupt */
542                 adapter->eims_other = 1 << vector;
543                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
544
545                 wr32(E1000_IVAR_MISC, tmp);
546                 break;
547         default:
548                 /* do nothing, since nothing else supports MSI-X */
549                 break;
550         } /* switch (hw->mac.type) */
551
552         adapter->eims_enable_mask |= adapter->eims_other;
553
554         for (i = 0; i < adapter->num_q_vectors; i++)
555                 igb_assign_vector(adapter->q_vector[i], vector++);
556
557         wrfl();
558 }
559
560 /**
561  * igb_request_msix - Initialize MSI-X interrupts
562  *
563  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
564  * kernel.
565  **/
566 static int igb_request_msix(struct igb_adapter *adapter)
567 {
568         struct net_device *netdev = adapter->netdev;
569         struct e1000_hw *hw = &adapter->hw;
570         int i, err = 0, vector = 0;
571
572         err = request_irq(adapter->msix_entries[vector].vector,
573                           igb_msix_other, 0, netdev->name, adapter);
574         if (err)
575                 goto out;
576         vector++;
577
578         for (i = 0; i < adapter->num_q_vectors; i++) {
579                 struct igb_q_vector *q_vector = adapter->q_vector[i];
580
581                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
582
583                 if (q_vector->rx_ring && q_vector->tx_ring)
584                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
585                                 q_vector->rx_ring->queue_index);
586                 else if (q_vector->tx_ring)
587                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
588                                 q_vector->tx_ring->queue_index);
589                 else if (q_vector->rx_ring)
590                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
591                                 q_vector->rx_ring->queue_index);
592                 else
593                         sprintf(q_vector->name, "%s-unused", netdev->name);
594
595                 err = request_irq(adapter->msix_entries[vector].vector,
596                                   igb_msix_ring, 0, q_vector->name,
597                                   q_vector);
598                 if (err)
599                         goto out;
600                 vector++;
601         }
602
603         igb_configure_msix(adapter);
604         return 0;
605 out:
606         return err;
607 }
608
609 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
610 {
611         if (adapter->msix_entries) {
612                 pci_disable_msix(adapter->pdev);
613                 kfree(adapter->msix_entries);
614                 adapter->msix_entries = NULL;
615         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
616                 pci_disable_msi(adapter->pdev);
617         }
618 }
619
620 /**
621  * igb_free_q_vectors - Free memory allocated for interrupt vectors
622  * @adapter: board private structure to initialize
623  *
624  * This function frees the memory allocated to the q_vectors.  In addition if
625  * NAPI is enabled it will delete any references to the NAPI struct prior
626  * to freeing the q_vector.
627  **/
628 static void igb_free_q_vectors(struct igb_adapter *adapter)
629 {
630         int v_idx;
631
632         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
633                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
634                 adapter->q_vector[v_idx] = NULL;
635                 if (!q_vector)
636                         continue;
637                 netif_napi_del(&q_vector->napi);
638                 kfree(q_vector);
639         }
640         adapter->num_q_vectors = 0;
641 }
642
643 /**
644  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
645  *
646  * This function resets the device so that it has 0 rx queues, tx queues, and
647  * MSI-X interrupts allocated.
648  */
649 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
650 {
651         igb_free_queues(adapter);
652         igb_free_q_vectors(adapter);
653         igb_reset_interrupt_capability(adapter);
654 }
655
656 /**
657  * igb_set_interrupt_capability - set MSI or MSI-X if supported
658  *
659  * Attempt to configure interrupts using the best available
660  * capabilities of the hardware and kernel.
661  **/
662 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
663 {
664         int err;
665         int numvecs, i;
666
667         /* Number of supported queues. */
668         adapter->num_rx_queues = adapter->rss_queues;
669         adapter->num_tx_queues = adapter->rss_queues;
670
671         /* start with one vector for every rx queue */
672         numvecs = adapter->num_rx_queues;
673
674         /* if tx handler is separate add 1 for every tx queue */
675         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
676                 numvecs += adapter->num_tx_queues;
677
678         /* store the number of vectors reserved for queues */
679         adapter->num_q_vectors = numvecs;
680
681         /* add 1 vector for link status interrupts */
682         numvecs++;
683         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
684                                         GFP_KERNEL);
685         if (!adapter->msix_entries)
686                 goto msi_only;
687
688         for (i = 0; i < numvecs; i++)
689                 adapter->msix_entries[i].entry = i;
690
691         err = pci_enable_msix(adapter->pdev,
692                               adapter->msix_entries,
693                               numvecs);
694         if (err == 0)
695                 goto out;
696
697         igb_reset_interrupt_capability(adapter);
698
699         /* If we can't do MSI-X, try MSI */
700 msi_only:
701 #ifdef CONFIG_PCI_IOV
702         /* disable SR-IOV for non MSI-X configurations */
703         if (adapter->vf_data) {
704                 struct e1000_hw *hw = &adapter->hw;
705                 /* disable iov and allow time for transactions to clear */
706                 pci_disable_sriov(adapter->pdev);
707                 msleep(500);
708
709                 kfree(adapter->vf_data);
710                 adapter->vf_data = NULL;
711                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
712                 msleep(100);
713                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
714         }
715 #endif
716         adapter->vfs_allocated_count = 0;
717         adapter->rss_queues = 1;
718         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
719         adapter->num_rx_queues = 1;
720         adapter->num_tx_queues = 1;
721         adapter->num_q_vectors = 1;
722         if (!pci_enable_msi(adapter->pdev))
723                 adapter->flags |= IGB_FLAG_HAS_MSI;
724 out:
725         /* Notify the stack of the (possibly) reduced Tx Queue count. */
726         adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
727         return;
728 }
729
730 /**
731  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
732  * @adapter: board private structure to initialize
733  *
734  * We allocate one q_vector per queue interrupt.  If allocation fails we
735  * return -ENOMEM.
736  **/
737 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
738 {
739         struct igb_q_vector *q_vector;
740         struct e1000_hw *hw = &adapter->hw;
741         int v_idx;
742
743         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
744                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
745                 if (!q_vector)
746                         goto err_out;
747                 q_vector->adapter = adapter;
748                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
749                 q_vector->itr_val = IGB_START_ITR;
750                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
751                 adapter->q_vector[v_idx] = q_vector;
752         }
753         return 0;
754
755 err_out:
756         igb_free_q_vectors(adapter);
757         return -ENOMEM;
758 }
759
760 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
761                                       int ring_idx, int v_idx)
762 {
763         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
764
765         q_vector->rx_ring = adapter->rx_ring[ring_idx];
766         q_vector->rx_ring->q_vector = q_vector;
767         q_vector->itr_val = adapter->rx_itr_setting;
768         if (q_vector->itr_val && q_vector->itr_val <= 3)
769                 q_vector->itr_val = IGB_START_ITR;
770 }
771
772 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
773                                       int ring_idx, int v_idx)
774 {
775         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
776
777         q_vector->tx_ring = adapter->tx_ring[ring_idx];
778         q_vector->tx_ring->q_vector = q_vector;
779         q_vector->itr_val = adapter->tx_itr_setting;
780         if (q_vector->itr_val && q_vector->itr_val <= 3)
781                 q_vector->itr_val = IGB_START_ITR;
782 }
783
784 /**
785  * igb_map_ring_to_vector - maps allocated queues to vectors
786  *
787  * This function maps the recently allocated queues to vectors.
788  **/
789 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
790 {
791         int i;
792         int v_idx = 0;
793
794         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
795             (adapter->num_q_vectors < adapter->num_tx_queues))
796                 return -ENOMEM;
797
798         if (adapter->num_q_vectors >=
799             (adapter->num_rx_queues + adapter->num_tx_queues)) {
800                 for (i = 0; i < adapter->num_rx_queues; i++)
801                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
802                 for (i = 0; i < adapter->num_tx_queues; i++)
803                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
804         } else {
805                 for (i = 0; i < adapter->num_rx_queues; i++) {
806                         if (i < adapter->num_tx_queues)
807                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
808                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
809                 }
810                 for (; i < adapter->num_tx_queues; i++)
811                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
812         }
813         return 0;
814 }
815
816 /**
817  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
818  *
819  * This function initializes the interrupts and allocates all of the queues.
820  **/
821 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
822 {
823         struct pci_dev *pdev = adapter->pdev;
824         int err;
825
826         igb_set_interrupt_capability(adapter);
827
828         err = igb_alloc_q_vectors(adapter);
829         if (err) {
830                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
831                 goto err_alloc_q_vectors;
832         }
833
834         err = igb_alloc_queues(adapter);
835         if (err) {
836                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
837                 goto err_alloc_queues;
838         }
839
840         err = igb_map_ring_to_vector(adapter);
841         if (err) {
842                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
843                 goto err_map_queues;
844         }
845
846
847         return 0;
848 err_map_queues:
849         igb_free_queues(adapter);
850 err_alloc_queues:
851         igb_free_q_vectors(adapter);
852 err_alloc_q_vectors:
853         igb_reset_interrupt_capability(adapter);
854         return err;
855 }
856
857 /**
858  * igb_request_irq - initialize interrupts
859  *
860  * Attempts to configure interrupts using the best available
861  * capabilities of the hardware and kernel.
862  **/
863 static int igb_request_irq(struct igb_adapter *adapter)
864 {
865         struct net_device *netdev = adapter->netdev;
866         struct pci_dev *pdev = adapter->pdev;
867         int err = 0;
868
869         if (adapter->msix_entries) {
870                 err = igb_request_msix(adapter);
871                 if (!err)
872                         goto request_done;
873                 /* fall back to MSI */
874                 igb_clear_interrupt_scheme(adapter);
875                 if (!pci_enable_msi(adapter->pdev))
876                         adapter->flags |= IGB_FLAG_HAS_MSI;
877                 igb_free_all_tx_resources(adapter);
878                 igb_free_all_rx_resources(adapter);
879                 adapter->num_tx_queues = 1;
880                 adapter->num_rx_queues = 1;
881                 adapter->num_q_vectors = 1;
882                 err = igb_alloc_q_vectors(adapter);
883                 if (err) {
884                         dev_err(&pdev->dev,
885                                 "Unable to allocate memory for vectors\n");
886                         goto request_done;
887                 }
888                 err = igb_alloc_queues(adapter);
889                 if (err) {
890                         dev_err(&pdev->dev,
891                                 "Unable to allocate memory for queues\n");
892                         igb_free_q_vectors(adapter);
893                         goto request_done;
894                 }
895                 igb_setup_all_tx_resources(adapter);
896                 igb_setup_all_rx_resources(adapter);
897         } else {
898                 igb_assign_vector(adapter->q_vector[0], 0);
899         }
900
901         if (adapter->flags & IGB_FLAG_HAS_MSI) {
902                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
903                                   netdev->name, adapter);
904                 if (!err)
905                         goto request_done;
906
907                 /* fall back to legacy interrupts */
908                 igb_reset_interrupt_capability(adapter);
909                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
910         }
911
912         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
913                           netdev->name, adapter);
914
915         if (err)
916                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
917                         err);
918
919 request_done:
920         return err;
921 }
922
923 static void igb_free_irq(struct igb_adapter *adapter)
924 {
925         if (adapter->msix_entries) {
926                 int vector = 0, i;
927
928                 free_irq(adapter->msix_entries[vector++].vector, adapter);
929
930                 for (i = 0; i < adapter->num_q_vectors; i++) {
931                         struct igb_q_vector *q_vector = adapter->q_vector[i];
932                         free_irq(adapter->msix_entries[vector++].vector,
933                                  q_vector);
934                 }
935         } else {
936                 free_irq(adapter->pdev->irq, adapter);
937         }
938 }
939
940 /**
941  * igb_irq_disable - Mask off interrupt generation on the NIC
942  * @adapter: board private structure
943  **/
944 static void igb_irq_disable(struct igb_adapter *adapter)
945 {
946         struct e1000_hw *hw = &adapter->hw;
947
948         /*
949          * we need to be careful when disabling interrupts.  The VFs are also
950          * mapped into these registers and so clearing the bits can cause
951          * issues on the VF drivers so we only need to clear what we set
952          */
953         if (adapter->msix_entries) {
954                 u32 regval = rd32(E1000_EIAM);
955                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
956                 wr32(E1000_EIMC, adapter->eims_enable_mask);
957                 regval = rd32(E1000_EIAC);
958                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
959         }
960
961         wr32(E1000_IAM, 0);
962         wr32(E1000_IMC, ~0);
963         wrfl();
964         synchronize_irq(adapter->pdev->irq);
965 }
966
967 /**
968  * igb_irq_enable - Enable default interrupt generation settings
969  * @adapter: board private structure
970  **/
971 static void igb_irq_enable(struct igb_adapter *adapter)
972 {
973         struct e1000_hw *hw = &adapter->hw;
974
975         if (adapter->msix_entries) {
976                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
977                 u32 regval = rd32(E1000_EIAC);
978                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
979                 regval = rd32(E1000_EIAM);
980                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
981                 wr32(E1000_EIMS, adapter->eims_enable_mask);
982                 if (adapter->vfs_allocated_count) {
983                         wr32(E1000_MBVFIMR, 0xFF);
984                         ims |= E1000_IMS_VMMB;
985                 }
986                 if (adapter->hw.mac.type == e1000_82580)
987                         ims |= E1000_IMS_DRSTA;
988
989                 wr32(E1000_IMS, ims);
990         } else {
991                 wr32(E1000_IMS, IMS_ENABLE_MASK |
992                                 E1000_IMS_DRSTA);
993                 wr32(E1000_IAM, IMS_ENABLE_MASK |
994                                 E1000_IMS_DRSTA);
995         }
996 }
997
998 static void igb_update_mng_vlan(struct igb_adapter *adapter)
999 {
1000         struct e1000_hw *hw = &adapter->hw;
1001         u16 vid = adapter->hw.mng_cookie.vlan_id;
1002         u16 old_vid = adapter->mng_vlan_id;
1003
1004         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1005                 /* add VID to filter table */
1006                 igb_vfta_set(hw, vid, true);
1007                 adapter->mng_vlan_id = vid;
1008         } else {
1009                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1010         }
1011
1012         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1013             (vid != old_vid) &&
1014             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1015                 /* remove VID from filter table */
1016                 igb_vfta_set(hw, old_vid, false);
1017         }
1018 }
1019
1020 /**
1021  * igb_release_hw_control - release control of the h/w to f/w
1022  * @adapter: address of board private structure
1023  *
1024  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1025  * For ASF and Pass Through versions of f/w this means that the
1026  * driver is no longer loaded.
1027  *
1028  **/
1029 static void igb_release_hw_control(struct igb_adapter *adapter)
1030 {
1031         struct e1000_hw *hw = &adapter->hw;
1032         u32 ctrl_ext;
1033
1034         /* Let firmware take over control of h/w */
1035         ctrl_ext = rd32(E1000_CTRL_EXT);
1036         wr32(E1000_CTRL_EXT,
1037                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1038 }
1039
1040 /**
1041  * igb_get_hw_control - get control of the h/w from f/w
1042  * @adapter: address of board private structure
1043  *
1044  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1045  * For ASF and Pass Through versions of f/w this means that
1046  * the driver is loaded.
1047  *
1048  **/
1049 static void igb_get_hw_control(struct igb_adapter *adapter)
1050 {
1051         struct e1000_hw *hw = &adapter->hw;
1052         u32 ctrl_ext;
1053
1054         /* Let firmware know the driver has taken over */
1055         ctrl_ext = rd32(E1000_CTRL_EXT);
1056         wr32(E1000_CTRL_EXT,
1057                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1058 }
1059
1060 /**
1061  * igb_configure - configure the hardware for RX and TX
1062  * @adapter: private board structure
1063  **/
1064 static void igb_configure(struct igb_adapter *adapter)
1065 {
1066         struct net_device *netdev = adapter->netdev;
1067         int i;
1068
1069         igb_get_hw_control(adapter);
1070         igb_set_rx_mode(netdev);
1071
1072         igb_restore_vlan(adapter);
1073
1074         igb_setup_tctl(adapter);
1075         igb_setup_mrqc(adapter);
1076         igb_setup_rctl(adapter);
1077
1078         igb_configure_tx(adapter);
1079         igb_configure_rx(adapter);
1080
1081         igb_rx_fifo_flush_82575(&adapter->hw);
1082
1083         /* call igb_desc_unused which always leaves
1084          * at least 1 descriptor unused to make sure
1085          * next_to_use != next_to_clean */
1086         for (i = 0; i < adapter->num_rx_queues; i++) {
1087                 struct igb_ring *ring = adapter->rx_ring[i];
1088                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1089         }
1090 }
1091
1092 /**
1093  * igb_power_up_link - Power up the phy/serdes link
1094  * @adapter: address of board private structure
1095  **/
1096 void igb_power_up_link(struct igb_adapter *adapter)
1097 {
1098         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1099                 igb_power_up_phy_copper(&adapter->hw);
1100         else
1101                 igb_power_up_serdes_link_82575(&adapter->hw);
1102 }
1103
1104 /**
1105  * igb_power_down_link - Power down the phy/serdes link
1106  * @adapter: address of board private structure
1107  */
1108 static void igb_power_down_link(struct igb_adapter *adapter)
1109 {
1110         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1111                 igb_power_down_phy_copper_82575(&adapter->hw);
1112         else
1113                 igb_shutdown_serdes_link_82575(&adapter->hw);
1114 }
1115
1116 /**
1117  * igb_up - Open the interface and prepare it to handle traffic
1118  * @adapter: board private structure
1119  **/
1120 int igb_up(struct igb_adapter *adapter)
1121 {
1122         struct e1000_hw *hw = &adapter->hw;
1123         int i;
1124
1125         /* hardware has been reset, we need to reload some things */
1126         igb_configure(adapter);
1127
1128         clear_bit(__IGB_DOWN, &adapter->state);
1129
1130         for (i = 0; i < adapter->num_q_vectors; i++) {
1131                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1132                 napi_enable(&q_vector->napi);
1133         }
1134         if (adapter->msix_entries)
1135                 igb_configure_msix(adapter);
1136         else
1137                 igb_assign_vector(adapter->q_vector[0], 0);
1138
1139         /* Clear any pending interrupts. */
1140         rd32(E1000_ICR);
1141         igb_irq_enable(adapter);
1142
1143         /* notify VFs that reset has been completed */
1144         if (adapter->vfs_allocated_count) {
1145                 u32 reg_data = rd32(E1000_CTRL_EXT);
1146                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1147                 wr32(E1000_CTRL_EXT, reg_data);
1148         }
1149
1150         netif_tx_start_all_queues(adapter->netdev);
1151
1152         /* start the watchdog. */
1153         hw->mac.get_link_status = 1;
1154         schedule_work(&adapter->watchdog_task);
1155
1156         return 0;
1157 }
1158
1159 void igb_down(struct igb_adapter *adapter)
1160 {
1161         struct net_device *netdev = adapter->netdev;
1162         struct e1000_hw *hw = &adapter->hw;
1163         u32 tctl, rctl;
1164         int i;
1165
1166         /* signal that we're down so the interrupt handler does not
1167          * reschedule our watchdog timer */
1168         set_bit(__IGB_DOWN, &adapter->state);
1169
1170         /* disable receives in the hardware */
1171         rctl = rd32(E1000_RCTL);
1172         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1173         /* flush and sleep below */
1174
1175         netif_tx_stop_all_queues(netdev);
1176
1177         /* disable transmits in the hardware */
1178         tctl = rd32(E1000_TCTL);
1179         tctl &= ~E1000_TCTL_EN;
1180         wr32(E1000_TCTL, tctl);
1181         /* flush both disables and wait for them to finish */
1182         wrfl();
1183         msleep(10);
1184
1185         for (i = 0; i < adapter->num_q_vectors; i++) {
1186                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1187                 napi_disable(&q_vector->napi);
1188         }
1189
1190         igb_irq_disable(adapter);
1191
1192         del_timer_sync(&adapter->watchdog_timer);
1193         del_timer_sync(&adapter->phy_info_timer);
1194
1195         netif_carrier_off(netdev);
1196
1197         /* record the stats before reset*/
1198         igb_update_stats(adapter);
1199
1200         adapter->link_speed = 0;
1201         adapter->link_duplex = 0;
1202
1203         if (!pci_channel_offline(adapter->pdev))
1204                 igb_reset(adapter);
1205         igb_clean_all_tx_rings(adapter);
1206         igb_clean_all_rx_rings(adapter);
1207 #ifdef CONFIG_IGB_DCA
1208
1209         /* since we reset the hardware DCA settings were cleared */
1210         igb_setup_dca(adapter);
1211 #endif
1212 }
1213
1214 void igb_reinit_locked(struct igb_adapter *adapter)
1215 {
1216         WARN_ON(in_interrupt());
1217         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1218                 msleep(1);
1219         igb_down(adapter);
1220         igb_up(adapter);
1221         clear_bit(__IGB_RESETTING, &adapter->state);
1222 }
1223
1224 void igb_reset(struct igb_adapter *adapter)
1225 {
1226         struct pci_dev *pdev = adapter->pdev;
1227         struct e1000_hw *hw = &adapter->hw;
1228         struct e1000_mac_info *mac = &hw->mac;
1229         struct e1000_fc_info *fc = &hw->fc;
1230         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1231         u16 hwm;
1232
1233         /* Repartition Pba for greater than 9k mtu
1234          * To take effect CTRL.RST is required.
1235          */
1236         switch (mac->type) {
1237         case e1000_i350:
1238         case e1000_82580:
1239                 pba = rd32(E1000_RXPBS);
1240                 pba = igb_rxpbs_adjust_82580(pba);
1241                 break;
1242         case e1000_82576:
1243                 pba = rd32(E1000_RXPBS);
1244                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1245                 break;
1246         case e1000_82575:
1247         default:
1248                 pba = E1000_PBA_34K;
1249                 break;
1250         }
1251
1252         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1253             (mac->type < e1000_82576)) {
1254                 /* adjust PBA for jumbo frames */
1255                 wr32(E1000_PBA, pba);
1256
1257                 /* To maintain wire speed transmits, the Tx FIFO should be
1258                  * large enough to accommodate two full transmit packets,
1259                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1260                  * the Rx FIFO should be large enough to accommodate at least
1261                  * one full receive packet and is similarly rounded up and
1262                  * expressed in KB. */
1263                 pba = rd32(E1000_PBA);
1264                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1265                 tx_space = pba >> 16;
1266                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1267                 pba &= 0xffff;
1268                 /* the tx fifo also stores 16 bytes of information about the tx
1269                  * but don't include ethernet FCS because hardware appends it */
1270                 min_tx_space = (adapter->max_frame_size +
1271                                 sizeof(union e1000_adv_tx_desc) -
1272                                 ETH_FCS_LEN) * 2;
1273                 min_tx_space = ALIGN(min_tx_space, 1024);
1274                 min_tx_space >>= 10;
1275                 /* software strips receive CRC, so leave room for it */
1276                 min_rx_space = adapter->max_frame_size;
1277                 min_rx_space = ALIGN(min_rx_space, 1024);
1278                 min_rx_space >>= 10;
1279
1280                 /* If current Tx allocation is less than the min Tx FIFO size,
1281                  * and the min Tx FIFO size is less than the current Rx FIFO
1282                  * allocation, take space away from current Rx allocation */
1283                 if (tx_space < min_tx_space &&
1284                     ((min_tx_space - tx_space) < pba)) {
1285                         pba = pba - (min_tx_space - tx_space);
1286
1287                         /* if short on rx space, rx wins and must trump tx
1288                          * adjustment */
1289                         if (pba < min_rx_space)
1290                                 pba = min_rx_space;
1291                 }
1292                 wr32(E1000_PBA, pba);
1293         }
1294
1295         /* flow control settings */
1296         /* The high water mark must be low enough to fit one full frame
1297          * (or the size used for early receive) above it in the Rx FIFO.
1298          * Set it to the lower of:
1299          * - 90% of the Rx FIFO size, or
1300          * - the full Rx FIFO size minus one full frame */
1301         hwm = min(((pba << 10) * 9 / 10),
1302                         ((pba << 10) - 2 * adapter->max_frame_size));
1303
1304         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1305         fc->low_water = fc->high_water - 16;
1306         fc->pause_time = 0xFFFF;
1307         fc->send_xon = 1;
1308         fc->current_mode = fc->requested_mode;
1309
1310         /* disable receive for all VFs and wait one second */
1311         if (adapter->vfs_allocated_count) {
1312                 int i;
1313                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1314                         adapter->vf_data[i].flags = 0;
1315
1316                 /* ping all the active vfs to let them know we are going down */
1317                 igb_ping_all_vfs(adapter);
1318
1319                 /* disable transmits and receives */
1320                 wr32(E1000_VFRE, 0);
1321                 wr32(E1000_VFTE, 0);
1322         }
1323
1324         /* Allow time for pending master requests to run */
1325         hw->mac.ops.reset_hw(hw);
1326         wr32(E1000_WUC, 0);
1327
1328         if (hw->mac.ops.init_hw(hw))
1329                 dev_err(&pdev->dev, "Hardware Error\n");
1330
1331         if (hw->mac.type == e1000_82580) {
1332                 u32 reg = rd32(E1000_PCIEMISC);
1333                 wr32(E1000_PCIEMISC,
1334                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1335         }
1336         if (!netif_running(adapter->netdev))
1337                 igb_power_down_link(adapter);
1338
1339         igb_update_mng_vlan(adapter);
1340
1341         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1342         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1343
1344         igb_get_phy_info(hw);
1345 }
1346
1347 static const struct net_device_ops igb_netdev_ops = {
1348         .ndo_open               = igb_open,
1349         .ndo_stop               = igb_close,
1350         .ndo_start_xmit         = igb_xmit_frame_adv,
1351         .ndo_get_stats          = igb_get_stats,
1352         .ndo_set_rx_mode        = igb_set_rx_mode,
1353         .ndo_set_multicast_list = igb_set_rx_mode,
1354         .ndo_set_mac_address    = igb_set_mac,
1355         .ndo_change_mtu         = igb_change_mtu,
1356         .ndo_do_ioctl           = igb_ioctl,
1357         .ndo_tx_timeout         = igb_tx_timeout,
1358         .ndo_validate_addr      = eth_validate_addr,
1359         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1360         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1361         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1362         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1363         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1364         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1365         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1366 #ifdef CONFIG_NET_POLL_CONTROLLER
1367         .ndo_poll_controller    = igb_netpoll,
1368 #endif
1369 };
1370
1371 /**
1372  * igb_probe - Device Initialization Routine
1373  * @pdev: PCI device information struct
1374  * @ent: entry in igb_pci_tbl
1375  *
1376  * Returns 0 on success, negative on failure
1377  *
1378  * igb_probe initializes an adapter identified by a pci_dev structure.
1379  * The OS initialization, configuring of the adapter private structure,
1380  * and a hardware reset occur.
1381  **/
1382 static int __devinit igb_probe(struct pci_dev *pdev,
1383                                const struct pci_device_id *ent)
1384 {
1385         struct net_device *netdev;
1386         struct igb_adapter *adapter;
1387         struct e1000_hw *hw;
1388         u16 eeprom_data = 0;
1389         static int global_quad_port_a; /* global quad port a indication */
1390         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1391         unsigned long mmio_start, mmio_len;
1392         int err, pci_using_dac;
1393         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1394         u32 part_num;
1395
1396         err = pci_enable_device_mem(pdev);
1397         if (err)
1398                 return err;
1399
1400         pci_using_dac = 0;
1401         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1402         if (!err) {
1403                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1404                 if (!err)
1405                         pci_using_dac = 1;
1406         } else {
1407                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1408                 if (err) {
1409                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1410                         if (err) {
1411                                 dev_err(&pdev->dev, "No usable DMA "
1412                                         "configuration, aborting\n");
1413                                 goto err_dma;
1414                         }
1415                 }
1416         }
1417
1418         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1419                                            IORESOURCE_MEM),
1420                                            igb_driver_name);
1421         if (err)
1422                 goto err_pci_reg;
1423
1424         pci_enable_pcie_error_reporting(pdev);
1425
1426         pci_set_master(pdev);
1427         pci_save_state(pdev);
1428
1429         err = -ENOMEM;
1430         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1431                                    IGB_ABS_MAX_TX_QUEUES);
1432         if (!netdev)
1433                 goto err_alloc_etherdev;
1434
1435         SET_NETDEV_DEV(netdev, &pdev->dev);
1436
1437         pci_set_drvdata(pdev, netdev);
1438         adapter = netdev_priv(netdev);
1439         adapter->netdev = netdev;
1440         adapter->pdev = pdev;
1441         hw = &adapter->hw;
1442         hw->back = adapter;
1443         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1444
1445         mmio_start = pci_resource_start(pdev, 0);
1446         mmio_len = pci_resource_len(pdev, 0);
1447
1448         err = -EIO;
1449         hw->hw_addr = ioremap(mmio_start, mmio_len);
1450         if (!hw->hw_addr)
1451                 goto err_ioremap;
1452
1453         netdev->netdev_ops = &igb_netdev_ops;
1454         igb_set_ethtool_ops(netdev);
1455         netdev->watchdog_timeo = 5 * HZ;
1456
1457         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1458
1459         netdev->mem_start = mmio_start;
1460         netdev->mem_end = mmio_start + mmio_len;
1461
1462         /* PCI config space info */
1463         hw->vendor_id = pdev->vendor;
1464         hw->device_id = pdev->device;
1465         hw->revision_id = pdev->revision;
1466         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1467         hw->subsystem_device_id = pdev->subsystem_device;
1468
1469         /* Copy the default MAC, PHY and NVM function pointers */
1470         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1471         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1472         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1473         /* Initialize skew-specific constants */
1474         err = ei->get_invariants(hw);
1475         if (err)
1476                 goto err_sw_init;
1477
1478         /* setup the private structure */
1479         err = igb_sw_init(adapter);
1480         if (err)
1481                 goto err_sw_init;
1482
1483         igb_get_bus_info_pcie(hw);
1484
1485         hw->phy.autoneg_wait_to_complete = false;
1486
1487         /* Copper options */
1488         if (hw->phy.media_type == e1000_media_type_copper) {
1489                 hw->phy.mdix = AUTO_ALL_MODES;
1490                 hw->phy.disable_polarity_correction = false;
1491                 hw->phy.ms_type = e1000_ms_hw_default;
1492         }
1493
1494         if (igb_check_reset_block(hw))
1495                 dev_info(&pdev->dev,
1496                         "PHY reset is blocked due to SOL/IDER session.\n");
1497
1498         netdev->features = NETIF_F_SG |
1499                            NETIF_F_IP_CSUM |
1500                            NETIF_F_HW_VLAN_TX |
1501                            NETIF_F_HW_VLAN_RX |
1502                            NETIF_F_HW_VLAN_FILTER;
1503
1504         netdev->features |= NETIF_F_IPV6_CSUM;
1505         netdev->features |= NETIF_F_TSO;
1506         netdev->features |= NETIF_F_TSO6;
1507         netdev->features |= NETIF_F_GRO;
1508
1509         netdev->vlan_features |= NETIF_F_TSO;
1510         netdev->vlan_features |= NETIF_F_TSO6;
1511         netdev->vlan_features |= NETIF_F_IP_CSUM;
1512         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1513         netdev->vlan_features |= NETIF_F_SG;
1514
1515         if (pci_using_dac)
1516                 netdev->features |= NETIF_F_HIGHDMA;
1517
1518         if (hw->mac.type >= e1000_82576)
1519                 netdev->features |= NETIF_F_SCTP_CSUM;
1520
1521         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1522
1523         /* before reading the NVM, reset the controller to put the device in a
1524          * known good starting state */
1525         hw->mac.ops.reset_hw(hw);
1526
1527         /* make sure the NVM is good */
1528         if (igb_validate_nvm_checksum(hw) < 0) {
1529                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1530                 err = -EIO;
1531                 goto err_eeprom;
1532         }
1533
1534         /* copy the MAC address out of the NVM */
1535         if (hw->mac.ops.read_mac_addr(hw))
1536                 dev_err(&pdev->dev, "NVM Read Error\n");
1537
1538         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1539         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1540
1541         if (!is_valid_ether_addr(netdev->perm_addr)) {
1542                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1543                 err = -EIO;
1544                 goto err_eeprom;
1545         }
1546
1547         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1548                     (unsigned long) adapter);
1549         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1550                     (unsigned long) adapter);
1551
1552         INIT_WORK(&adapter->reset_task, igb_reset_task);
1553         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1554
1555         /* Initialize link properties that are user-changeable */
1556         adapter->fc_autoneg = true;
1557         hw->mac.autoneg = true;
1558         hw->phy.autoneg_advertised = 0x2f;
1559
1560         hw->fc.requested_mode = e1000_fc_default;
1561         hw->fc.current_mode = e1000_fc_default;
1562
1563         igb_validate_mdi_setting(hw);
1564
1565         /* Initial Wake on LAN setting If APM wake is enabled in the EEPROM,
1566          * enable the ACPI Magic Packet filter
1567          */
1568
1569         if (hw->bus.func == 0)
1570                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1571         else if (hw->mac.type == e1000_82580)
1572                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1573                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1574                                  &eeprom_data);
1575         else if (hw->bus.func == 1)
1576                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1577
1578         if (eeprom_data & eeprom_apme_mask)
1579                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1580
1581         /* now that we have the eeprom settings, apply the special cases where
1582          * the eeprom may be wrong or the board simply won't support wake on
1583          * lan on a particular port */
1584         switch (pdev->device) {
1585         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1586                 adapter->eeprom_wol = 0;
1587                 break;
1588         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1589         case E1000_DEV_ID_82576_FIBER:
1590         case E1000_DEV_ID_82576_SERDES:
1591                 /* Wake events only supported on port A for dual fiber
1592                  * regardless of eeprom setting */
1593                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1594                         adapter->eeprom_wol = 0;
1595                 break;
1596         case E1000_DEV_ID_82576_QUAD_COPPER:
1597                 /* if quad port adapter, disable WoL on all but port A */
1598                 if (global_quad_port_a != 0)
1599                         adapter->eeprom_wol = 0;
1600                 else
1601                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1602                 /* Reset for multiple quad port adapters */
1603                 if (++global_quad_port_a == 4)
1604                         global_quad_port_a = 0;
1605                 break;
1606         }
1607
1608         /* initialize the wol settings based on the eeprom settings */
1609         adapter->wol = adapter->eeprom_wol;
1610         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1611
1612         /* reset the hardware with the new settings */
1613         igb_reset(adapter);
1614
1615         /* let the f/w know that the h/w is now under the control of the
1616          * driver. */
1617         igb_get_hw_control(adapter);
1618
1619         strcpy(netdev->name, "eth%d");
1620         err = register_netdev(netdev);
1621         if (err)
1622                 goto err_register;
1623
1624         /* carrier off reporting is important to ethtool even BEFORE open */
1625         netif_carrier_off(netdev);
1626
1627 #ifdef CONFIG_IGB_DCA
1628         if (dca_add_requester(&pdev->dev) == 0) {
1629                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1630                 dev_info(&pdev->dev, "DCA enabled\n");
1631                 igb_setup_dca(adapter);
1632         }
1633
1634 #endif
1635         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1636         /* print bus type/speed/width info */
1637         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1638                  netdev->name,
1639                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1640                                                             "unknown"),
1641                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1642                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1643                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1644                    "unknown"),
1645                  netdev->dev_addr);
1646
1647         igb_read_part_num(hw, &part_num);
1648         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1649                 (part_num >> 8), (part_num & 0xff));
1650
1651         dev_info(&pdev->dev,
1652                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1653                 adapter->msix_entries ? "MSI-X" :
1654                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1655                 adapter->num_rx_queues, adapter->num_tx_queues);
1656
1657         return 0;
1658
1659 err_register:
1660         igb_release_hw_control(adapter);
1661 err_eeprom:
1662         if (!igb_check_reset_block(hw))
1663                 igb_reset_phy(hw);
1664
1665         if (hw->flash_address)
1666                 iounmap(hw->flash_address);
1667 err_sw_init:
1668         igb_clear_interrupt_scheme(adapter);
1669         iounmap(hw->hw_addr);
1670 err_ioremap:
1671         free_netdev(netdev);
1672 err_alloc_etherdev:
1673         pci_release_selected_regions(pdev,
1674                                      pci_select_bars(pdev, IORESOURCE_MEM));
1675 err_pci_reg:
1676 err_dma:
1677         pci_disable_device(pdev);
1678         return err;
1679 }
1680
1681 /**
1682  * igb_remove - Device Removal Routine
1683  * @pdev: PCI device information struct
1684  *
1685  * igb_remove is called by the PCI subsystem to alert the driver
1686  * that it should release a PCI device.  The could be caused by a
1687  * Hot-Plug event, or because the driver is going to be removed from
1688  * memory.
1689  **/
1690 static void __devexit igb_remove(struct pci_dev *pdev)
1691 {
1692         struct net_device *netdev = pci_get_drvdata(pdev);
1693         struct igb_adapter *adapter = netdev_priv(netdev);
1694         struct e1000_hw *hw = &adapter->hw;
1695
1696         /* flush_scheduled work may reschedule our watchdog task, so
1697          * explicitly disable watchdog tasks from being rescheduled  */
1698         set_bit(__IGB_DOWN, &adapter->state);
1699         del_timer_sync(&adapter->watchdog_timer);
1700         del_timer_sync(&adapter->phy_info_timer);
1701
1702         flush_scheduled_work();
1703
1704 #ifdef CONFIG_IGB_DCA
1705         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1706                 dev_info(&pdev->dev, "DCA disabled\n");
1707                 dca_remove_requester(&pdev->dev);
1708                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1709                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1710         }
1711 #endif
1712
1713         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
1714          * would have already happened in close and is redundant. */
1715         igb_release_hw_control(adapter);
1716
1717         unregister_netdev(netdev);
1718
1719         igb_clear_interrupt_scheme(adapter);
1720
1721 #ifdef CONFIG_PCI_IOV
1722         /* reclaim resources allocated to VFs */
1723         if (adapter->vf_data) {
1724                 /* disable iov and allow time for transactions to clear */
1725                 pci_disable_sriov(pdev);
1726                 msleep(500);
1727
1728                 kfree(adapter->vf_data);
1729                 adapter->vf_data = NULL;
1730                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1731                 msleep(100);
1732                 dev_info(&pdev->dev, "IOV Disabled\n");
1733         }
1734 #endif
1735
1736         iounmap(hw->hw_addr);
1737         if (hw->flash_address)
1738                 iounmap(hw->flash_address);
1739         pci_release_selected_regions(pdev,
1740                                      pci_select_bars(pdev, IORESOURCE_MEM));
1741
1742         free_netdev(netdev);
1743
1744         pci_disable_pcie_error_reporting(pdev);
1745
1746         pci_disable_device(pdev);
1747 }
1748
1749 /**
1750  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
1751  * @adapter: board private structure to initialize
1752  *
1753  * This function initializes the vf specific data storage and then attempts to
1754  * allocate the VFs.  The reason for ordering it this way is because it is much
1755  * mor expensive time wise to disable SR-IOV than it is to allocate and free
1756  * the memory for the VFs.
1757  **/
1758 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
1759 {
1760 #ifdef CONFIG_PCI_IOV
1761         struct pci_dev *pdev = adapter->pdev;
1762
1763         if (adapter->vfs_allocated_count > 7)
1764                 adapter->vfs_allocated_count = 7;
1765
1766         if (adapter->vfs_allocated_count) {
1767                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1768                                            sizeof(struct vf_data_storage),
1769                                            GFP_KERNEL);
1770                 /* if allocation failed then we do not support SR-IOV */
1771                 if (!adapter->vf_data) {
1772                         adapter->vfs_allocated_count = 0;
1773                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
1774                                 "Data Storage\n");
1775                 }
1776         }
1777
1778         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
1779                 kfree(adapter->vf_data);
1780                 adapter->vf_data = NULL;
1781 #endif /* CONFIG_PCI_IOV */
1782                 adapter->vfs_allocated_count = 0;
1783 #ifdef CONFIG_PCI_IOV
1784         } else {
1785                 unsigned char mac_addr[ETH_ALEN];
1786                 int i;
1787                 dev_info(&pdev->dev, "%d vfs allocated\n",
1788                          adapter->vfs_allocated_count);
1789                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
1790                         random_ether_addr(mac_addr);
1791                         igb_set_vf_mac(adapter, i, mac_addr);
1792                 }
1793         }
1794 #endif /* CONFIG_PCI_IOV */
1795 }
1796
1797
1798 /**
1799  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
1800  * @adapter: board private structure to initialize
1801  *
1802  * igb_init_hw_timer initializes the function pointer and values for the hw
1803  * timer found in hardware.
1804  **/
1805 static void igb_init_hw_timer(struct igb_adapter *adapter)
1806 {
1807         struct e1000_hw *hw = &adapter->hw;
1808
1809         switch (hw->mac.type) {
1810         case e1000_i350:
1811         case e1000_82580:
1812                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1813                 adapter->cycles.read = igb_read_clock;
1814                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1815                 adapter->cycles.mult = 1;
1816                 /*
1817                  * The 82580 timesync updates the system timer every 8ns by 8ns
1818                  * and the value cannot be shifted.  Instead we need to shift
1819                  * the registers to generate a 64bit timer value.  As a result
1820                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
1821                  * 24 in order to generate a larger value for synchronization.
1822                  */
1823                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
1824                 /* disable system timer temporarily by setting bit 31 */
1825                 wr32(E1000_TSAUXC, 0x80000000);
1826                 wrfl();
1827
1828                 /* Set registers so that rollover occurs soon to test this. */
1829                 wr32(E1000_SYSTIMR, 0x00000000);
1830                 wr32(E1000_SYSTIML, 0x80000000);
1831                 wr32(E1000_SYSTIMH, 0x000000FF);
1832                 wrfl();
1833
1834                 /* enable system timer by clearing bit 31 */
1835                 wr32(E1000_TSAUXC, 0x0);
1836                 wrfl();
1837
1838                 timecounter_init(&adapter->clock,
1839                                  &adapter->cycles,
1840                                  ktime_to_ns(ktime_get_real()));
1841                 /*
1842                  * Synchronize our NIC clock against system wall clock. NIC
1843                  * time stamp reading requires ~3us per sample, each sample
1844                  * was pretty stable even under load => only require 10
1845                  * samples for each offset comparison.
1846                  */
1847                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1848                 adapter->compare.source = &adapter->clock;
1849                 adapter->compare.target = ktime_get_real;
1850                 adapter->compare.num_samples = 10;
1851                 timecompare_update(&adapter->compare, 0);
1852                 break;
1853         case e1000_82576:
1854                 /*
1855                  * Initialize hardware timer: we keep it running just in case
1856                  * that some program needs it later on.
1857                  */
1858                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1859                 adapter->cycles.read = igb_read_clock;
1860                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1861                 adapter->cycles.mult = 1;
1862                 /**
1863                  * Scale the NIC clock cycle by a large factor so that
1864                  * relatively small clock corrections can be added or
1865                  * substracted at each clock tick. The drawbacks of a large
1866                  * factor are a) that the clock register overflows more quickly
1867                  * (not such a big deal) and b) that the increment per tick has
1868                  * to fit into 24 bits.  As a result we need to use a shift of
1869                  * 19 so we can fit a value of 16 into the TIMINCA register.
1870                  */
1871                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1872                 wr32(E1000_TIMINCA,
1873                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
1874                                 (16 << IGB_82576_TSYNC_SHIFT));
1875
1876                 /* Set registers so that rollover occurs soon to test this. */
1877                 wr32(E1000_SYSTIML, 0x00000000);
1878                 wr32(E1000_SYSTIMH, 0xFF800000);
1879                 wrfl();
1880
1881                 timecounter_init(&adapter->clock,
1882                                  &adapter->cycles,
1883                                  ktime_to_ns(ktime_get_real()));
1884                 /*
1885                  * Synchronize our NIC clock against system wall clock. NIC
1886                  * time stamp reading requires ~3us per sample, each sample
1887                  * was pretty stable even under load => only require 10
1888                  * samples for each offset comparison.
1889                  */
1890                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1891                 adapter->compare.source = &adapter->clock;
1892                 adapter->compare.target = ktime_get_real;
1893                 adapter->compare.num_samples = 10;
1894                 timecompare_update(&adapter->compare, 0);
1895                 break;
1896         case e1000_82575:
1897                 /* 82575 does not support timesync */
1898         default:
1899                 break;
1900         }
1901
1902 }
1903
1904 /**
1905  * igb_sw_init - Initialize general software structures (struct igb_adapter)
1906  * @adapter: board private structure to initialize
1907  *
1908  * igb_sw_init initializes the Adapter private data structure.
1909  * Fields are initialized based on PCI device information and
1910  * OS network device settings (MTU size).
1911  **/
1912 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1913 {
1914         struct e1000_hw *hw = &adapter->hw;
1915         struct net_device *netdev = adapter->netdev;
1916         struct pci_dev *pdev = adapter->pdev;
1917
1918         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1919
1920         adapter->tx_ring_count = IGB_DEFAULT_TXD;
1921         adapter->rx_ring_count = IGB_DEFAULT_RXD;
1922         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1923         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1924
1925         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1926         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1927
1928 #ifdef CONFIG_PCI_IOV
1929         if (hw->mac.type == e1000_82576)
1930                 adapter->vfs_allocated_count = max_vfs;
1931
1932 #endif /* CONFIG_PCI_IOV */
1933         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
1934
1935         /*
1936          * if rss_queues > 4 or vfs are going to be allocated with rss_queues
1937          * then we should combine the queues into a queue pair in order to
1938          * conserve interrupts due to limited supply
1939          */
1940         if ((adapter->rss_queues > 4) ||
1941             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
1942                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1943
1944         /* This call may decrease the number of queues */
1945         if (igb_init_interrupt_scheme(adapter)) {
1946                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1947                 return -ENOMEM;
1948         }
1949
1950         igb_init_hw_timer(adapter);
1951         igb_probe_vfs(adapter);
1952
1953         /* Explicitly disable IRQ since the NIC can be in any state. */
1954         igb_irq_disable(adapter);
1955
1956         set_bit(__IGB_DOWN, &adapter->state);
1957         return 0;
1958 }
1959
1960 /**
1961  * igb_open - Called when a network interface is made active
1962  * @netdev: network interface device structure
1963  *
1964  * Returns 0 on success, negative value on failure
1965  *
1966  * The open entry point is called when a network interface is made
1967  * active by the system (IFF_UP).  At this point all resources needed
1968  * for transmit and receive operations are allocated, the interrupt
1969  * handler is registered with the OS, the watchdog timer is started,
1970  * and the stack is notified that the interface is ready.
1971  **/
1972 static int igb_open(struct net_device *netdev)
1973 {
1974         struct igb_adapter *adapter = netdev_priv(netdev);
1975         struct e1000_hw *hw = &adapter->hw;
1976         int err;
1977         int i;
1978
1979         /* disallow open during test */
1980         if (test_bit(__IGB_TESTING, &adapter->state))
1981                 return -EBUSY;
1982
1983         netif_carrier_off(netdev);
1984
1985         /* allocate transmit descriptors */
1986         err = igb_setup_all_tx_resources(adapter);
1987         if (err)
1988                 goto err_setup_tx;
1989
1990         /* allocate receive descriptors */
1991         err = igb_setup_all_rx_resources(adapter);
1992         if (err)
1993                 goto err_setup_rx;
1994
1995         igb_power_up_link(adapter);
1996
1997         /* before we allocate an interrupt, we must be ready to handle it.
1998          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
1999          * as soon as we call pci_request_irq, so we have to setup our
2000          * clean_rx handler before we do so.  */
2001         igb_configure(adapter);
2002
2003         err = igb_request_irq(adapter);
2004         if (err)
2005                 goto err_req_irq;
2006
2007         /* From here on the code is the same as igb_up() */
2008         clear_bit(__IGB_DOWN, &adapter->state);
2009
2010         for (i = 0; i < adapter->num_q_vectors; i++) {
2011                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2012                 napi_enable(&q_vector->napi);
2013         }
2014
2015         /* Clear any pending interrupts. */
2016         rd32(E1000_ICR);
2017
2018         igb_irq_enable(adapter);
2019
2020         /* notify VFs that reset has been completed */
2021         if (adapter->vfs_allocated_count) {
2022                 u32 reg_data = rd32(E1000_CTRL_EXT);
2023                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2024                 wr32(E1000_CTRL_EXT, reg_data);
2025         }
2026
2027         netif_tx_start_all_queues(netdev);
2028
2029         /* start the watchdog. */
2030         hw->mac.get_link_status = 1;
2031         schedule_work(&adapter->watchdog_task);
2032
2033         return 0;
2034
2035 err_req_irq:
2036         igb_release_hw_control(adapter);
2037         igb_power_down_link(adapter);
2038         igb_free_all_rx_resources(adapter);
2039 err_setup_rx:
2040         igb_free_all_tx_resources(adapter);
2041 err_setup_tx:
2042         igb_reset(adapter);
2043
2044         return err;
2045 }
2046
2047 /**
2048  * igb_close - Disables a network interface
2049  * @netdev: network interface device structure
2050  *
2051  * Returns 0, this is not allowed to fail
2052  *
2053  * The close entry point is called when an interface is de-activated
2054  * by the OS.  The hardware is still under the driver's control, but
2055  * needs to be disabled.  A global MAC reset is issued to stop the
2056  * hardware, and all transmit and receive resources are freed.
2057  **/
2058 static int igb_close(struct net_device *netdev)
2059 {
2060         struct igb_adapter *adapter = netdev_priv(netdev);
2061
2062         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2063         igb_down(adapter);
2064
2065         igb_free_irq(adapter);
2066
2067         igb_free_all_tx_resources(adapter);
2068         igb_free_all_rx_resources(adapter);
2069
2070         return 0;
2071 }
2072
2073 /**
2074  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2075  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2076  *
2077  * Return 0 on success, negative on failure
2078  **/
2079 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2080 {
2081         struct pci_dev *pdev = tx_ring->pdev;
2082         int size;
2083
2084         size = sizeof(struct igb_buffer) * tx_ring->count;
2085         tx_ring->buffer_info = vmalloc(size);
2086         if (!tx_ring->buffer_info)
2087                 goto err;
2088         memset(tx_ring->buffer_info, 0, size);
2089
2090         /* round up to nearest 4K */
2091         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2092         tx_ring->size = ALIGN(tx_ring->size, 4096);
2093
2094         tx_ring->desc = pci_alloc_consistent(pdev,
2095                                              tx_ring->size,
2096                                              &tx_ring->dma);
2097
2098         if (!tx_ring->desc)
2099                 goto err;
2100
2101         tx_ring->next_to_use = 0;
2102         tx_ring->next_to_clean = 0;
2103         return 0;
2104
2105 err:
2106         vfree(tx_ring->buffer_info);
2107         dev_err(&pdev->dev,
2108                 "Unable to allocate memory for the transmit descriptor ring\n");
2109         return -ENOMEM;
2110 }
2111
2112 /**
2113  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2114  *                                (Descriptors) for all queues
2115  * @adapter: board private structure
2116  *
2117  * Return 0 on success, negative on failure
2118  **/
2119 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2120 {
2121         struct pci_dev *pdev = adapter->pdev;
2122         int i, err = 0;
2123
2124         for (i = 0; i < adapter->num_tx_queues; i++) {
2125                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2126                 if (err) {
2127                         dev_err(&pdev->dev,
2128                                 "Allocation for Tx Queue %u failed\n", i);
2129                         for (i--; i >= 0; i--)
2130                                 igb_free_tx_resources(adapter->tx_ring[i]);
2131                         break;
2132                 }
2133         }
2134
2135         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2136                 int r_idx = i % adapter->num_tx_queues;
2137                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2138         }
2139         return err;
2140 }
2141
2142 /**
2143  * igb_setup_tctl - configure the transmit control registers
2144  * @adapter: Board private structure
2145  **/
2146 void igb_setup_tctl(struct igb_adapter *adapter)
2147 {
2148         struct e1000_hw *hw = &adapter->hw;
2149         u32 tctl;
2150
2151         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2152         wr32(E1000_TXDCTL(0), 0);
2153
2154         /* Program the Transmit Control Register */
2155         tctl = rd32(E1000_TCTL);
2156         tctl &= ~E1000_TCTL_CT;
2157         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2158                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2159
2160         igb_config_collision_dist(hw);
2161
2162         /* Enable transmits */
2163         tctl |= E1000_TCTL_EN;
2164
2165         wr32(E1000_TCTL, tctl);
2166 }
2167
2168 /**
2169  * igb_configure_tx_ring - Configure transmit ring after Reset
2170  * @adapter: board private structure
2171  * @ring: tx ring to configure
2172  *
2173  * Configure a transmit ring after a reset.
2174  **/
2175 void igb_configure_tx_ring(struct igb_adapter *adapter,
2176                            struct igb_ring *ring)
2177 {
2178         struct e1000_hw *hw = &adapter->hw;
2179         u32 txdctl;
2180         u64 tdba = ring->dma;
2181         int reg_idx = ring->reg_idx;
2182
2183         /* disable the queue */
2184         txdctl = rd32(E1000_TXDCTL(reg_idx));
2185         wr32(E1000_TXDCTL(reg_idx),
2186                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2187         wrfl();
2188         mdelay(10);
2189
2190         wr32(E1000_TDLEN(reg_idx),
2191                         ring->count * sizeof(union e1000_adv_tx_desc));
2192         wr32(E1000_TDBAL(reg_idx),
2193                         tdba & 0x00000000ffffffffULL);
2194         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2195
2196         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2197         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2198         writel(0, ring->head);
2199         writel(0, ring->tail);
2200
2201         txdctl |= IGB_TX_PTHRESH;
2202         txdctl |= IGB_TX_HTHRESH << 8;
2203         txdctl |= IGB_TX_WTHRESH << 16;
2204
2205         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2206         wr32(E1000_TXDCTL(reg_idx), txdctl);
2207 }
2208
2209 /**
2210  * igb_configure_tx - Configure transmit Unit after Reset
2211  * @adapter: board private structure
2212  *
2213  * Configure the Tx unit of the MAC after a reset.
2214  **/
2215 static void igb_configure_tx(struct igb_adapter *adapter)
2216 {
2217         int i;
2218
2219         for (i = 0; i < adapter->num_tx_queues; i++)
2220                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2221 }
2222
2223 /**
2224  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2225  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2226  *
2227  * Returns 0 on success, negative on failure
2228  **/
2229 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2230 {
2231         struct pci_dev *pdev = rx_ring->pdev;
2232         int size, desc_len;
2233
2234         size = sizeof(struct igb_buffer) * rx_ring->count;
2235         rx_ring->buffer_info = vmalloc(size);
2236         if (!rx_ring->buffer_info)
2237                 goto err;
2238         memset(rx_ring->buffer_info, 0, size);
2239
2240         desc_len = sizeof(union e1000_adv_rx_desc);
2241
2242         /* Round up to nearest 4K */
2243         rx_ring->size = rx_ring->count * desc_len;
2244         rx_ring->size = ALIGN(rx_ring->size, 4096);
2245
2246         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2247                                              &rx_ring->dma);
2248
2249         if (!rx_ring->desc)
2250                 goto err;
2251
2252         rx_ring->next_to_clean = 0;
2253         rx_ring->next_to_use = 0;
2254
2255         return 0;
2256
2257 err:
2258         vfree(rx_ring->buffer_info);
2259         rx_ring->buffer_info = NULL;
2260         dev_err(&pdev->dev, "Unable to allocate memory for "
2261                 "the receive descriptor ring\n");
2262         return -ENOMEM;
2263 }
2264
2265 /**
2266  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2267  *                                (Descriptors) for all queues
2268  * @adapter: board private structure
2269  *
2270  * Return 0 on success, negative on failure
2271  **/
2272 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2273 {
2274         struct pci_dev *pdev = adapter->pdev;
2275         int i, err = 0;
2276
2277         for (i = 0; i < adapter->num_rx_queues; i++) {
2278                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2279                 if (err) {
2280                         dev_err(&pdev->dev,
2281                                 "Allocation for Rx Queue %u failed\n", i);
2282                         for (i--; i >= 0; i--)
2283                                 igb_free_rx_resources(adapter->rx_ring[i]);
2284                         break;
2285                 }
2286         }
2287
2288         return err;
2289 }
2290
2291 /**
2292  * igb_setup_mrqc - configure the multiple receive queue control registers
2293  * @adapter: Board private structure
2294  **/
2295 static void igb_setup_mrqc(struct igb_adapter *adapter)
2296 {
2297         struct e1000_hw *hw = &adapter->hw;
2298         u32 mrqc, rxcsum;
2299         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2300         union e1000_reta {
2301                 u32 dword;
2302                 u8  bytes[4];
2303         } reta;
2304         static const u8 rsshash[40] = {
2305                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2306                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2307                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2308                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2309
2310         /* Fill out hash function seeds */
2311         for (j = 0; j < 10; j++) {
2312                 u32 rsskey = rsshash[(j * 4)];
2313                 rsskey |= rsshash[(j * 4) + 1] << 8;
2314                 rsskey |= rsshash[(j * 4) + 2] << 16;
2315                 rsskey |= rsshash[(j * 4) + 3] << 24;
2316                 array_wr32(E1000_RSSRK(0), j, rsskey);
2317         }
2318
2319         num_rx_queues = adapter->rss_queues;
2320
2321         if (adapter->vfs_allocated_count) {
2322                 /* 82575 and 82576 supports 2 RSS queues for VMDq */
2323                 switch (hw->mac.type) {
2324                 case e1000_i350:
2325                 case e1000_82580:
2326                         num_rx_queues = 1;
2327                         shift = 0;
2328                         break;
2329                 case e1000_82576:
2330                         shift = 3;
2331                         num_rx_queues = 2;
2332                         break;
2333                 case e1000_82575:
2334                         shift = 2;
2335                         shift2 = 6;
2336                 default:
2337                         break;
2338                 }
2339         } else {
2340                 if (hw->mac.type == e1000_82575)
2341                         shift = 6;
2342         }
2343
2344         for (j = 0; j < (32 * 4); j++) {
2345                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2346                 if (shift2)
2347                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2348                 if ((j & 3) == 3)
2349                         wr32(E1000_RETA(j >> 2), reta.dword);
2350         }
2351
2352         /*
2353          * Disable raw packet checksumming so that RSS hash is placed in
2354          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2355          * offloads as they are enabled by default
2356          */
2357         rxcsum = rd32(E1000_RXCSUM);
2358         rxcsum |= E1000_RXCSUM_PCSD;
2359
2360         if (adapter->hw.mac.type >= e1000_82576)
2361                 /* Enable Receive Checksum Offload for SCTP */
2362                 rxcsum |= E1000_RXCSUM_CRCOFL;
2363
2364         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2365         wr32(E1000_RXCSUM, rxcsum);
2366
2367         /* If VMDq is enabled then we set the appropriate mode for that, else
2368          * we default to RSS so that an RSS hash is calculated per packet even
2369          * if we are only using one queue */
2370         if (adapter->vfs_allocated_count) {
2371                 if (hw->mac.type > e1000_82575) {
2372                         /* Set the default pool for the PF's first queue */
2373                         u32 vtctl = rd32(E1000_VT_CTL);
2374                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2375                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2376                         vtctl |= adapter->vfs_allocated_count <<
2377                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2378                         wr32(E1000_VT_CTL, vtctl);
2379                 }
2380                 if (adapter->rss_queues > 1)
2381                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2382                 else
2383                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2384         } else {
2385                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2386         }
2387         igb_vmm_control(adapter);
2388
2389         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2390                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2391         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2392                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2393         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2394                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2395         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2396                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2397
2398         wr32(E1000_MRQC, mrqc);
2399 }
2400
2401 /**
2402  * igb_setup_rctl - configure the receive control registers
2403  * @adapter: Board private structure
2404  **/
2405 void igb_setup_rctl(struct igb_adapter *adapter)
2406 {
2407         struct e1000_hw *hw = &adapter->hw;
2408         u32 rctl;
2409
2410         rctl = rd32(E1000_RCTL);
2411
2412         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2413         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2414
2415         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2416                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2417
2418         /*
2419          * enable stripping of CRC. It's unlikely this will break BMC
2420          * redirection as it did with e1000. Newer features require
2421          * that the HW strips the CRC.
2422          */
2423         rctl |= E1000_RCTL_SECRC;
2424
2425         /* disable store bad packets and clear size bits. */
2426         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2427
2428         /* enable LPE to prevent packets larger than max_frame_size */
2429         rctl |= E1000_RCTL_LPE;
2430
2431         /* disable queue 0 to prevent tail write w/o re-config */
2432         wr32(E1000_RXDCTL(0), 0);
2433
2434         /* Attention!!!  For SR-IOV PF driver operations you must enable
2435          * queue drop for all VF and PF queues to prevent head of line blocking
2436          * if an un-trusted VF does not provide descriptors to hardware.
2437          */
2438         if (adapter->vfs_allocated_count) {
2439                 /* set all queue drop enable bits */
2440                 wr32(E1000_QDE, ALL_QUEUES);
2441         }
2442
2443         wr32(E1000_RCTL, rctl);
2444 }
2445
2446 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2447                                    int vfn)
2448 {
2449         struct e1000_hw *hw = &adapter->hw;
2450         u32 vmolr;
2451
2452         /* if it isn't the PF check to see if VFs are enabled and
2453          * increase the size to support vlan tags */
2454         if (vfn < adapter->vfs_allocated_count &&
2455             adapter->vf_data[vfn].vlans_enabled)
2456                 size += VLAN_TAG_SIZE;
2457
2458         vmolr = rd32(E1000_VMOLR(vfn));
2459         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2460         vmolr |= size | E1000_VMOLR_LPE;
2461         wr32(E1000_VMOLR(vfn), vmolr);
2462
2463         return 0;
2464 }
2465
2466 /**
2467  * igb_rlpml_set - set maximum receive packet size
2468  * @adapter: board private structure
2469  *
2470  * Configure maximum receivable packet size.
2471  **/
2472 static void igb_rlpml_set(struct igb_adapter *adapter)
2473 {
2474         u32 max_frame_size = adapter->max_frame_size;
2475         struct e1000_hw *hw = &adapter->hw;
2476         u16 pf_id = adapter->vfs_allocated_count;
2477
2478         if (adapter->vlgrp)
2479                 max_frame_size += VLAN_TAG_SIZE;
2480
2481         /* if vfs are enabled we set RLPML to the largest possible request
2482          * size and set the VMOLR RLPML to the size we need */
2483         if (pf_id) {
2484                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2485                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2486         }
2487
2488         wr32(E1000_RLPML, max_frame_size);
2489 }
2490
2491 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2492                                  int vfn, bool aupe)
2493 {
2494         struct e1000_hw *hw = &adapter->hw;
2495         u32 vmolr;
2496
2497         /*
2498          * This register exists only on 82576 and newer so if we are older then
2499          * we should exit and do nothing
2500          */
2501         if (hw->mac.type < e1000_82576)
2502                 return;
2503
2504         vmolr = rd32(E1000_VMOLR(vfn));
2505         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2506         if (aupe)
2507                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2508         else
2509                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2510
2511         /* clear all bits that might not be set */
2512         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2513
2514         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2515                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2516         /*
2517          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2518          * multicast packets
2519          */
2520         if (vfn <= adapter->vfs_allocated_count)
2521                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2522
2523         wr32(E1000_VMOLR(vfn), vmolr);
2524 }
2525
2526 /**
2527  * igb_configure_rx_ring - Configure a receive ring after Reset
2528  * @adapter: board private structure
2529  * @ring: receive ring to be configured
2530  *
2531  * Configure the Rx unit of the MAC after a reset.
2532  **/
2533 void igb_configure_rx_ring(struct igb_adapter *adapter,
2534                            struct igb_ring *ring)
2535 {
2536         struct e1000_hw *hw = &adapter->hw;
2537         u64 rdba = ring->dma;
2538         int reg_idx = ring->reg_idx;
2539         u32 srrctl, rxdctl;
2540
2541         /* disable the queue */
2542         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2543         wr32(E1000_RXDCTL(reg_idx),
2544                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2545
2546         /* Set DMA base address registers */
2547         wr32(E1000_RDBAL(reg_idx),
2548              rdba & 0x00000000ffffffffULL);
2549         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2550         wr32(E1000_RDLEN(reg_idx),
2551                        ring->count * sizeof(union e1000_adv_rx_desc));
2552
2553         /* initialize head and tail */
2554         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2555         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2556         writel(0, ring->head);
2557         writel(0, ring->tail);
2558
2559         /* set descriptor configuration */
2560         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2561                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2562                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2563 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2564                 srrctl |= IGB_RXBUFFER_16384 >>
2565                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2566 #else
2567                 srrctl |= (PAGE_SIZE / 2) >>
2568                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2569 #endif
2570                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2571         } else {
2572                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2573                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2574                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2575         }
2576         if (hw->mac.type == e1000_82580)
2577                 srrctl |= E1000_SRRCTL_TIMESTAMP;
2578         /* Only set Drop Enable if we are supporting multiple queues */
2579         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2580                 srrctl |= E1000_SRRCTL_DROP_EN;
2581
2582         wr32(E1000_SRRCTL(reg_idx), srrctl);
2583
2584         /* set filtering for VMDQ pools */
2585         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2586
2587         /* enable receive descriptor fetching */
2588         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2589         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2590         rxdctl &= 0xFFF00000;
2591         rxdctl |= IGB_RX_PTHRESH;
2592         rxdctl |= IGB_RX_HTHRESH << 8;
2593         rxdctl |= IGB_RX_WTHRESH << 16;
2594         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2595 }
2596
2597 /**
2598  * igb_configure_rx - Configure receive Unit after Reset
2599  * @adapter: board private structure
2600  *
2601  * Configure the Rx unit of the MAC after a reset.
2602  **/
2603 static void igb_configure_rx(struct igb_adapter *adapter)
2604 {
2605         int i;
2606
2607         /* set UTA to appropriate mode */
2608         igb_set_uta(adapter);
2609
2610         /* set the correct pool for the PF default MAC address in entry 0 */
2611         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2612                          adapter->vfs_allocated_count);
2613
2614         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2615          * the Base and Length of the Rx Descriptor Ring */
2616         for (i = 0; i < adapter->num_rx_queues; i++)
2617                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2618 }
2619
2620 /**
2621  * igb_free_tx_resources - Free Tx Resources per Queue
2622  * @tx_ring: Tx descriptor ring for a specific queue
2623  *
2624  * Free all transmit software resources
2625  **/
2626 void igb_free_tx_resources(struct igb_ring *tx_ring)
2627 {
2628         igb_clean_tx_ring(tx_ring);
2629
2630         vfree(tx_ring->buffer_info);
2631         tx_ring->buffer_info = NULL;
2632
2633         /* if not set, then don't free */
2634         if (!tx_ring->desc)
2635                 return;
2636
2637         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2638                             tx_ring->desc, tx_ring->dma);
2639
2640         tx_ring->desc = NULL;
2641 }
2642
2643 /**
2644  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2645  * @adapter: board private structure
2646  *
2647  * Free all transmit software resources
2648  **/
2649 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2650 {
2651         int i;
2652
2653         for (i = 0; i < adapter->num_tx_queues; i++)
2654                 igb_free_tx_resources(adapter->tx_ring[i]);
2655 }
2656
2657 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2658                                     struct igb_buffer *buffer_info)
2659 {
2660         if (buffer_info->dma) {
2661                 if (buffer_info->mapped_as_page)
2662                         pci_unmap_page(tx_ring->pdev,
2663                                         buffer_info->dma,
2664                                         buffer_info->length,
2665                                         PCI_DMA_TODEVICE);
2666                 else
2667                         pci_unmap_single(tx_ring->pdev,
2668                                         buffer_info->dma,
2669                                         buffer_info->length,
2670                                         PCI_DMA_TODEVICE);
2671                 buffer_info->dma = 0;
2672         }
2673         if (buffer_info->skb) {
2674                 dev_kfree_skb_any(buffer_info->skb);
2675                 buffer_info->skb = NULL;
2676         }
2677         buffer_info->time_stamp = 0;
2678         buffer_info->length = 0;
2679         buffer_info->next_to_watch = 0;
2680         buffer_info->mapped_as_page = false;
2681 }
2682
2683 /**
2684  * igb_clean_tx_ring - Free Tx Buffers
2685  * @tx_ring: ring to be cleaned
2686  **/
2687 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2688 {
2689         struct igb_buffer *buffer_info;
2690         unsigned long size;
2691         unsigned int i;
2692
2693         if (!tx_ring->buffer_info)
2694                 return;
2695         /* Free all the Tx ring sk_buffs */
2696
2697         for (i = 0; i < tx_ring->count; i++) {
2698                 buffer_info = &tx_ring->buffer_info[i];
2699                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2700         }
2701
2702         size = sizeof(struct igb_buffer) * tx_ring->count;
2703         memset(tx_ring->buffer_info, 0, size);
2704
2705         /* Zero out the descriptor ring */
2706         memset(tx_ring->desc, 0, tx_ring->size);
2707
2708         tx_ring->next_to_use = 0;
2709         tx_ring->next_to_clean = 0;
2710 }
2711
2712 /**
2713  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2714  * @adapter: board private structure
2715  **/
2716 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2717 {
2718         int i;
2719
2720         for (i = 0; i < adapter->num_tx_queues; i++)
2721                 igb_clean_tx_ring(adapter->tx_ring[i]);
2722 }
2723
2724 /**
2725  * igb_free_rx_resources - Free Rx Resources
2726  * @rx_ring: ring to clean the resources from
2727  *
2728  * Free all receive software resources
2729  **/
2730 void igb_free_rx_resources(struct igb_ring *rx_ring)
2731 {
2732         igb_clean_rx_ring(rx_ring);
2733
2734         vfree(rx_ring->buffer_info);
2735         rx_ring->buffer_info = NULL;
2736
2737         /* if not set, then don't free */
2738         if (!rx_ring->desc)
2739                 return;
2740
2741         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2742                             rx_ring->desc, rx_ring->dma);
2743
2744         rx_ring->desc = NULL;
2745 }
2746
2747 /**
2748  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2749  * @adapter: board private structure
2750  *
2751  * Free all receive software resources
2752  **/
2753 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2754 {
2755         int i;
2756
2757         for (i = 0; i < adapter->num_rx_queues; i++)
2758                 igb_free_rx_resources(adapter->rx_ring[i]);
2759 }
2760
2761 /**
2762  * igb_clean_rx_ring - Free Rx Buffers per Queue
2763  * @rx_ring: ring to free buffers from
2764  **/
2765 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2766 {
2767         struct igb_buffer *buffer_info;
2768         unsigned long size;
2769         unsigned int i;
2770
2771         if (!rx_ring->buffer_info)
2772                 return;
2773
2774         /* Free all the Rx ring sk_buffs */
2775         for (i = 0; i < rx_ring->count; i++) {
2776                 buffer_info = &rx_ring->buffer_info[i];
2777                 if (buffer_info->dma) {
2778                         pci_unmap_single(rx_ring->pdev,
2779                                          buffer_info->dma,
2780                                          rx_ring->rx_buffer_len,
2781                                          PCI_DMA_FROMDEVICE);
2782                         buffer_info->dma = 0;
2783                 }
2784
2785                 if (buffer_info->skb) {
2786                         dev_kfree_skb(buffer_info->skb);
2787                         buffer_info->skb = NULL;
2788                 }
2789                 if (buffer_info->page_dma) {
2790                         pci_unmap_page(rx_ring->pdev,
2791                                        buffer_info->page_dma,
2792                                        PAGE_SIZE / 2,
2793                                        PCI_DMA_FROMDEVICE);
2794                         buffer_info->page_dma = 0;
2795                 }
2796                 if (buffer_info->page) {
2797                         put_page(buffer_info->page);
2798                         buffer_info->page = NULL;
2799                         buffer_info->page_offset = 0;
2800                 }
2801         }
2802
2803         size = sizeof(struct igb_buffer) * rx_ring->count;
2804         memset(rx_ring->buffer_info, 0, size);
2805
2806         /* Zero out the descriptor ring */
2807         memset(rx_ring->desc, 0, rx_ring->size);
2808
2809         rx_ring->next_to_clean = 0;
2810         rx_ring->next_to_use = 0;
2811 }
2812
2813 /**
2814  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2815  * @adapter: board private structure
2816  **/
2817 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2818 {
2819         int i;
2820
2821         for (i = 0; i < adapter->num_rx_queues; i++)
2822                 igb_clean_rx_ring(adapter->rx_ring[i]);
2823 }
2824
2825 /**
2826  * igb_set_mac - Change the Ethernet Address of the NIC
2827  * @netdev: network interface device structure
2828  * @p: pointer to an address structure
2829  *
2830  * Returns 0 on success, negative on failure
2831  **/
2832 static int igb_set_mac(struct net_device *netdev, void *p)
2833 {
2834         struct igb_adapter *adapter = netdev_priv(netdev);
2835         struct e1000_hw *hw = &adapter->hw;
2836         struct sockaddr *addr = p;
2837
2838         if (!is_valid_ether_addr(addr->sa_data))
2839                 return -EADDRNOTAVAIL;
2840
2841         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2842         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2843
2844         /* set the correct pool for the new PF MAC address in entry 0 */
2845         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2846                          adapter->vfs_allocated_count);
2847
2848         return 0;
2849 }
2850
2851 /**
2852  * igb_write_mc_addr_list - write multicast addresses to MTA
2853  * @netdev: network interface device structure
2854  *
2855  * Writes multicast address list to the MTA hash table.
2856  * Returns: -ENOMEM on failure
2857  *                0 on no addresses written
2858  *                X on writing X addresses to MTA
2859  **/
2860 static int igb_write_mc_addr_list(struct net_device *netdev)
2861 {
2862         struct igb_adapter *adapter = netdev_priv(netdev);
2863         struct e1000_hw *hw = &adapter->hw;
2864         struct netdev_hw_addr *ha;
2865         u8  *mta_list;
2866         int i;
2867
2868         if (netdev_mc_empty(netdev)) {
2869                 /* nothing to program, so clear mc list */
2870                 igb_update_mc_addr_list(hw, NULL, 0);
2871                 igb_restore_vf_multicasts(adapter);
2872                 return 0;
2873         }
2874
2875         mta_list = kzalloc(netdev_mc_count(netdev) * 6, GFP_ATOMIC);
2876         if (!mta_list)
2877                 return -ENOMEM;
2878
2879         /* The shared function expects a packed array of only addresses. */
2880         i = 0;
2881         netdev_for_each_mc_addr(ha, netdev)
2882                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
2883
2884         igb_update_mc_addr_list(hw, mta_list, i);
2885         kfree(mta_list);
2886
2887         return netdev_mc_count(netdev);
2888 }
2889
2890 /**
2891  * igb_write_uc_addr_list - write unicast addresses to RAR table
2892  * @netdev: network interface device structure
2893  *
2894  * Writes unicast address list to the RAR table.
2895  * Returns: -ENOMEM on failure/insufficient address space
2896  *                0 on no addresses written
2897  *                X on writing X addresses to the RAR table
2898  **/
2899 static int igb_write_uc_addr_list(struct net_device *netdev)
2900 {
2901         struct igb_adapter *adapter = netdev_priv(netdev);
2902         struct e1000_hw *hw = &adapter->hw;
2903         unsigned int vfn = adapter->vfs_allocated_count;
2904         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2905         int count = 0;
2906
2907         /* return ENOMEM indicating insufficient memory for addresses */
2908         if (netdev_uc_count(netdev) > rar_entries)
2909                 return -ENOMEM;
2910
2911         if (!netdev_uc_empty(netdev) && rar_entries) {
2912                 struct netdev_hw_addr *ha;
2913
2914                 netdev_for_each_uc_addr(ha, netdev) {
2915                         if (!rar_entries)
2916                                 break;
2917                         igb_rar_set_qsel(adapter, ha->addr,
2918                                          rar_entries--,
2919                                          vfn);
2920                         count++;
2921                 }
2922         }
2923         /* write the addresses in reverse order to avoid write combining */
2924         for (; rar_entries > 0 ; rar_entries--) {
2925                 wr32(E1000_RAH(rar_entries), 0);
2926                 wr32(E1000_RAL(rar_entries), 0);
2927         }
2928         wrfl();
2929
2930         return count;
2931 }
2932
2933 /**
2934  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2935  * @netdev: network interface device structure
2936  *
2937  * The set_rx_mode entry point is called whenever the unicast or multicast
2938  * address lists or the network interface flags are updated.  This routine is
2939  * responsible for configuring the hardware for proper unicast, multicast,
2940  * promiscuous mode, and all-multi behavior.
2941  **/
2942 static void igb_set_rx_mode(struct net_device *netdev)
2943 {
2944         struct igb_adapter *adapter = netdev_priv(netdev);
2945         struct e1000_hw *hw = &adapter->hw;
2946         unsigned int vfn = adapter->vfs_allocated_count;
2947         u32 rctl, vmolr = 0;
2948         int count;
2949
2950         /* Check for Promiscuous and All Multicast modes */
2951         rctl = rd32(E1000_RCTL);
2952
2953         /* clear the effected bits */
2954         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2955
2956         if (netdev->flags & IFF_PROMISC) {
2957                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2958                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2959         } else {
2960                 if (netdev->flags & IFF_ALLMULTI) {
2961                         rctl |= E1000_RCTL_MPE;
2962                         vmolr |= E1000_VMOLR_MPME;
2963                 } else {
2964                         /*
2965                          * Write addresses to the MTA, if the attempt fails
2966                          * then we should just turn on promiscous mode so
2967                          * that we can at least receive multicast traffic
2968                          */
2969                         count = igb_write_mc_addr_list(netdev);
2970                         if (count < 0) {
2971                                 rctl |= E1000_RCTL_MPE;
2972                                 vmolr |= E1000_VMOLR_MPME;
2973                         } else if (count) {
2974                                 vmolr |= E1000_VMOLR_ROMPE;
2975                         }
2976                 }
2977                 /*
2978                  * Write addresses to available RAR registers, if there is not
2979                  * sufficient space to store all the addresses then enable
2980                  * unicast promiscous mode
2981                  */
2982                 count = igb_write_uc_addr_list(netdev);
2983                 if (count < 0) {
2984                         rctl |= E1000_RCTL_UPE;
2985                         vmolr |= E1000_VMOLR_ROPE;
2986                 }
2987                 rctl |= E1000_RCTL_VFE;
2988         }
2989         wr32(E1000_RCTL, rctl);
2990
2991         /*
2992          * In order to support SR-IOV and eventually VMDq it is necessary to set
2993          * the VMOLR to enable the appropriate modes.  Without this workaround
2994          * we will have issues with VLAN tag stripping not being done for frames
2995          * that are only arriving because we are the default pool
2996          */
2997         if (hw->mac.type < e1000_82576)
2998                 return;
2999
3000         vmolr |= rd32(E1000_VMOLR(vfn)) &
3001                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3002         wr32(E1000_VMOLR(vfn), vmolr);
3003         igb_restore_vf_multicasts(adapter);
3004 }
3005
3006 /* Need to wait a few seconds after link up to get diagnostic information from
3007  * the phy */
3008 static void igb_update_phy_info(unsigned long data)
3009 {
3010         struct igb_adapter *adapter = (struct igb_adapter *) data;
3011         igb_get_phy_info(&adapter->hw);
3012 }
3013
3014 /**
3015  * igb_has_link - check shared code for link and determine up/down
3016  * @adapter: pointer to driver private info
3017  **/
3018 bool igb_has_link(struct igb_adapter *adapter)
3019 {
3020         struct e1000_hw *hw = &adapter->hw;
3021         bool link_active = false;
3022         s32 ret_val = 0;
3023
3024         /* get_link_status is set on LSC (link status) interrupt or
3025          * rx sequence error interrupt.  get_link_status will stay
3026          * false until the e1000_check_for_link establishes link
3027          * for copper adapters ONLY
3028          */
3029         switch (hw->phy.media_type) {
3030         case e1000_media_type_copper:
3031                 if (hw->mac.get_link_status) {
3032                         ret_val = hw->mac.ops.check_for_link(hw);
3033                         link_active = !hw->mac.get_link_status;
3034                 } else {
3035                         link_active = true;
3036                 }
3037                 break;
3038         case e1000_media_type_internal_serdes:
3039                 ret_val = hw->mac.ops.check_for_link(hw);
3040                 link_active = hw->mac.serdes_has_link;
3041                 break;
3042         default:
3043         case e1000_media_type_unknown:
3044                 break;
3045         }
3046
3047         return link_active;
3048 }
3049
3050 /**
3051  * igb_watchdog - Timer Call-back
3052  * @data: pointer to adapter cast into an unsigned long
3053  **/
3054 static void igb_watchdog(unsigned long data)
3055 {
3056         struct igb_adapter *adapter = (struct igb_adapter *)data;
3057         /* Do the rest outside of interrupt context */
3058         schedule_work(&adapter->watchdog_task);
3059 }
3060
3061 static void igb_watchdog_task(struct work_struct *work)
3062 {
3063         struct igb_adapter *adapter = container_of(work,
3064                                                    struct igb_adapter,
3065                                                    watchdog_task);
3066         struct e1000_hw *hw = &adapter->hw;
3067         struct net_device *netdev = adapter->netdev;
3068         u32 link;
3069         int i;
3070
3071         link = igb_has_link(adapter);
3072         if (link) {
3073                 if (!netif_carrier_ok(netdev)) {
3074                         u32 ctrl;
3075                         hw->mac.ops.get_speed_and_duplex(hw,
3076                                                          &adapter->link_speed,
3077                                                          &adapter->link_duplex);
3078
3079                         ctrl = rd32(E1000_CTRL);
3080                         /* Links status message must follow this format */
3081                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3082                                  "Flow Control: %s\n",
3083                                netdev->name,
3084                                adapter->link_speed,
3085                                adapter->link_duplex == FULL_DUPLEX ?
3086                                  "Full Duplex" : "Half Duplex",
3087                                ((ctrl & E1000_CTRL_TFCE) &&
3088                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3089                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3090                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3091
3092                         /* adjust timeout factor according to speed/duplex */
3093                         adapter->tx_timeout_factor = 1;
3094                         switch (adapter->link_speed) {
3095                         case SPEED_10:
3096                                 adapter->tx_timeout_factor = 14;
3097                                 break;
3098                         case SPEED_100:
3099                                 /* maybe add some timeout factor ? */
3100                                 break;
3101                         }
3102
3103                         netif_carrier_on(netdev);
3104
3105                         igb_ping_all_vfs(adapter);
3106
3107                         /* link state has changed, schedule phy info update */
3108                         if (!test_bit(__IGB_DOWN, &adapter->state))
3109                                 mod_timer(&adapter->phy_info_timer,
3110                                           round_jiffies(jiffies + 2 * HZ));
3111                 }
3112         } else {
3113                 if (netif_carrier_ok(netdev)) {
3114                         adapter->link_speed = 0;
3115                         adapter->link_duplex = 0;
3116                         /* Links status message must follow this format */
3117                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3118                                netdev->name);
3119                         netif_carrier_off(netdev);
3120
3121                         igb_ping_all_vfs(adapter);
3122
3123                         /* link state has changed, schedule phy info update */
3124                         if (!test_bit(__IGB_DOWN, &adapter->state))
3125                                 mod_timer(&adapter->phy_info_timer,
3126                                           round_jiffies(jiffies + 2 * HZ));
3127                 }
3128         }
3129
3130         igb_update_stats(adapter);
3131
3132         for (i = 0; i < adapter->num_tx_queues; i++) {
3133                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3134                 if (!netif_carrier_ok(netdev)) {
3135                         /* We've lost link, so the controller stops DMA,
3136                          * but we've got queued Tx work that's never going
3137                          * to get done, so reset controller to flush Tx.
3138                          * (Do the reset outside of interrupt context). */
3139                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3140                                 adapter->tx_timeout_count++;
3141                                 schedule_work(&adapter->reset_task);
3142                                 /* return immediately since reset is imminent */
3143                                 return;
3144                         }
3145                 }
3146
3147                 /* Force detection of hung controller every watchdog period */
3148                 tx_ring->detect_tx_hung = true;
3149         }
3150
3151         /* Cause software interrupt to ensure rx ring is cleaned */
3152         if (adapter->msix_entries) {
3153                 u32 eics = 0;
3154                 for (i = 0; i < adapter->num_q_vectors; i++) {
3155                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3156                         eics |= q_vector->eims_value;
3157                 }
3158                 wr32(E1000_EICS, eics);
3159         } else {
3160                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3161         }
3162
3163         /* Reset the timer */
3164         if (!test_bit(__IGB_DOWN, &adapter->state))
3165                 mod_timer(&adapter->watchdog_timer,
3166                           round_jiffies(jiffies + 2 * HZ));
3167 }
3168
3169 enum latency_range {
3170         lowest_latency = 0,
3171         low_latency = 1,
3172         bulk_latency = 2,
3173         latency_invalid = 255
3174 };
3175
3176 /**
3177  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3178  *
3179  *      Stores a new ITR value based on strictly on packet size.  This
3180  *      algorithm is less sophisticated than that used in igb_update_itr,
3181  *      due to the difficulty of synchronizing statistics across multiple
3182  *      receive rings.  The divisors and thresholds used by this fuction
3183  *      were determined based on theoretical maximum wire speed and testing
3184  *      data, in order to minimize response time while increasing bulk
3185  *      throughput.
3186  *      This functionality is controlled by the InterruptThrottleRate module
3187  *      parameter (see igb_param.c)
3188  *      NOTE:  This function is called only when operating in a multiqueue
3189  *             receive environment.
3190  * @q_vector: pointer to q_vector
3191  **/
3192 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3193 {
3194         int new_val = q_vector->itr_val;
3195         int avg_wire_size = 0;
3196         struct igb_adapter *adapter = q_vector->adapter;
3197
3198         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3199          * ints/sec - ITR timer value of 120 ticks.
3200          */
3201         if (adapter->link_speed != SPEED_1000) {
3202                 new_val = 976;
3203                 goto set_itr_val;
3204         }
3205
3206         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3207                 struct igb_ring *ring = q_vector->rx_ring;
3208                 avg_wire_size = ring->total_bytes / ring->total_packets;
3209         }
3210
3211         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3212                 struct igb_ring *ring = q_vector->tx_ring;
3213                 avg_wire_size = max_t(u32, avg_wire_size,
3214                                       (ring->total_bytes /
3215                                        ring->total_packets));
3216         }
3217
3218         /* if avg_wire_size isn't set no work was done */
3219         if (!avg_wire_size)
3220                 goto clear_counts;
3221
3222         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3223         avg_wire_size += 24;
3224
3225         /* Don't starve jumbo frames */
3226         avg_wire_size = min(avg_wire_size, 3000);
3227
3228         /* Give a little boost to mid-size frames */
3229         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3230                 new_val = avg_wire_size / 3;
3231         else
3232                 new_val = avg_wire_size / 2;
3233
3234         /* when in itr mode 3 do not exceed 20K ints/sec */
3235         if (adapter->rx_itr_setting == 3 && new_val < 196)
3236                 new_val = 196;
3237
3238 set_itr_val:
3239         if (new_val != q_vector->itr_val) {
3240                 q_vector->itr_val = new_val;
3241                 q_vector->set_itr = 1;
3242         }
3243 clear_counts:
3244         if (q_vector->rx_ring) {
3245                 q_vector->rx_ring->total_bytes = 0;
3246                 q_vector->rx_ring->total_packets = 0;
3247         }
3248         if (q_vector->tx_ring) {
3249                 q_vector->tx_ring->total_bytes = 0;
3250                 q_vector->tx_ring->total_packets = 0;
3251         }
3252 }
3253
3254 /**
3255  * igb_update_itr - update the dynamic ITR value based on statistics
3256  *      Stores a new ITR value based on packets and byte
3257  *      counts during the last interrupt.  The advantage of per interrupt
3258  *      computation is faster updates and more accurate ITR for the current
3259  *      traffic pattern.  Constants in this function were computed
3260  *      based on theoretical maximum wire speed and thresholds were set based
3261  *      on testing data as well as attempting to minimize response time
3262  *      while increasing bulk throughput.
3263  *      this functionality is controlled by the InterruptThrottleRate module
3264  *      parameter (see igb_param.c)
3265  *      NOTE:  These calculations are only valid when operating in a single-
3266  *             queue environment.
3267  * @adapter: pointer to adapter
3268  * @itr_setting: current q_vector->itr_val
3269  * @packets: the number of packets during this measurement interval
3270  * @bytes: the number of bytes during this measurement interval
3271  **/
3272 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3273                                    int packets, int bytes)
3274 {
3275         unsigned int retval = itr_setting;
3276
3277         if (packets == 0)
3278                 goto update_itr_done;
3279
3280         switch (itr_setting) {
3281         case lowest_latency:
3282                 /* handle TSO and jumbo frames */
3283                 if (bytes/packets > 8000)
3284                         retval = bulk_latency;
3285                 else if ((packets < 5) && (bytes > 512))
3286                         retval = low_latency;
3287                 break;
3288         case low_latency:  /* 50 usec aka 20000 ints/s */
3289                 if (bytes > 10000) {
3290                         /* this if handles the TSO accounting */
3291                         if (bytes/packets > 8000) {
3292                                 retval = bulk_latency;
3293                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3294                                 retval = bulk_latency;
3295                         } else if ((packets > 35)) {
3296                                 retval = lowest_latency;
3297                         }
3298                 } else if (bytes/packets > 2000) {
3299                         retval = bulk_latency;
3300                 } else if (packets <= 2 && bytes < 512) {
3301                         retval = lowest_latency;
3302                 }
3303                 break;
3304         case bulk_latency: /* 250 usec aka 4000 ints/s */
3305                 if (bytes > 25000) {
3306                         if (packets > 35)
3307                                 retval = low_latency;
3308                 } else if (bytes < 1500) {
3309                         retval = low_latency;
3310                 }
3311                 break;
3312         }
3313
3314 update_itr_done:
3315         return retval;
3316 }
3317
3318 static void igb_set_itr(struct igb_adapter *adapter)
3319 {
3320         struct igb_q_vector *q_vector = adapter->q_vector[0];
3321         u16 current_itr;
3322         u32 new_itr = q_vector->itr_val;
3323
3324         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3325         if (adapter->link_speed != SPEED_1000) {
3326                 current_itr = 0;
3327                 new_itr = 4000;
3328                 goto set_itr_now;
3329         }
3330
3331         adapter->rx_itr = igb_update_itr(adapter,
3332                                     adapter->rx_itr,
3333                                     q_vector->rx_ring->total_packets,
3334                                     q_vector->rx_ring->total_bytes);
3335
3336         adapter->tx_itr = igb_update_itr(adapter,
3337                                     adapter->tx_itr,
3338                                     q_vector->tx_ring->total_packets,
3339                                     q_vector->tx_ring->total_bytes);
3340         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3341
3342         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3343         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3344                 current_itr = low_latency;
3345
3346         switch (current_itr) {
3347         /* counts and packets in update_itr are dependent on these numbers */
3348         case lowest_latency:
3349                 new_itr = 56;  /* aka 70,000 ints/sec */
3350                 break;
3351         case low_latency:
3352                 new_itr = 196; /* aka 20,000 ints/sec */
3353                 break;
3354         case bulk_latency:
3355                 new_itr = 980; /* aka 4,000 ints/sec */
3356                 break;
3357         default:
3358                 break;
3359         }
3360
3361 set_itr_now:
3362         q_vector->rx_ring->total_bytes = 0;
3363         q_vector->rx_ring->total_packets = 0;
3364         q_vector->tx_ring->total_bytes = 0;
3365         q_vector->tx_ring->total_packets = 0;
3366
3367         if (new_itr != q_vector->itr_val) {
3368                 /* this attempts to bias the interrupt rate towards Bulk
3369                  * by adding intermediate steps when interrupt rate is
3370                  * increasing */
3371                 new_itr = new_itr > q_vector->itr_val ?
3372                              max((new_itr * q_vector->itr_val) /
3373                                  (new_itr + (q_vector->itr_val >> 2)),
3374                                  new_itr) :
3375                              new_itr;
3376                 /* Don't write the value here; it resets the adapter's
3377                  * internal timer, and causes us to delay far longer than
3378                  * we should between interrupts.  Instead, we write the ITR
3379                  * value at the beginning of the next interrupt so the timing
3380                  * ends up being correct.
3381                  */
3382                 q_vector->itr_val = new_itr;
3383                 q_vector->set_itr = 1;
3384         }
3385
3386         return;
3387 }
3388
3389 #define IGB_TX_FLAGS_CSUM               0x00000001
3390 #define IGB_TX_FLAGS_VLAN               0x00000002
3391 #define IGB_TX_FLAGS_TSO                0x00000004
3392 #define IGB_TX_FLAGS_IPV4               0x00000008
3393 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3394 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3395 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3396
3397 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3398                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3399 {
3400         struct e1000_adv_tx_context_desc *context_desc;
3401         unsigned int i;
3402         int err;
3403         struct igb_buffer *buffer_info;
3404         u32 info = 0, tu_cmd = 0;
3405         u32 mss_l4len_idx;
3406         u8 l4len;
3407
3408         if (skb_header_cloned(skb)) {
3409                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3410                 if (err)
3411                         return err;
3412         }
3413
3414         l4len = tcp_hdrlen(skb);
3415         *hdr_len += l4len;
3416
3417         if (skb->protocol == htons(ETH_P_IP)) {
3418                 struct iphdr *iph = ip_hdr(skb);
3419                 iph->tot_len = 0;
3420                 iph->check = 0;
3421                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3422                                                          iph->daddr, 0,
3423                                                          IPPROTO_TCP,
3424                                                          0);
3425         } else if (skb_is_gso_v6(skb)) {
3426                 ipv6_hdr(skb)->payload_len = 0;
3427                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3428                                                        &ipv6_hdr(skb)->daddr,
3429                                                        0, IPPROTO_TCP, 0);
3430         }
3431
3432         i = tx_ring->next_to_use;
3433
3434         buffer_info = &tx_ring->buffer_info[i];
3435         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3436         /* VLAN MACLEN IPLEN */
3437         if (tx_flags & IGB_TX_FLAGS_VLAN)
3438                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3439         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3440         *hdr_len += skb_network_offset(skb);
3441         info |= skb_network_header_len(skb);
3442         *hdr_len += skb_network_header_len(skb);
3443         context_desc->vlan_macip_lens = cpu_to_le32(info);
3444
3445         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3446         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3447
3448         if (skb->protocol == htons(ETH_P_IP))
3449                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3450         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3451
3452         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3453
3454         /* MSS L4LEN IDX */
3455         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3456         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3457
3458         /* For 82575, context index must be unique per ring. */
3459         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3460                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3461
3462         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3463         context_desc->seqnum_seed = 0;
3464
3465         buffer_info->time_stamp = jiffies;
3466         buffer_info->next_to_watch = i;
3467         buffer_info->dma = 0;
3468         i++;
3469         if (i == tx_ring->count)
3470                 i = 0;
3471
3472         tx_ring->next_to_use = i;
3473
3474         return true;
3475 }
3476
3477 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3478                                    struct sk_buff *skb, u32 tx_flags)
3479 {
3480         struct e1000_adv_tx_context_desc *context_desc;
3481         struct pci_dev *pdev = tx_ring->pdev;
3482         struct igb_buffer *buffer_info;
3483         u32 info = 0, tu_cmd = 0;
3484         unsigned int i;
3485
3486         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3487             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3488                 i = tx_ring->next_to_use;
3489                 buffer_info = &tx_ring->buffer_info[i];
3490                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3491
3492                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3493                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3494
3495                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3496                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3497                         info |= skb_network_header_len(skb);
3498
3499                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3500
3501                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3502
3503                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3504                         __be16 protocol;
3505
3506                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3507                                 const struct vlan_ethhdr *vhdr =
3508                                           (const struct vlan_ethhdr*)skb->data;
3509
3510                                 protocol = vhdr->h_vlan_encapsulated_proto;
3511                         } else {
3512                                 protocol = skb->protocol;
3513                         }
3514
3515                         switch (protocol) {
3516                         case cpu_to_be16(ETH_P_IP):
3517                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3518                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3519                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3520                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3521                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3522                                 break;
3523                         case cpu_to_be16(ETH_P_IPV6):
3524                                 /* XXX what about other V6 headers?? */
3525                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3526                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3527                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3528                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3529                                 break;
3530                         default:
3531                                 if (unlikely(net_ratelimit()))
3532                                         dev_warn(&pdev->dev,
3533                                             "partial checksum but proto=%x!\n",
3534                                             skb->protocol);
3535                                 break;
3536                         }
3537                 }
3538
3539                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3540                 context_desc->seqnum_seed = 0;
3541                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3542                         context_desc->mss_l4len_idx =
3543                                 cpu_to_le32(tx_ring->reg_idx << 4);
3544
3545                 buffer_info->time_stamp = jiffies;
3546                 buffer_info->next_to_watch = i;
3547                 buffer_info->dma = 0;
3548
3549                 i++;
3550                 if (i == tx_ring->count)
3551                         i = 0;
3552                 tx_ring->next_to_use = i;
3553
3554                 return true;
3555         }
3556         return false;
3557 }
3558
3559 #define IGB_MAX_TXD_PWR 16
3560 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3561
3562 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3563                                  unsigned int first)
3564 {
3565         struct igb_buffer *buffer_info;
3566         struct pci_dev *pdev = tx_ring->pdev;
3567         unsigned int len = skb_headlen(skb);
3568         unsigned int count = 0, i;
3569         unsigned int f;
3570
3571         i = tx_ring->next_to_use;
3572
3573         buffer_info = &tx_ring->buffer_info[i];
3574         BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3575         buffer_info->length = len;
3576         /* set time_stamp *before* dma to help avoid a possible race */
3577         buffer_info->time_stamp = jiffies;
3578         buffer_info->next_to_watch = i;
3579         buffer_info->dma = pci_map_single(pdev, skb->data, len,
3580                                           PCI_DMA_TODEVICE);
3581         if (pci_dma_mapping_error(pdev, buffer_info->dma))
3582                 goto dma_error;
3583
3584         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3585                 struct skb_frag_struct *frag;
3586
3587                 count++;
3588                 i++;
3589                 if (i == tx_ring->count)
3590                         i = 0;
3591
3592                 frag = &skb_shinfo(skb)->frags[f];
3593                 len = frag->size;
3594
3595                 buffer_info = &tx_ring->buffer_info[i];
3596                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3597                 buffer_info->length = len;
3598                 buffer_info->time_stamp = jiffies;
3599                 buffer_info->next_to_watch = i;
3600                 buffer_info->mapped_as_page = true;
3601                 buffer_info->dma = pci_map_page(pdev,
3602                                                 frag->page,
3603                                                 frag->page_offset,
3604                                                 len,
3605                                                 PCI_DMA_TODEVICE);
3606                 if (pci_dma_mapping_error(pdev, buffer_info->dma))
3607                         goto dma_error;
3608
3609         }
3610
3611         tx_ring->buffer_info[i].skb = skb;
3612         tx_ring->buffer_info[i].gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3613         tx_ring->buffer_info[first].next_to_watch = i;
3614
3615         return ++count;
3616
3617 dma_error:
3618         dev_err(&pdev->dev, "TX DMA map failed\n");
3619
3620         /* clear timestamp and dma mappings for failed buffer_info mapping */
3621         buffer_info->dma = 0;
3622         buffer_info->time_stamp = 0;
3623         buffer_info->length = 0;
3624         buffer_info->next_to_watch = 0;
3625         buffer_info->mapped_as_page = false;
3626
3627         /* clear timestamp and dma mappings for remaining portion of packet */
3628         while (count--) {
3629                 if (i == 0)
3630                         i = tx_ring->count;
3631                 i--;
3632                 buffer_info = &tx_ring->buffer_info[i];
3633                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3634         }
3635
3636         return 0;
3637 }
3638
3639 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3640                                     u32 tx_flags, int count, u32 paylen,
3641                                     u8 hdr_len)
3642 {
3643         union e1000_adv_tx_desc *tx_desc;
3644         struct igb_buffer *buffer_info;
3645         u32 olinfo_status = 0, cmd_type_len;
3646         unsigned int i = tx_ring->next_to_use;
3647
3648         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3649                         E1000_ADVTXD_DCMD_DEXT);
3650
3651         if (tx_flags & IGB_TX_FLAGS_VLAN)
3652                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3653
3654         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3655                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3656
3657         if (tx_flags & IGB_TX_FLAGS_TSO) {
3658                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3659
3660                 /* insert tcp checksum */
3661                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3662
3663                 /* insert ip checksum */
3664                 if (tx_flags & IGB_TX_FLAGS_IPV4)
3665                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3666
3667         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3668                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3669         }
3670
3671         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3672             (tx_flags & (IGB_TX_FLAGS_CSUM |
3673                          IGB_TX_FLAGS_TSO |
3674                          IGB_TX_FLAGS_VLAN)))
3675                 olinfo_status |= tx_ring->reg_idx << 4;
3676
3677         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3678
3679         do {
3680                 buffer_info = &tx_ring->buffer_info[i];
3681                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3682                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3683                 tx_desc->read.cmd_type_len =
3684                         cpu_to_le32(cmd_type_len | buffer_info->length);
3685                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3686                 count--;
3687                 i++;
3688                 if (i == tx_ring->count)
3689                         i = 0;
3690         } while (count > 0);
3691
3692         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3693         /* Force memory writes to complete before letting h/w
3694          * know there are new descriptors to fetch.  (Only
3695          * applicable for weak-ordered memory model archs,
3696          * such as IA-64). */
3697         wmb();
3698
3699         tx_ring->next_to_use = i;
3700         writel(i, tx_ring->tail);
3701         /* we need this if more than one processor can write to our tail
3702          * at a time, it syncronizes IO on IA64/Altix systems */
3703         mmiowb();
3704 }
3705
3706 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3707 {
3708         struct net_device *netdev = tx_ring->netdev;
3709
3710         netif_stop_subqueue(netdev, tx_ring->queue_index);
3711
3712         /* Herbert's original patch had:
3713          *  smp_mb__after_netif_stop_queue();
3714          * but since that doesn't exist yet, just open code it. */
3715         smp_mb();
3716
3717         /* We need to check again in a case another CPU has just
3718          * made room available. */
3719         if (igb_desc_unused(tx_ring) < size)
3720                 return -EBUSY;
3721
3722         /* A reprieve! */
3723         netif_wake_subqueue(netdev, tx_ring->queue_index);
3724         tx_ring->tx_stats.restart_queue++;
3725         return 0;
3726 }
3727
3728 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3729 {
3730         if (igb_desc_unused(tx_ring) >= size)
3731                 return 0;
3732         return __igb_maybe_stop_tx(tx_ring, size);
3733 }
3734
3735 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3736                                     struct igb_ring *tx_ring)
3737 {
3738         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3739         int tso = 0, count;
3740         u32 tx_flags = 0;
3741         u16 first;
3742         u8 hdr_len = 0;
3743         union skb_shared_tx *shtx = skb_tx(skb);
3744
3745         /* need: 1 descriptor per page,
3746          *       + 2 desc gap to keep tail from touching head,
3747          *       + 1 desc for skb->data,
3748          *       + 1 desc for context descriptor,
3749          * otherwise try next time */
3750         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3751                 /* this is a hard error */
3752                 return NETDEV_TX_BUSY;
3753         }
3754
3755         if (unlikely(shtx->hardware)) {
3756                 shtx->in_progress = 1;
3757                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3758         }
3759
3760         if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3761                 tx_flags |= IGB_TX_FLAGS_VLAN;
3762                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3763         }
3764
3765         if (skb->protocol == htons(ETH_P_IP))
3766                 tx_flags |= IGB_TX_FLAGS_IPV4;
3767
3768         first = tx_ring->next_to_use;
3769         if (skb_is_gso(skb)) {
3770                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3771
3772                 if (tso < 0) {
3773                         dev_kfree_skb_any(skb);
3774                         return NETDEV_TX_OK;
3775                 }
3776         }
3777
3778         if (tso)
3779                 tx_flags |= IGB_TX_FLAGS_TSO;
3780         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3781                  (skb->ip_summed == CHECKSUM_PARTIAL))
3782                 tx_flags |= IGB_TX_FLAGS_CSUM;
3783
3784         /*
3785          * count reflects descriptors mapped, if 0 or less then mapping error
3786          * has occured and we need to rewind the descriptor queue
3787          */
3788         count = igb_tx_map_adv(tx_ring, skb, first);
3789         if (!count) {
3790                 dev_kfree_skb_any(skb);
3791                 tx_ring->buffer_info[first].time_stamp = 0;
3792                 tx_ring->next_to_use = first;
3793                 return NETDEV_TX_OK;
3794         }
3795
3796         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3797
3798         /* Make sure there is space in the ring for the next send. */
3799         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3800
3801         return NETDEV_TX_OK;
3802 }
3803
3804 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3805                                       struct net_device *netdev)
3806 {
3807         struct igb_adapter *adapter = netdev_priv(netdev);
3808         struct igb_ring *tx_ring;
3809         int r_idx = 0;
3810
3811         if (test_bit(__IGB_DOWN, &adapter->state)) {
3812                 dev_kfree_skb_any(skb);
3813                 return NETDEV_TX_OK;
3814         }
3815
3816         if (skb->len <= 0) {
3817                 dev_kfree_skb_any(skb);
3818                 return NETDEV_TX_OK;
3819         }
3820
3821         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3822         tx_ring = adapter->multi_tx_table[r_idx];
3823
3824         /* This goes back to the question of how to logically map a tx queue
3825          * to a flow.  Right now, performance is impacted slightly negatively
3826          * if using multiple tx queues.  If the stack breaks away from a
3827          * single qdisc implementation, we can look at this again. */
3828         return igb_xmit_frame_ring_adv(skb, tx_ring);
3829 }
3830
3831 /**
3832  * igb_tx_timeout - Respond to a Tx Hang
3833  * @netdev: network interface device structure
3834  **/
3835 static void igb_tx_timeout(struct net_device *netdev)
3836 {
3837         struct igb_adapter *adapter = netdev_priv(netdev);
3838         struct e1000_hw *hw = &adapter->hw;
3839
3840         /* Do the reset outside of interrupt context */
3841         adapter->tx_timeout_count++;
3842
3843         if (hw->mac.type == e1000_82580)
3844                 hw->dev_spec._82575.global_device_reset = true;
3845
3846         schedule_work(&adapter->reset_task);
3847         wr32(E1000_EICS,
3848              (adapter->eims_enable_mask & ~adapter->eims_other));
3849 }
3850
3851 static void igb_reset_task(struct work_struct *work)
3852 {
3853         struct igb_adapter *adapter;
3854         adapter = container_of(work, struct igb_adapter, reset_task);
3855
3856         igb_reinit_locked(adapter);
3857 }
3858
3859 /**
3860  * igb_get_stats - Get System Network Statistics
3861  * @netdev: network interface device structure
3862  *
3863  * Returns the address of the device statistics structure.
3864  * The statistics are actually updated from the timer callback.
3865  **/
3866 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3867 {
3868         /* only return the current stats */
3869         return &netdev->stats;
3870 }
3871
3872 /**
3873  * igb_change_mtu - Change the Maximum Transfer Unit
3874  * @netdev: network interface device structure
3875  * @new_mtu: new value for maximum frame size
3876  *
3877  * Returns 0 on success, negative on failure
3878  **/
3879 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3880 {
3881         struct igb_adapter *adapter = netdev_priv(netdev);
3882         struct pci_dev *pdev = adapter->pdev;
3883         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3884         u32 rx_buffer_len, i;
3885
3886         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3887                 dev_err(&pdev->dev, "Invalid MTU setting\n");
3888                 return -EINVAL;
3889         }
3890
3891         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3892                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
3893                 return -EINVAL;
3894         }
3895
3896         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3897                 msleep(1);
3898
3899         /* igb_down has a dependency on max_frame_size */
3900         adapter->max_frame_size = max_frame;
3901
3902         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3903          * means we reserve 2 more, this pushes us to allocate from the next
3904          * larger slab size.
3905          * i.e. RXBUFFER_2048 --> size-4096 slab
3906          */
3907
3908         if (adapter->hw.mac.type == e1000_82580)
3909                 max_frame += IGB_TS_HDR_LEN;
3910
3911         if (max_frame <= IGB_RXBUFFER_1024)
3912                 rx_buffer_len = IGB_RXBUFFER_1024;
3913         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3914                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3915         else
3916                 rx_buffer_len = IGB_RXBUFFER_128;
3917
3918         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
3919              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
3920                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
3921
3922         if ((adapter->hw.mac.type == e1000_82580) &&
3923             (rx_buffer_len == IGB_RXBUFFER_128))
3924                 rx_buffer_len += IGB_RXBUFFER_64;
3925
3926         if (netif_running(netdev))
3927                 igb_down(adapter);
3928
3929         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3930                  netdev->mtu, new_mtu);
3931         netdev->mtu = new_mtu;
3932
3933         for (i = 0; i < adapter->num_rx_queues; i++)
3934                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
3935
3936         if (netif_running(netdev))
3937                 igb_up(adapter);
3938         else
3939                 igb_reset(adapter);
3940
3941         clear_bit(__IGB_RESETTING, &adapter->state);
3942
3943         return 0;
3944 }
3945
3946 /**
3947  * igb_update_stats - Update the board statistics counters
3948  * @adapter: board private structure
3949  **/
3950
3951 void igb_update_stats(struct igb_adapter *adapter)
3952 {
3953         struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
3954         struct e1000_hw *hw = &adapter->hw;
3955         struct pci_dev *pdev = adapter->pdev;
3956         u32 reg, mpc;
3957         u16 phy_tmp;
3958         int i;
3959         u64 bytes, packets;
3960
3961 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3962
3963         /*
3964          * Prevent stats update while adapter is being reset, or if the pci
3965          * connection is down.
3966          */
3967         if (adapter->link_speed == 0)
3968                 return;
3969         if (pci_channel_offline(pdev))
3970                 return;
3971
3972         bytes = 0;
3973         packets = 0;
3974         for (i = 0; i < adapter->num_rx_queues; i++) {
3975                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
3976                 struct igb_ring *ring = adapter->rx_ring[i];
3977                 ring->rx_stats.drops += rqdpc_tmp;
3978                 net_stats->rx_fifo_errors += rqdpc_tmp;
3979                 bytes += ring->rx_stats.bytes;
3980                 packets += ring->rx_stats.packets;
3981         }
3982
3983         net_stats->rx_bytes = bytes;
3984         net_stats->rx_packets = packets;
3985
3986         bytes = 0;
3987         packets = 0;
3988         for (i = 0; i < adapter->num_tx_queues; i++) {
3989                 struct igb_ring *ring = adapter->tx_ring[i];
3990                 bytes += ring->tx_stats.bytes;
3991                 packets += ring->tx_stats.packets;
3992         }
3993         net_stats->tx_bytes = bytes;
3994         net_stats->tx_packets = packets;
3995
3996         /* read stats registers */
3997         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
3998         adapter->stats.gprc += rd32(E1000_GPRC);
3999         adapter->stats.gorc += rd32(E1000_GORCL);
4000         rd32(E1000_GORCH); /* clear GORCL */
4001         adapter->stats.bprc += rd32(E1000_BPRC);
4002         adapter->stats.mprc += rd32(E1000_MPRC);
4003         adapter->stats.roc += rd32(E1000_ROC);
4004
4005         adapter->stats.prc64 += rd32(E1000_PRC64);
4006         adapter->stats.prc127 += rd32(E1000_PRC127);
4007         adapter->stats.prc255 += rd32(E1000_PRC255);
4008         adapter->stats.prc511 += rd32(E1000_PRC511);
4009         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4010         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4011         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4012         adapter->stats.sec += rd32(E1000_SEC);
4013
4014         mpc = rd32(E1000_MPC);
4015         adapter->stats.mpc += mpc;
4016         net_stats->rx_fifo_errors += mpc;
4017         adapter->stats.scc += rd32(E1000_SCC);
4018         adapter->stats.ecol += rd32(E1000_ECOL);
4019         adapter->stats.mcc += rd32(E1000_MCC);
4020         adapter->stats.latecol += rd32(E1000_LATECOL);
4021         adapter->stats.dc += rd32(E1000_DC);
4022         adapter->stats.rlec += rd32(E1000_RLEC);
4023         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4024         adapter->stats.xontxc += rd32(E1000_XONTXC);
4025         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4026         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4027         adapter->stats.fcruc += rd32(E1000_FCRUC);
4028         adapter->stats.gptc += rd32(E1000_GPTC);
4029         adapter->stats.gotc += rd32(E1000_GOTCL);
4030         rd32(E1000_GOTCH); /* clear GOTCL */
4031         adapter->stats.rnbc += rd32(E1000_RNBC);
4032         adapter->stats.ruc += rd32(E1000_RUC);
4033         adapter->stats.rfc += rd32(E1000_RFC);
4034         adapter->stats.rjc += rd32(E1000_RJC);
4035         adapter->stats.tor += rd32(E1000_TORH);
4036         adapter->stats.tot += rd32(E1000_TOTH);
4037         adapter->stats.tpr += rd32(E1000_TPR);
4038
4039         adapter->stats.ptc64 += rd32(E1000_PTC64);
4040         adapter->stats.ptc127 += rd32(E1000_PTC127);
4041         adapter->stats.ptc255 += rd32(E1000_PTC255);
4042         adapter->stats.ptc511 += rd32(E1000_PTC511);
4043         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4044         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4045
4046         adapter->stats.mptc += rd32(E1000_MPTC);
4047         adapter->stats.bptc += rd32(E1000_BPTC);
4048
4049         adapter->stats.tpt += rd32(E1000_TPT);
4050         adapter->stats.colc += rd32(E1000_COLC);
4051
4052         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4053         /* read internal phy specific stats */
4054         reg = rd32(E1000_CTRL_EXT);
4055         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4056                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4057                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4058         }
4059
4060         adapter->stats.tsctc += rd32(E1000_TSCTC);
4061         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4062
4063         adapter->stats.iac += rd32(E1000_IAC);
4064         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4065         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4066         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4067         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4068         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4069         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4070         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4071         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4072
4073         /* Fill out the OS statistics structure */
4074         net_stats->multicast = adapter->stats.mprc;
4075         net_stats->collisions = adapter->stats.colc;
4076
4077         /* Rx Errors */
4078
4079         /* RLEC on some newer hardware can be incorrect so build
4080          * our own version based on RUC and ROC */
4081         net_stats->rx_errors = adapter->stats.rxerrc +
4082                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4083                 adapter->stats.ruc + adapter->stats.roc +
4084                 adapter->stats.cexterr;
4085         net_stats->rx_length_errors = adapter->stats.ruc +
4086                                       adapter->stats.roc;
4087         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4088         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4089         net_stats->rx_missed_errors = adapter->stats.mpc;
4090
4091         /* Tx Errors */
4092         net_stats->tx_errors = adapter->stats.ecol +
4093                                adapter->stats.latecol;
4094         net_stats->tx_aborted_errors = adapter->stats.ecol;
4095         net_stats->tx_window_errors = adapter->stats.latecol;
4096         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4097
4098         /* Tx Dropped needs to be maintained elsewhere */
4099
4100         /* Phy Stats */
4101         if (hw->phy.media_type == e1000_media_type_copper) {
4102                 if ((adapter->link_speed == SPEED_1000) &&
4103                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4104                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4105                         adapter->phy_stats.idle_errors += phy_tmp;
4106                 }
4107         }
4108
4109         /* Management Stats */
4110         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4111         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4112         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4113 }
4114
4115 static irqreturn_t igb_msix_other(int irq, void *data)
4116 {
4117         struct igb_adapter *adapter = data;
4118         struct e1000_hw *hw = &adapter->hw;
4119         u32 icr = rd32(E1000_ICR);
4120         /* reading ICR causes bit 31 of EICR to be cleared */
4121
4122         if (icr & E1000_ICR_DRSTA)
4123                 schedule_work(&adapter->reset_task);
4124
4125         if (icr & E1000_ICR_DOUTSYNC) {
4126                 /* HW is reporting DMA is out of sync */
4127                 adapter->stats.doosync++;
4128         }
4129
4130         /* Check for a mailbox event */
4131         if (icr & E1000_ICR_VMMB)
4132                 igb_msg_task(adapter);
4133
4134         if (icr & E1000_ICR_LSC) {
4135                 hw->mac.get_link_status = 1;
4136                 /* guard against interrupt when we're going down */
4137                 if (!test_bit(__IGB_DOWN, &adapter->state))
4138                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4139         }
4140
4141         if (adapter->vfs_allocated_count)
4142                 wr32(E1000_IMS, E1000_IMS_LSC |
4143                                 E1000_IMS_VMMB |
4144                                 E1000_IMS_DOUTSYNC);
4145         else
4146                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4147         wr32(E1000_EIMS, adapter->eims_other);
4148
4149         return IRQ_HANDLED;
4150 }
4151
4152 static void igb_write_itr(struct igb_q_vector *q_vector)
4153 {
4154         struct igb_adapter *adapter = q_vector->adapter;
4155         u32 itr_val = q_vector->itr_val & 0x7FFC;
4156
4157         if (!q_vector->set_itr)
4158                 return;
4159
4160         if (!itr_val)
4161                 itr_val = 0x4;
4162
4163         if (adapter->hw.mac.type == e1000_82575)
4164                 itr_val |= itr_val << 16;
4165         else
4166                 itr_val |= 0x8000000;
4167
4168         writel(itr_val, q_vector->itr_register);
4169         q_vector->set_itr = 0;
4170 }
4171
4172 static irqreturn_t igb_msix_ring(int irq, void *data)
4173 {
4174         struct igb_q_vector *q_vector = data;
4175
4176         /* Write the ITR value calculated from the previous interrupt. */
4177         igb_write_itr(q_vector);
4178
4179         napi_schedule(&q_vector->napi);
4180
4181         return IRQ_HANDLED;
4182 }
4183
4184 #ifdef CONFIG_IGB_DCA
4185 static void igb_update_dca(struct igb_q_vector *q_vector)
4186 {
4187         struct igb_adapter *adapter = q_vector->adapter;
4188         struct e1000_hw *hw = &adapter->hw;
4189         int cpu = get_cpu();
4190
4191         if (q_vector->cpu == cpu)
4192                 goto out_no_update;
4193
4194         if (q_vector->tx_ring) {
4195                 int q = q_vector->tx_ring->reg_idx;
4196                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4197                 if (hw->mac.type == e1000_82575) {
4198                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4199                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4200                 } else {
4201                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4202                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4203                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4204                 }
4205                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4206                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4207         }
4208         if (q_vector->rx_ring) {
4209                 int q = q_vector->rx_ring->reg_idx;
4210                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4211                 if (hw->mac.type == e1000_82575) {
4212                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4213                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4214                 } else {
4215                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4216                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4217                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4218                 }
4219                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4220                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4221                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4222                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4223         }
4224         q_vector->cpu = cpu;
4225 out_no_update:
4226         put_cpu();
4227 }
4228
4229 static void igb_setup_dca(struct igb_adapter *adapter)
4230 {
4231         struct e1000_hw *hw = &adapter->hw;
4232         int i;
4233
4234         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4235                 return;
4236
4237         /* Always use CB2 mode, difference is masked in the CB driver. */
4238         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4239
4240         for (i = 0; i < adapter->num_q_vectors; i++) {
4241                 adapter->q_vector[i]->cpu = -1;
4242                 igb_update_dca(adapter->q_vector[i]);
4243         }
4244 }
4245
4246 static int __igb_notify_dca(struct device *dev, void *data)
4247 {
4248         struct net_device *netdev = dev_get_drvdata(dev);
4249         struct igb_adapter *adapter = netdev_priv(netdev);
4250         struct pci_dev *pdev = adapter->pdev;
4251         struct e1000_hw *hw = &adapter->hw;
4252         unsigned long event = *(unsigned long *)data;
4253
4254         switch (event) {
4255         case DCA_PROVIDER_ADD:
4256                 /* if already enabled, don't do it again */
4257                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4258                         break;
4259                 if (dca_add_requester(dev) == 0) {
4260                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4261                         dev_info(&pdev->dev, "DCA enabled\n");
4262                         igb_setup_dca(adapter);
4263                         break;
4264                 }
4265                 /* Fall Through since DCA is disabled. */
4266         case DCA_PROVIDER_REMOVE:
4267                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4268                         /* without this a class_device is left
4269                          * hanging around in the sysfs model */
4270                         dca_remove_requester(dev);
4271                         dev_info(&pdev->dev, "DCA disabled\n");
4272                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4273                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4274                 }
4275                 break;
4276         }
4277
4278         return 0;
4279 }
4280
4281 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4282                           void *p)
4283 {
4284         int ret_val;
4285
4286         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4287                                          __igb_notify_dca);
4288
4289         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4290 }
4291 #endif /* CONFIG_IGB_DCA */
4292
4293 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4294 {
4295         struct e1000_hw *hw = &adapter->hw;
4296         u32 ping;
4297         int i;
4298
4299         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4300                 ping = E1000_PF_CONTROL_MSG;
4301                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4302                         ping |= E1000_VT_MSGTYPE_CTS;
4303                 igb_write_mbx(hw, &ping, 1, i);
4304         }
4305 }
4306
4307 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4308 {
4309         struct e1000_hw *hw = &adapter->hw;
4310         u32 vmolr = rd32(E1000_VMOLR(vf));
4311         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4312
4313         vf_data->flags |= ~(IGB_VF_FLAG_UNI_PROMISC |
4314                             IGB_VF_FLAG_MULTI_PROMISC);
4315         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4316
4317         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4318                 vmolr |= E1000_VMOLR_MPME;
4319                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4320         } else {
4321                 /*
4322                  * if we have hashes and we are clearing a multicast promisc
4323                  * flag we need to write the hashes to the MTA as this step
4324                  * was previously skipped
4325                  */
4326                 if (vf_data->num_vf_mc_hashes > 30) {
4327                         vmolr |= E1000_VMOLR_MPME;
4328                 } else if (vf_data->num_vf_mc_hashes) {
4329                         int j;
4330                         vmolr |= E1000_VMOLR_ROMPE;
4331                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4332                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4333                 }
4334         }
4335
4336         wr32(E1000_VMOLR(vf), vmolr);
4337
4338         /* there are flags left unprocessed, likely not supported */
4339         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4340                 return -EINVAL;
4341
4342         return 0;
4343
4344 }
4345
4346 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4347                                   u32 *msgbuf, u32 vf)
4348 {
4349         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4350         u16 *hash_list = (u16 *)&msgbuf[1];
4351         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4352         int i;
4353
4354         /* salt away the number of multicast addresses assigned
4355          * to this VF for later use to restore when the PF multi cast
4356          * list changes
4357          */
4358         vf_data->num_vf_mc_hashes = n;
4359
4360         /* only up to 30 hash values supported */
4361         if (n > 30)
4362                 n = 30;
4363
4364         /* store the hashes for later use */
4365         for (i = 0; i < n; i++)
4366                 vf_data->vf_mc_hashes[i] = hash_list[i];
4367
4368         /* Flush and reset the mta with the new values */
4369         igb_set_rx_mode(adapter->netdev);
4370
4371         return 0;
4372 }
4373
4374 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4375 {
4376         struct e1000_hw *hw = &adapter->hw;
4377         struct vf_data_storage *vf_data;
4378         int i, j;
4379
4380         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4381                 u32 vmolr = rd32(E1000_VMOLR(i));
4382                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4383
4384                 vf_data = &adapter->vf_data[i];
4385
4386                 if ((vf_data->num_vf_mc_hashes > 30) ||
4387                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4388                         vmolr |= E1000_VMOLR_MPME;
4389                 } else if (vf_data->num_vf_mc_hashes) {
4390                         vmolr |= E1000_VMOLR_ROMPE;
4391                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4392                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4393                 }
4394                 wr32(E1000_VMOLR(i), vmolr);
4395         }
4396 }
4397
4398 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4399 {
4400         struct e1000_hw *hw = &adapter->hw;
4401         u32 pool_mask, reg, vid;
4402         int i;
4403
4404         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4405
4406         /* Find the vlan filter for this id */
4407         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4408                 reg = rd32(E1000_VLVF(i));
4409
4410                 /* remove the vf from the pool */
4411                 reg &= ~pool_mask;
4412
4413                 /* if pool is empty then remove entry from vfta */
4414                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4415                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4416                         reg = 0;
4417                         vid = reg & E1000_VLVF_VLANID_MASK;
4418                         igb_vfta_set(hw, vid, false);
4419                 }
4420
4421                 wr32(E1000_VLVF(i), reg);
4422         }
4423
4424         adapter->vf_data[vf].vlans_enabled = 0;
4425 }
4426
4427 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4428 {
4429         struct e1000_hw *hw = &adapter->hw;
4430         u32 reg, i;
4431
4432         /* The vlvf table only exists on 82576 hardware and newer */
4433         if (hw->mac.type < e1000_82576)
4434                 return -1;
4435
4436         /* we only need to do this if VMDq is enabled */
4437         if (!adapter->vfs_allocated_count)
4438                 return -1;
4439
4440         /* Find the vlan filter for this id */
4441         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4442                 reg = rd32(E1000_VLVF(i));
4443                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4444                     vid == (reg & E1000_VLVF_VLANID_MASK))
4445                         break;
4446         }
4447
4448         if (add) {
4449                 if (i == E1000_VLVF_ARRAY_SIZE) {
4450                         /* Did not find a matching VLAN ID entry that was
4451                          * enabled.  Search for a free filter entry, i.e.
4452                          * one without the enable bit set
4453                          */
4454                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4455                                 reg = rd32(E1000_VLVF(i));
4456                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4457                                         break;
4458                         }
4459                 }
4460                 if (i < E1000_VLVF_ARRAY_SIZE) {
4461                         /* Found an enabled/available entry */
4462                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4463
4464                         /* if !enabled we need to set this up in vfta */
4465                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4466                                 /* add VID to filter table */
4467                                 igb_vfta_set(hw, vid, true);
4468                                 reg |= E1000_VLVF_VLANID_ENABLE;
4469                         }
4470                         reg &= ~E1000_VLVF_VLANID_MASK;
4471                         reg |= vid;
4472                         wr32(E1000_VLVF(i), reg);
4473
4474                         /* do not modify RLPML for PF devices */
4475                         if (vf >= adapter->vfs_allocated_count)
4476                                 return 0;
4477
4478                         if (!adapter->vf_data[vf].vlans_enabled) {
4479                                 u32 size;
4480                                 reg = rd32(E1000_VMOLR(vf));
4481                                 size = reg & E1000_VMOLR_RLPML_MASK;
4482                                 size += 4;
4483                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4484                                 reg |= size;
4485                                 wr32(E1000_VMOLR(vf), reg);
4486                         }
4487
4488                         adapter->vf_data[vf].vlans_enabled++;
4489                         return 0;
4490                 }
4491         } else {
4492                 if (i < E1000_VLVF_ARRAY_SIZE) {
4493                         /* remove vf from the pool */
4494                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4495                         /* if pool is empty then remove entry from vfta */
4496                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4497                                 reg = 0;
4498                                 igb_vfta_set(hw, vid, false);
4499                         }
4500                         wr32(E1000_VLVF(i), reg);
4501
4502                         /* do not modify RLPML for PF devices */
4503                         if (vf >= adapter->vfs_allocated_count)
4504                                 return 0;
4505
4506                         adapter->vf_data[vf].vlans_enabled--;
4507                         if (!adapter->vf_data[vf].vlans_enabled) {
4508                                 u32 size;
4509                                 reg = rd32(E1000_VMOLR(vf));
4510                                 size = reg & E1000_VMOLR_RLPML_MASK;
4511                                 size -= 4;
4512                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4513                                 reg |= size;
4514                                 wr32(E1000_VMOLR(vf), reg);
4515                         }
4516                 }
4517         }
4518         return 0;
4519 }
4520
4521 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4522 {
4523         struct e1000_hw *hw = &adapter->hw;
4524
4525         if (vid)
4526                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4527         else
4528                 wr32(E1000_VMVIR(vf), 0);
4529 }
4530
4531 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4532                                int vf, u16 vlan, u8 qos)
4533 {
4534         int err = 0;
4535         struct igb_adapter *adapter = netdev_priv(netdev);
4536
4537         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4538                 return -EINVAL;
4539         if (vlan || qos) {
4540                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4541                 if (err)
4542                         goto out;
4543                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4544                 igb_set_vmolr(adapter, vf, !vlan);
4545                 adapter->vf_data[vf].pf_vlan = vlan;
4546                 adapter->vf_data[vf].pf_qos = qos;
4547                 dev_info(&adapter->pdev->dev,
4548                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4549                 if (test_bit(__IGB_DOWN, &adapter->state)) {
4550                         dev_warn(&adapter->pdev->dev,
4551                                  "The VF VLAN has been set,"
4552                                  " but the PF device is not up.\n");
4553                         dev_warn(&adapter->pdev->dev,
4554                                  "Bring the PF device up before"
4555                                  " attempting to use the VF device.\n");
4556                 }
4557         } else {
4558                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4559                                    false, vf);
4560                 igb_set_vmvir(adapter, vlan, vf);
4561                 igb_set_vmolr(adapter, vf, true);
4562                 adapter->vf_data[vf].pf_vlan = 0;
4563                 adapter->vf_data[vf].pf_qos = 0;
4564        }
4565 out:
4566        return err;
4567 }
4568
4569 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4570 {
4571         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4572         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4573
4574         return igb_vlvf_set(adapter, vid, add, vf);
4575 }
4576
4577 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4578 {
4579         /* clear flags */
4580         adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4581         adapter->vf_data[vf].last_nack = jiffies;
4582
4583         /* reset offloads to defaults */
4584         igb_set_vmolr(adapter, vf, true);
4585
4586         /* reset vlans for device */
4587         igb_clear_vf_vfta(adapter, vf);
4588         if (adapter->vf_data[vf].pf_vlan)
4589                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4590                                     adapter->vf_data[vf].pf_vlan,
4591                                     adapter->vf_data[vf].pf_qos);
4592         else
4593                 igb_clear_vf_vfta(adapter, vf);
4594
4595         /* reset multicast table array for vf */
4596         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4597
4598         /* Flush and reset the mta with the new values */
4599         igb_set_rx_mode(adapter->netdev);
4600 }
4601
4602 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4603 {
4604         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4605
4606         /* generate a new mac address as we were hotplug removed/added */
4607         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4608                 random_ether_addr(vf_mac);
4609
4610         /* process remaining reset events */
4611         igb_vf_reset(adapter, vf);
4612 }
4613
4614 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4615 {
4616         struct e1000_hw *hw = &adapter->hw;
4617         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4618         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4619         u32 reg, msgbuf[3];
4620         u8 *addr = (u8 *)(&msgbuf[1]);
4621
4622         /* process all the same items cleared in a function level reset */
4623         igb_vf_reset(adapter, vf);
4624
4625         /* set vf mac address */
4626         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4627
4628         /* enable transmit and receive for vf */
4629         reg = rd32(E1000_VFTE);
4630         wr32(E1000_VFTE, reg | (1 << vf));
4631         reg = rd32(E1000_VFRE);
4632         wr32(E1000_VFRE, reg | (1 << vf));
4633
4634         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4635
4636         /* reply to reset with ack and vf mac address */
4637         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4638         memcpy(addr, vf_mac, 6);
4639         igb_write_mbx(hw, msgbuf, 3, vf);
4640 }
4641
4642 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4643 {
4644         unsigned char *addr = (char *)&msg[1];
4645         int err = -1;
4646
4647         if (is_valid_ether_addr(addr))
4648                 err = igb_set_vf_mac(adapter, vf, addr);
4649
4650         return err;
4651 }
4652
4653 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4654 {
4655         struct e1000_hw *hw = &adapter->hw;
4656         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4657         u32 msg = E1000_VT_MSGTYPE_NACK;
4658
4659         /* if device isn't clear to send it shouldn't be reading either */
4660         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4661             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4662                 igb_write_mbx(hw, &msg, 1, vf);
4663                 vf_data->last_nack = jiffies;
4664         }
4665 }
4666
4667 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4668 {
4669         struct pci_dev *pdev = adapter->pdev;
4670         u32 msgbuf[E1000_VFMAILBOX_SIZE];
4671         struct e1000_hw *hw = &adapter->hw;
4672         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4673         s32 retval;
4674
4675         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4676
4677         if (retval) {
4678                 /* if receive failed revoke VF CTS stats and restart init */
4679                 dev_err(&pdev->dev, "Error receiving message from VF\n");
4680                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
4681                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4682                         return;
4683                 goto out;
4684         }
4685
4686         /* this is a message we already processed, do nothing */
4687         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4688                 return;
4689
4690         /*
4691          * until the vf completes a reset it should not be
4692          * allowed to start any configuration.
4693          */
4694
4695         if (msgbuf[0] == E1000_VF_RESET) {
4696                 igb_vf_reset_msg(adapter, vf);
4697                 return;
4698         }
4699
4700         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4701                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4702                         return;
4703                 retval = -1;
4704                 goto out;
4705         }
4706
4707         switch ((msgbuf[0] & 0xFFFF)) {
4708         case E1000_VF_SET_MAC_ADDR:
4709                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4710                 break;
4711         case E1000_VF_SET_PROMISC:
4712                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4713                 break;
4714         case E1000_VF_SET_MULTICAST:
4715                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4716                 break;
4717         case E1000_VF_SET_LPE:
4718                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4719                 break;
4720         case E1000_VF_SET_VLAN:
4721                 if (adapter->vf_data[vf].pf_vlan)
4722                         retval = -1;
4723                 else
4724                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4725                 break;
4726         default:
4727                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4728                 retval = -1;
4729                 break;
4730         }
4731
4732         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4733 out:
4734         /* notify the VF of the results of what it sent us */
4735         if (retval)
4736                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4737         else
4738                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4739
4740         igb_write_mbx(hw, msgbuf, 1, vf);
4741 }
4742
4743 static void igb_msg_task(struct igb_adapter *adapter)
4744 {
4745         struct e1000_hw *hw = &adapter->hw;
4746         u32 vf;
4747
4748         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4749                 /* process any reset requests */
4750                 if (!igb_check_for_rst(hw, vf))
4751                         igb_vf_reset_event(adapter, vf);
4752
4753                 /* process any messages pending */
4754                 if (!igb_check_for_msg(hw, vf))
4755                         igb_rcv_msg_from_vf(adapter, vf);
4756
4757                 /* process any acks */
4758                 if (!igb_check_for_ack(hw, vf))
4759                         igb_rcv_ack_from_vf(adapter, vf);
4760         }
4761 }
4762
4763 /**
4764  *  igb_set_uta - Set unicast filter table address
4765  *  @adapter: board private structure
4766  *
4767  *  The unicast table address is a register array of 32-bit registers.
4768  *  The table is meant to be used in a way similar to how the MTA is used
4769  *  however due to certain limitations in the hardware it is necessary to
4770  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscous
4771  *  enable bit to allow vlan tag stripping when promiscous mode is enabled
4772  **/
4773 static void igb_set_uta(struct igb_adapter *adapter)
4774 {
4775         struct e1000_hw *hw = &adapter->hw;
4776         int i;
4777
4778         /* The UTA table only exists on 82576 hardware and newer */
4779         if (hw->mac.type < e1000_82576)
4780                 return;
4781
4782         /* we only need to do this if VMDq is enabled */
4783         if (!adapter->vfs_allocated_count)
4784                 return;
4785
4786         for (i = 0; i < hw->mac.uta_reg_count; i++)
4787                 array_wr32(E1000_UTA, i, ~0);
4788 }
4789
4790 /**
4791  * igb_intr_msi - Interrupt Handler
4792  * @irq: interrupt number
4793  * @data: pointer to a network interface device structure
4794  **/
4795 static irqreturn_t igb_intr_msi(int irq, void *data)
4796 {
4797         struct igb_adapter *adapter = data;
4798         struct igb_q_vector *q_vector = adapter->q_vector[0];
4799         struct e1000_hw *hw = &adapter->hw;
4800         /* read ICR disables interrupts using IAM */
4801         u32 icr = rd32(E1000_ICR);
4802
4803         igb_write_itr(q_vector);
4804
4805         if (icr & E1000_ICR_DRSTA)
4806                 schedule_work(&adapter->reset_task);
4807
4808         if (icr & E1000_ICR_DOUTSYNC) {
4809                 /* HW is reporting DMA is out of sync */
4810                 adapter->stats.doosync++;
4811         }
4812
4813         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4814                 hw->mac.get_link_status = 1;
4815                 if (!test_bit(__IGB_DOWN, &adapter->state))
4816                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4817         }
4818
4819         napi_schedule(&q_vector->napi);
4820
4821         return IRQ_HANDLED;
4822 }
4823
4824 /**
4825  * igb_intr - Legacy Interrupt Handler
4826  * @irq: interrupt number
4827  * @data: pointer to a network interface device structure
4828  **/
4829 static irqreturn_t igb_intr(int irq, void *data)
4830 {
4831         struct igb_adapter *adapter = data;
4832         struct igb_q_vector *q_vector = adapter->q_vector[0];
4833         struct e1000_hw *hw = &adapter->hw;
4834         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4835          * need for the IMC write */
4836         u32 icr = rd32(E1000_ICR);
4837         if (!icr)
4838                 return IRQ_NONE;  /* Not our interrupt */
4839
4840         igb_write_itr(q_vector);
4841
4842         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4843          * not set, then the adapter didn't send an interrupt */
4844         if (!(icr & E1000_ICR_INT_ASSERTED))
4845                 return IRQ_NONE;
4846
4847         if (icr & E1000_ICR_DRSTA)
4848                 schedule_work(&adapter->reset_task);
4849
4850         if (icr & E1000_ICR_DOUTSYNC) {
4851                 /* HW is reporting DMA is out of sync */
4852                 adapter->stats.doosync++;
4853         }
4854
4855         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4856                 hw->mac.get_link_status = 1;
4857                 /* guard against interrupt when we're going down */
4858                 if (!test_bit(__IGB_DOWN, &adapter->state))
4859                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4860         }
4861
4862         napi_schedule(&q_vector->napi);
4863
4864         return IRQ_HANDLED;
4865 }
4866
4867 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4868 {
4869         struct igb_adapter *adapter = q_vector->adapter;
4870         struct e1000_hw *hw = &adapter->hw;
4871
4872         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4873             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4874                 if (!adapter->msix_entries)
4875                         igb_set_itr(adapter);
4876                 else
4877                         igb_update_ring_itr(q_vector);
4878         }
4879
4880         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4881                 if (adapter->msix_entries)
4882                         wr32(E1000_EIMS, q_vector->eims_value);
4883                 else
4884                         igb_irq_enable(adapter);
4885         }
4886 }
4887
4888 /**
4889  * igb_poll - NAPI Rx polling callback
4890  * @napi: napi polling structure
4891  * @budget: count of how many packets we should handle
4892  **/
4893 static int igb_poll(struct napi_struct *napi, int budget)
4894 {
4895         struct igb_q_vector *q_vector = container_of(napi,
4896                                                      struct igb_q_vector,
4897                                                      napi);
4898         int tx_clean_complete = 1, work_done = 0;
4899
4900 #ifdef CONFIG_IGB_DCA
4901         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4902                 igb_update_dca(q_vector);
4903 #endif
4904         if (q_vector->tx_ring)
4905                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4906
4907         if (q_vector->rx_ring)
4908                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4909
4910         if (!tx_clean_complete)
4911                 work_done = budget;
4912
4913         /* If not enough Rx work done, exit the polling mode */
4914         if (work_done < budget) {
4915                 napi_complete(napi);
4916                 igb_ring_irq_enable(q_vector);
4917         }
4918
4919         return work_done;
4920 }
4921
4922 /**
4923  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4924  * @adapter: board private structure
4925  * @shhwtstamps: timestamp structure to update
4926  * @regval: unsigned 64bit system time value.
4927  *
4928  * We need to convert the system time value stored in the RX/TXSTMP registers
4929  * into a hwtstamp which can be used by the upper level timestamping functions
4930  */
4931 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4932                                    struct skb_shared_hwtstamps *shhwtstamps,
4933                                    u64 regval)
4934 {
4935         u64 ns;
4936
4937         /*
4938          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, shift this up to
4939          * 24 to match clock shift we setup earlier.
4940          */
4941         if (adapter->hw.mac.type == e1000_82580)
4942                 regval <<= IGB_82580_TSYNC_SHIFT;
4943
4944         ns = timecounter_cyc2time(&adapter->clock, regval);
4945         timecompare_update(&adapter->compare, ns);
4946         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4947         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4948         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4949 }
4950
4951 /**
4952  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4953  * @q_vector: pointer to q_vector containing needed info
4954  * @skb: packet that was just sent
4955  *
4956  * If we were asked to do hardware stamping and such a time stamp is
4957  * available, then it must have been for this skb here because we only
4958  * allow only one such packet into the queue.
4959  */
4960 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4961 {
4962         struct igb_adapter *adapter = q_vector->adapter;
4963         union skb_shared_tx *shtx = skb_tx(skb);
4964         struct e1000_hw *hw = &adapter->hw;
4965         struct skb_shared_hwtstamps shhwtstamps;
4966         u64 regval;
4967
4968         /* if skb does not support hw timestamp or TX stamp not valid exit */
4969         if (likely(!shtx->hardware) ||
4970             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4971                 return;
4972
4973         regval = rd32(E1000_TXSTMPL);
4974         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4975
4976         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4977         skb_tstamp_tx(skb, &shhwtstamps);
4978 }
4979
4980 /**
4981  * igb_clean_tx_irq - Reclaim resources after transmit completes
4982  * @q_vector: pointer to q_vector containing needed info
4983  * returns true if ring is completely cleaned
4984  **/
4985 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4986 {
4987         struct igb_adapter *adapter = q_vector->adapter;
4988         struct igb_ring *tx_ring = q_vector->tx_ring;
4989         struct net_device *netdev = tx_ring->netdev;
4990         struct e1000_hw *hw = &adapter->hw;
4991         struct igb_buffer *buffer_info;
4992         struct sk_buff *skb;
4993         union e1000_adv_tx_desc *tx_desc, *eop_desc;
4994         unsigned int total_bytes = 0, total_packets = 0;
4995         unsigned int i, eop, count = 0;
4996         bool cleaned = false;
4997
4998         i = tx_ring->next_to_clean;
4999         eop = tx_ring->buffer_info[i].next_to_watch;
5000         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5001
5002         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5003                (count < tx_ring->count)) {
5004                 for (cleaned = false; !cleaned; count++) {
5005                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5006                         buffer_info = &tx_ring->buffer_info[i];
5007                         cleaned = (i == eop);
5008                         skb = buffer_info->skb;
5009
5010                         if (skb) {
5011                                 unsigned int segs, bytecount;
5012                                 /* gso_segs is currently only valid for tcp */
5013                                 segs = buffer_info->gso_segs;
5014                                 /* multiply data chunks by size of headers */
5015                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
5016                                             skb->len;
5017                                 total_packets += segs;
5018                                 total_bytes += bytecount;
5019
5020                                 igb_tx_hwtstamp(q_vector, skb);
5021                         }
5022
5023                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5024                         tx_desc->wb.status = 0;
5025
5026                         i++;
5027                         if (i == tx_ring->count)
5028                                 i = 0;
5029                 }
5030                 eop = tx_ring->buffer_info[i].next_to_watch;
5031                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5032         }
5033
5034         tx_ring->next_to_clean = i;
5035
5036         if (unlikely(count &&
5037                      netif_carrier_ok(netdev) &&
5038                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5039                 /* Make sure that anybody stopping the queue after this
5040                  * sees the new next_to_clean.
5041                  */
5042                 smp_mb();
5043                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5044                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5045                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5046                         tx_ring->tx_stats.restart_queue++;
5047                 }
5048         }
5049
5050         if (tx_ring->detect_tx_hung) {
5051                 /* Detect a transmit hang in hardware, this serializes the
5052                  * check with the clearing of time_stamp and movement of i */
5053                 tx_ring->detect_tx_hung = false;
5054                 if (tx_ring->buffer_info[i].time_stamp &&
5055                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5056                                (adapter->tx_timeout_factor * HZ)) &&
5057                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5058
5059                         /* detected Tx unit hang */
5060                         dev_err(&tx_ring->pdev->dev,
5061                                 "Detected Tx Unit Hang\n"
5062                                 "  Tx Queue             <%d>\n"
5063                                 "  TDH                  <%x>\n"
5064                                 "  TDT                  <%x>\n"
5065                                 "  next_to_use          <%x>\n"
5066                                 "  next_to_clean        <%x>\n"
5067                                 "buffer_info[next_to_clean]\n"
5068                                 "  time_stamp           <%lx>\n"
5069                                 "  next_to_watch        <%x>\n"
5070                                 "  jiffies              <%lx>\n"
5071                                 "  desc.status          <%x>\n",
5072                                 tx_ring->queue_index,
5073                                 readl(tx_ring->head),
5074                                 readl(tx_ring->tail),
5075                                 tx_ring->next_to_use,
5076                                 tx_ring->next_to_clean,
5077                                 tx_ring->buffer_info[eop].time_stamp,
5078                                 eop,
5079                                 jiffies,
5080                                 eop_desc->wb.status);
5081                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5082                 }
5083         }
5084         tx_ring->total_bytes += total_bytes;
5085         tx_ring->total_packets += total_packets;
5086         tx_ring->tx_stats.bytes += total_bytes;
5087         tx_ring->tx_stats.packets += total_packets;
5088         return (count < tx_ring->count);
5089 }
5090
5091 /**
5092  * igb_receive_skb - helper function to handle rx indications
5093  * @q_vector: structure containing interrupt and ring information
5094  * @skb: packet to send up
5095  * @vlan_tag: vlan tag for packet
5096  **/
5097 static void igb_receive_skb(struct igb_q_vector *q_vector,
5098                             struct sk_buff *skb,
5099                             u16 vlan_tag)
5100 {
5101         struct igb_adapter *adapter = q_vector->adapter;
5102
5103         if (vlan_tag && adapter->vlgrp)
5104                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5105                                  vlan_tag, skb);
5106         else
5107                 napi_gro_receive(&q_vector->napi, skb);
5108 }
5109
5110 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5111                                        u32 status_err, struct sk_buff *skb)
5112 {
5113         skb->ip_summed = CHECKSUM_NONE;
5114
5115         /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5116         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5117              (status_err & E1000_RXD_STAT_IXSM))
5118                 return;
5119
5120         /* TCP/UDP checksum error bit is set */
5121         if (status_err &
5122             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5123                 /*
5124                  * work around errata with sctp packets where the TCPE aka
5125                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5126                  * packets, (aka let the stack check the crc32c)
5127                  */
5128                 if ((skb->len == 60) &&
5129                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
5130                         ring->rx_stats.csum_err++;
5131
5132                 /* let the stack verify checksum errors */
5133                 return;
5134         }
5135         /* It must be a TCP or UDP packet with a valid checksum */
5136         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5137                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5138
5139         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
5140 }
5141
5142 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5143                                    struct sk_buff *skb)
5144 {
5145         struct igb_adapter *adapter = q_vector->adapter;
5146         struct e1000_hw *hw = &adapter->hw;
5147         u64 regval;
5148
5149         /*
5150          * If this bit is set, then the RX registers contain the time stamp. No
5151          * other packet will be time stamped until we read these registers, so
5152          * read the registers to make them available again. Because only one
5153          * packet can be time stamped at a time, we know that the register
5154          * values must belong to this one here and therefore we don't need to
5155          * compare any of the additional attributes stored for it.
5156          *
5157          * If nothing went wrong, then it should have a skb_shared_tx that we
5158          * can turn into a skb_shared_hwtstamps.
5159          */
5160         if (staterr & E1000_RXDADV_STAT_TSIP) {
5161                 u32 *stamp = (u32 *)skb->data;
5162                 regval = le32_to_cpu(*(stamp + 2));
5163                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5164                 skb_pull(skb, IGB_TS_HDR_LEN);
5165         } else {
5166                 if(!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5167                         return;
5168
5169                 regval = rd32(E1000_RXSTMPL);
5170                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5171         }
5172
5173         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5174 }
5175 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5176                                union e1000_adv_rx_desc *rx_desc)
5177 {
5178         /* HW will not DMA in data larger than the given buffer, even if it
5179          * parses the (NFS, of course) header to be larger.  In that case, it
5180          * fills the header buffer and spills the rest into the page.
5181          */
5182         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5183                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5184         if (hlen > rx_ring->rx_buffer_len)
5185                 hlen = rx_ring->rx_buffer_len;
5186         return hlen;
5187 }
5188
5189 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5190                                  int *work_done, int budget)
5191 {
5192         struct igb_ring *rx_ring = q_vector->rx_ring;
5193         struct net_device *netdev = rx_ring->netdev;
5194         struct pci_dev *pdev = rx_ring->pdev;
5195         union e1000_adv_rx_desc *rx_desc , *next_rxd;
5196         struct igb_buffer *buffer_info , *next_buffer;
5197         struct sk_buff *skb;
5198         bool cleaned = false;
5199         int cleaned_count = 0;
5200         int current_node = numa_node_id();
5201         unsigned int total_bytes = 0, total_packets = 0;
5202         unsigned int i;
5203         u32 staterr;
5204         u16 length;
5205         u16 vlan_tag;
5206
5207         i = rx_ring->next_to_clean;
5208         buffer_info = &rx_ring->buffer_info[i];
5209         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5210         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5211
5212         while (staterr & E1000_RXD_STAT_DD) {
5213                 if (*work_done >= budget)
5214                         break;
5215                 (*work_done)++;
5216
5217                 skb = buffer_info->skb;
5218                 prefetch(skb->data - NET_IP_ALIGN);
5219                 buffer_info->skb = NULL;
5220
5221                 i++;
5222                 if (i == rx_ring->count)
5223                         i = 0;
5224
5225                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5226                 prefetch(next_rxd);
5227                 next_buffer = &rx_ring->buffer_info[i];
5228
5229                 length = le16_to_cpu(rx_desc->wb.upper.length);
5230                 cleaned = true;
5231                 cleaned_count++;
5232
5233                 if (buffer_info->dma) {
5234                         pci_unmap_single(pdev, buffer_info->dma,
5235                                          rx_ring->rx_buffer_len,
5236                                          PCI_DMA_FROMDEVICE);
5237                         buffer_info->dma = 0;
5238                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5239                                 skb_put(skb, length);
5240                                 goto send_up;
5241                         }
5242                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5243                 }
5244
5245                 if (length) {
5246                         pci_unmap_page(pdev, buffer_info->page_dma,
5247                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
5248                         buffer_info->page_dma = 0;
5249
5250                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags++,
5251                                                 buffer_info->page,
5252                                                 buffer_info->page_offset,
5253                                                 length);
5254
5255                         if ((page_count(buffer_info->page) != 1) ||
5256                             (page_to_nid(buffer_info->page) != current_node))
5257                                 buffer_info->page = NULL;
5258                         else
5259                                 get_page(buffer_info->page);
5260
5261                         skb->len += length;
5262                         skb->data_len += length;
5263                         skb->truesize += length;
5264                 }
5265
5266                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5267                         buffer_info->skb = next_buffer->skb;
5268                         buffer_info->dma = next_buffer->dma;
5269                         next_buffer->skb = skb;
5270                         next_buffer->dma = 0;
5271                         goto next_desc;
5272                 }
5273 send_up:
5274                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5275                         dev_kfree_skb_irq(skb);
5276                         goto next_desc;
5277                 }
5278
5279                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5280                         igb_rx_hwtstamp(q_vector, staterr, skb);
5281                 total_bytes += skb->len;
5282                 total_packets++;
5283
5284                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5285
5286                 skb->protocol = eth_type_trans(skb, netdev);
5287                 skb_record_rx_queue(skb, rx_ring->queue_index);
5288
5289                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5290                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5291
5292                 igb_receive_skb(q_vector, skb, vlan_tag);
5293
5294 next_desc:
5295                 rx_desc->wb.upper.status_error = 0;
5296
5297                 /* return some buffers to hardware, one at a time is too slow */
5298                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5299                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5300                         cleaned_count = 0;
5301                 }
5302
5303                 /* use prefetched values */
5304                 rx_desc = next_rxd;
5305                 buffer_info = next_buffer;
5306                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5307         }
5308
5309         rx_ring->next_to_clean = i;
5310         cleaned_count = igb_desc_unused(rx_ring);
5311
5312         if (cleaned_count)
5313                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5314
5315         rx_ring->total_packets += total_packets;
5316         rx_ring->total_bytes += total_bytes;
5317         rx_ring->rx_stats.packets += total_packets;
5318         rx_ring->rx_stats.bytes += total_bytes;
5319         return cleaned;
5320 }
5321
5322 /**
5323  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5324  * @adapter: address of board private structure
5325  **/
5326 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5327 {
5328         struct net_device *netdev = rx_ring->netdev;
5329         union e1000_adv_rx_desc *rx_desc;
5330         struct igb_buffer *buffer_info;
5331         struct sk_buff *skb;
5332         unsigned int i;
5333         int bufsz;
5334
5335         i = rx_ring->next_to_use;
5336         buffer_info = &rx_ring->buffer_info[i];
5337
5338         bufsz = rx_ring->rx_buffer_len;
5339
5340         while (cleaned_count--) {
5341                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5342
5343                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5344                         if (!buffer_info->page) {
5345                                 buffer_info->page = netdev_alloc_page(netdev);
5346                                 if (!buffer_info->page) {
5347                                         rx_ring->rx_stats.alloc_failed++;
5348                                         goto no_buffers;
5349                                 }
5350                                 buffer_info->page_offset = 0;
5351                         } else {
5352                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5353                         }
5354                         buffer_info->page_dma =
5355                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5356                                              buffer_info->page_offset,
5357                                              PAGE_SIZE / 2,
5358                                              PCI_DMA_FROMDEVICE);
5359                         if (pci_dma_mapping_error(rx_ring->pdev,
5360                                                   buffer_info->page_dma)) {
5361                                 buffer_info->page_dma = 0;
5362                                 rx_ring->rx_stats.alloc_failed++;
5363                                 goto no_buffers;
5364                         }
5365                 }
5366
5367                 skb = buffer_info->skb;
5368                 if (!skb) {
5369                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5370                         if (!skb) {
5371                                 rx_ring->rx_stats.alloc_failed++;
5372                                 goto no_buffers;
5373                         }
5374
5375                         buffer_info->skb = skb;
5376                 }
5377                 if (!buffer_info->dma) {
5378                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5379                                                           skb->data,
5380                                                           bufsz,
5381                                                           PCI_DMA_FROMDEVICE);
5382                         if (pci_dma_mapping_error(rx_ring->pdev,
5383                                                   buffer_info->dma)) {
5384                                 buffer_info->dma = 0;
5385                                 rx_ring->rx_stats.alloc_failed++;
5386                                 goto no_buffers;
5387                         }
5388                 }
5389                 /* Refresh the desc even if buffer_addrs didn't change because
5390                  * each write-back erases this info. */
5391                 if (bufsz < IGB_RXBUFFER_1024) {
5392                         rx_desc->read.pkt_addr =
5393                              cpu_to_le64(buffer_info->page_dma);
5394                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5395                 } else {
5396                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5397                         rx_desc->read.hdr_addr = 0;
5398                 }
5399
5400                 i++;
5401                 if (i == rx_ring->count)
5402                         i = 0;
5403                 buffer_info = &rx_ring->buffer_info[i];
5404         }
5405
5406 no_buffers:
5407         if (rx_ring->next_to_use != i) {
5408                 rx_ring->next_to_use = i;
5409                 if (i == 0)
5410                         i = (rx_ring->count - 1);
5411                 else
5412                         i--;
5413
5414                 /* Force memory writes to complete before letting h/w
5415                  * know there are new descriptors to fetch.  (Only
5416                  * applicable for weak-ordered memory model archs,
5417                  * such as IA-64). */
5418                 wmb();
5419                 writel(i, rx_ring->tail);
5420         }
5421 }
5422
5423 /**
5424  * igb_mii_ioctl -
5425  * @netdev:
5426  * @ifreq:
5427  * @cmd:
5428  **/
5429 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5430 {
5431         struct igb_adapter *adapter = netdev_priv(netdev);
5432         struct mii_ioctl_data *data = if_mii(ifr);
5433
5434         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5435                 return -EOPNOTSUPP;
5436
5437         switch (cmd) {
5438         case SIOCGMIIPHY:
5439                 data->phy_id = adapter->hw.phy.addr;
5440                 break;
5441         case SIOCGMIIREG:
5442                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5443                                      &data->val_out))
5444                         return -EIO;
5445                 break;
5446         case SIOCSMIIREG:
5447         default:
5448                 return -EOPNOTSUPP;
5449         }
5450         return 0;
5451 }
5452
5453 /**
5454  * igb_hwtstamp_ioctl - control hardware time stamping
5455  * @netdev:
5456  * @ifreq:
5457  * @cmd:
5458  *
5459  * Outgoing time stamping can be enabled and disabled. Play nice and
5460  * disable it when requested, although it shouldn't case any overhead
5461  * when no packet needs it. At most one packet in the queue may be
5462  * marked for time stamping, otherwise it would be impossible to tell
5463  * for sure to which packet the hardware time stamp belongs.
5464  *
5465  * Incoming time stamping has to be configured via the hardware
5466  * filters. Not all combinations are supported, in particular event
5467  * type has to be specified. Matching the kind of event packet is
5468  * not supported, with the exception of "all V2 events regardless of
5469  * level 2 or 4".
5470  *
5471  **/
5472 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5473                               struct ifreq *ifr, int cmd)
5474 {
5475         struct igb_adapter *adapter = netdev_priv(netdev);
5476         struct e1000_hw *hw = &adapter->hw;
5477         struct hwtstamp_config config;
5478         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5479         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5480         u32 tsync_rx_cfg = 0;
5481         bool is_l4 = false;
5482         bool is_l2 = false;
5483         u32 regval;
5484
5485         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5486                 return -EFAULT;
5487
5488         /* reserved for future extensions */
5489         if (config.flags)
5490                 return -EINVAL;
5491
5492         switch (config.tx_type) {
5493         case HWTSTAMP_TX_OFF:
5494                 tsync_tx_ctl = 0;
5495         case HWTSTAMP_TX_ON:
5496                 break;
5497         default:
5498                 return -ERANGE;
5499         }
5500
5501         switch (config.rx_filter) {
5502         case HWTSTAMP_FILTER_NONE:
5503                 tsync_rx_ctl = 0;
5504                 break;
5505         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5506         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5507         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5508         case HWTSTAMP_FILTER_ALL:
5509                 /*
5510                  * register TSYNCRXCFG must be set, therefore it is not
5511                  * possible to time stamp both Sync and Delay_Req messages
5512                  * => fall back to time stamping all packets
5513                  */
5514                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5515                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5516                 break;
5517         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5518                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5519                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5520                 is_l4 = true;
5521                 break;
5522         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5523                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5524                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5525                 is_l4 = true;
5526                 break;
5527         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5528         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5529                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5530                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5531                 is_l2 = true;
5532                 is_l4 = true;
5533                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5534                 break;
5535         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5536         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5537                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5538                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5539                 is_l2 = true;
5540                 is_l4 = true;
5541                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5542                 break;
5543         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5544         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5545         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5546                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5547                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5548                 is_l2 = true;
5549                 break;
5550         default:
5551                 return -ERANGE;
5552         }
5553
5554         if (hw->mac.type == e1000_82575) {
5555                 if (tsync_rx_ctl | tsync_tx_ctl)
5556                         return -EINVAL;
5557                 return 0;
5558         }
5559
5560         /*
5561          * Per-packet timestamping only works if all packets are
5562          * timestamped, so enable timestamping in all packets as
5563          * long as one rx filter was configured.
5564          */
5565         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
5566                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5567                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5568         }
5569
5570         /* enable/disable TX */
5571         regval = rd32(E1000_TSYNCTXCTL);
5572         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5573         regval |= tsync_tx_ctl;
5574         wr32(E1000_TSYNCTXCTL, regval);
5575
5576         /* enable/disable RX */
5577         regval = rd32(E1000_TSYNCRXCTL);
5578         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5579         regval |= tsync_rx_ctl;
5580         wr32(E1000_TSYNCRXCTL, regval);
5581
5582         /* define which PTP packets are time stamped */
5583         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5584
5585         /* define ethertype filter for timestamped packets */
5586         if (is_l2)
5587                 wr32(E1000_ETQF(3),
5588                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5589                                  E1000_ETQF_1588 | /* enable timestamping */
5590                                  ETH_P_1588));     /* 1588 eth protocol type */
5591         else
5592                 wr32(E1000_ETQF(3), 0);
5593
5594 #define PTP_PORT 319
5595         /* L4 Queue Filter[3]: filter by destination port and protocol */
5596         if (is_l4) {
5597                 u32 ftqf = (IPPROTO_UDP /* UDP */
5598                         | E1000_FTQF_VF_BP /* VF not compared */
5599                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5600                         | E1000_FTQF_MASK); /* mask all inputs */
5601                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5602
5603                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5604                 wr32(E1000_IMIREXT(3),
5605                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5606                 if (hw->mac.type == e1000_82576) {
5607                         /* enable source port check */
5608                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5609                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5610                 }
5611                 wr32(E1000_FTQF(3), ftqf);
5612         } else {
5613                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5614         }
5615         wrfl();
5616
5617         adapter->hwtstamp_config = config;
5618
5619         /* clear TX/RX time stamp registers, just to be sure */
5620         regval = rd32(E1000_TXSTMPH);
5621         regval = rd32(E1000_RXSTMPH);
5622
5623         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5624                 -EFAULT : 0;
5625 }
5626
5627 /**
5628  * igb_ioctl -
5629  * @netdev:
5630  * @ifreq:
5631  * @cmd:
5632  **/
5633 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5634 {
5635         switch (cmd) {
5636         case SIOCGMIIPHY:
5637         case SIOCGMIIREG:
5638         case SIOCSMIIREG:
5639                 return igb_mii_ioctl(netdev, ifr, cmd);
5640         case SIOCSHWTSTAMP:
5641                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5642         default:
5643                 return -EOPNOTSUPP;
5644         }
5645 }
5646
5647 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5648 {
5649         struct igb_adapter *adapter = hw->back;
5650         u16 cap_offset;
5651
5652         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5653         if (!cap_offset)
5654                 return -E1000_ERR_CONFIG;
5655
5656         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5657
5658         return 0;
5659 }
5660
5661 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5662 {
5663         struct igb_adapter *adapter = hw->back;
5664         u16 cap_offset;
5665
5666         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5667         if (!cap_offset)
5668                 return -E1000_ERR_CONFIG;
5669
5670         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5671
5672         return 0;
5673 }
5674
5675 static void igb_vlan_rx_register(struct net_device *netdev,
5676                                  struct vlan_group *grp)
5677 {
5678         struct igb_adapter *adapter = netdev_priv(netdev);
5679         struct e1000_hw *hw = &adapter->hw;
5680         u32 ctrl, rctl;
5681
5682         igb_irq_disable(adapter);
5683         adapter->vlgrp = grp;
5684
5685         if (grp) {
5686                 /* enable VLAN tag insert/strip */
5687                 ctrl = rd32(E1000_CTRL);
5688                 ctrl |= E1000_CTRL_VME;
5689                 wr32(E1000_CTRL, ctrl);
5690
5691                 /* Disable CFI check */
5692                 rctl = rd32(E1000_RCTL);
5693                 rctl &= ~E1000_RCTL_CFIEN;
5694                 wr32(E1000_RCTL, rctl);
5695         } else {
5696                 /* disable VLAN tag insert/strip */
5697                 ctrl = rd32(E1000_CTRL);
5698                 ctrl &= ~E1000_CTRL_VME;
5699                 wr32(E1000_CTRL, ctrl);
5700         }
5701
5702         igb_rlpml_set(adapter);
5703
5704         if (!test_bit(__IGB_DOWN, &adapter->state))
5705                 igb_irq_enable(adapter);
5706 }
5707
5708 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5709 {
5710         struct igb_adapter *adapter = netdev_priv(netdev);
5711         struct e1000_hw *hw = &adapter->hw;
5712         int pf_id = adapter->vfs_allocated_count;
5713
5714         /* attempt to add filter to vlvf array */
5715         igb_vlvf_set(adapter, vid, true, pf_id);
5716
5717         /* add the filter since PF can receive vlans w/o entry in vlvf */
5718         igb_vfta_set(hw, vid, true);
5719 }
5720
5721 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5722 {
5723         struct igb_adapter *adapter = netdev_priv(netdev);
5724         struct e1000_hw *hw = &adapter->hw;
5725         int pf_id = adapter->vfs_allocated_count;
5726         s32 err;
5727
5728         igb_irq_disable(adapter);
5729         vlan_group_set_device(adapter->vlgrp, vid, NULL);
5730
5731         if (!test_bit(__IGB_DOWN, &adapter->state))
5732                 igb_irq_enable(adapter);
5733
5734         /* remove vlan from VLVF table array */
5735         err = igb_vlvf_set(adapter, vid, false, pf_id);
5736
5737         /* if vid was not present in VLVF just remove it from table */
5738         if (err)
5739                 igb_vfta_set(hw, vid, false);
5740 }
5741
5742 static void igb_restore_vlan(struct igb_adapter *adapter)
5743 {
5744         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5745
5746         if (adapter->vlgrp) {
5747                 u16 vid;
5748                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5749                         if (!vlan_group_get_device(adapter->vlgrp, vid))
5750                                 continue;
5751                         igb_vlan_rx_add_vid(adapter->netdev, vid);
5752                 }
5753         }
5754 }
5755
5756 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5757 {
5758         struct pci_dev *pdev = adapter->pdev;
5759         struct e1000_mac_info *mac = &adapter->hw.mac;
5760
5761         mac->autoneg = 0;
5762
5763         switch (spddplx) {
5764         case SPEED_10 + DUPLEX_HALF:
5765                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5766                 break;
5767         case SPEED_10 + DUPLEX_FULL:
5768                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5769                 break;
5770         case SPEED_100 + DUPLEX_HALF:
5771                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5772                 break;
5773         case SPEED_100 + DUPLEX_FULL:
5774                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5775                 break;
5776         case SPEED_1000 + DUPLEX_FULL:
5777                 mac->autoneg = 1;
5778                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5779                 break;
5780         case SPEED_1000 + DUPLEX_HALF: /* not supported */
5781         default:
5782                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
5783                 return -EINVAL;
5784         }
5785         return 0;
5786 }
5787
5788 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5789 {
5790         struct net_device *netdev = pci_get_drvdata(pdev);
5791         struct igb_adapter *adapter = netdev_priv(netdev);
5792         struct e1000_hw *hw = &adapter->hw;
5793         u32 ctrl, rctl, status;
5794         u32 wufc = adapter->wol;
5795 #ifdef CONFIG_PM
5796         int retval = 0;
5797 #endif
5798
5799         netif_device_detach(netdev);
5800
5801         if (netif_running(netdev))
5802                 igb_close(netdev);
5803
5804         igb_clear_interrupt_scheme(adapter);
5805
5806 #ifdef CONFIG_PM
5807         retval = pci_save_state(pdev);
5808         if (retval)
5809                 return retval;
5810 #endif
5811
5812         status = rd32(E1000_STATUS);
5813         if (status & E1000_STATUS_LU)
5814                 wufc &= ~E1000_WUFC_LNKC;
5815
5816         if (wufc) {
5817                 igb_setup_rctl(adapter);
5818                 igb_set_rx_mode(netdev);
5819
5820                 /* turn on all-multi mode if wake on multicast is enabled */
5821                 if (wufc & E1000_WUFC_MC) {
5822                         rctl = rd32(E1000_RCTL);
5823                         rctl |= E1000_RCTL_MPE;
5824                         wr32(E1000_RCTL, rctl);
5825                 }
5826
5827                 ctrl = rd32(E1000_CTRL);
5828                 /* advertise wake from D3Cold */
5829                 #define E1000_CTRL_ADVD3WUC 0x00100000
5830                 /* phy power management enable */
5831                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5832                 ctrl |= E1000_CTRL_ADVD3WUC;
5833                 wr32(E1000_CTRL, ctrl);
5834
5835                 /* Allow time for pending master requests to run */
5836                 igb_disable_pcie_master(hw);
5837
5838                 wr32(E1000_WUC, E1000_WUC_PME_EN);
5839                 wr32(E1000_WUFC, wufc);
5840         } else {
5841                 wr32(E1000_WUC, 0);
5842                 wr32(E1000_WUFC, 0);
5843         }
5844
5845         *enable_wake = wufc || adapter->en_mng_pt;
5846         if (!*enable_wake)
5847                 igb_power_down_link(adapter);
5848         else
5849                 igb_power_up_link(adapter);
5850
5851         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
5852          * would have already happened in close and is redundant. */
5853         igb_release_hw_control(adapter);
5854
5855         pci_disable_device(pdev);
5856
5857         return 0;
5858 }
5859
5860 #ifdef CONFIG_PM
5861 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5862 {
5863         int retval;
5864         bool wake;
5865
5866         retval = __igb_shutdown(pdev, &wake);
5867         if (retval)
5868                 return retval;
5869
5870         if (wake) {
5871                 pci_prepare_to_sleep(pdev);
5872         } else {
5873                 pci_wake_from_d3(pdev, false);
5874                 pci_set_power_state(pdev, PCI_D3hot);
5875         }
5876
5877         return 0;
5878 }
5879
5880 static int igb_resume(struct pci_dev *pdev)
5881 {
5882         struct net_device *netdev = pci_get_drvdata(pdev);
5883         struct igb_adapter *adapter = netdev_priv(netdev);
5884         struct e1000_hw *hw = &adapter->hw;
5885         u32 err;
5886
5887         pci_set_power_state(pdev, PCI_D0);
5888         pci_restore_state(pdev);
5889         pci_save_state(pdev);
5890
5891         err = pci_enable_device_mem(pdev);
5892         if (err) {
5893                 dev_err(&pdev->dev,
5894                         "igb: Cannot enable PCI device from suspend\n");
5895                 return err;
5896         }
5897         pci_set_master(pdev);
5898
5899         pci_enable_wake(pdev, PCI_D3hot, 0);
5900         pci_enable_wake(pdev, PCI_D3cold, 0);
5901
5902         if (igb_init_interrupt_scheme(adapter)) {
5903                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5904                 return -ENOMEM;
5905         }
5906
5907         igb_reset(adapter);
5908
5909         /* let the f/w know that the h/w is now under the control of the
5910          * driver. */
5911         igb_get_hw_control(adapter);
5912
5913         wr32(E1000_WUS, ~0);
5914
5915         if (netif_running(netdev)) {
5916                 err = igb_open(netdev);
5917                 if (err)
5918                         return err;
5919         }
5920
5921         netif_device_attach(netdev);
5922
5923         return 0;
5924 }
5925 #endif
5926
5927 static void igb_shutdown(struct pci_dev *pdev)
5928 {
5929         bool wake;
5930
5931         __igb_shutdown(pdev, &wake);
5932
5933         if (system_state == SYSTEM_POWER_OFF) {
5934                 pci_wake_from_d3(pdev, wake);
5935                 pci_set_power_state(pdev, PCI_D3hot);
5936         }
5937 }
5938
5939 #ifdef CONFIG_NET_POLL_CONTROLLER
5940 /*
5941  * Polling 'interrupt' - used by things like netconsole to send skbs
5942  * without having to re-enable interrupts. It's not called while
5943  * the interrupt routine is executing.
5944  */
5945 static void igb_netpoll(struct net_device *netdev)
5946 {
5947         struct igb_adapter *adapter = netdev_priv(netdev);
5948         struct e1000_hw *hw = &adapter->hw;
5949         int i;
5950
5951         if (!adapter->msix_entries) {
5952                 struct igb_q_vector *q_vector = adapter->q_vector[0];
5953                 igb_irq_disable(adapter);
5954                 napi_schedule(&q_vector->napi);
5955                 return;
5956         }
5957
5958         for (i = 0; i < adapter->num_q_vectors; i++) {
5959                 struct igb_q_vector *q_vector = adapter->q_vector[i];
5960                 wr32(E1000_EIMC, q_vector->eims_value);
5961                 napi_schedule(&q_vector->napi);
5962         }
5963 }
5964 #endif /* CONFIG_NET_POLL_CONTROLLER */
5965
5966 /**
5967  * igb_io_error_detected - called when PCI error is detected
5968  * @pdev: Pointer to PCI device
5969  * @state: The current pci connection state
5970  *
5971  * This function is called after a PCI bus error affecting
5972  * this device has been detected.
5973  */
5974 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5975                                               pci_channel_state_t state)
5976 {
5977         struct net_device *netdev = pci_get_drvdata(pdev);
5978         struct igb_adapter *adapter = netdev_priv(netdev);
5979
5980         netif_device_detach(netdev);
5981
5982         if (state == pci_channel_io_perm_failure)
5983                 return PCI_ERS_RESULT_DISCONNECT;
5984
5985         if (netif_running(netdev))
5986                 igb_down(adapter);
5987         pci_disable_device(pdev);
5988
5989         /* Request a slot slot reset. */
5990         return PCI_ERS_RESULT_NEED_RESET;
5991 }
5992
5993 /**
5994  * igb_io_slot_reset - called after the pci bus has been reset.
5995  * @pdev: Pointer to PCI device
5996  *
5997  * Restart the card from scratch, as if from a cold-boot. Implementation
5998  * resembles the first-half of the igb_resume routine.
5999  */
6000 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6001 {
6002         struct net_device *netdev = pci_get_drvdata(pdev);
6003         struct igb_adapter *adapter = netdev_priv(netdev);
6004         struct e1000_hw *hw = &adapter->hw;
6005         pci_ers_result_t result;
6006         int err;
6007
6008         if (pci_enable_device_mem(pdev)) {
6009                 dev_err(&pdev->dev,
6010                         "Cannot re-enable PCI device after reset.\n");
6011                 result = PCI_ERS_RESULT_DISCONNECT;
6012         } else {
6013                 pci_set_master(pdev);
6014                 pci_restore_state(pdev);
6015                 pci_save_state(pdev);
6016
6017                 pci_enable_wake(pdev, PCI_D3hot, 0);
6018                 pci_enable_wake(pdev, PCI_D3cold, 0);
6019
6020                 igb_reset(adapter);
6021                 wr32(E1000_WUS, ~0);
6022                 result = PCI_ERS_RESULT_RECOVERED;
6023         }
6024
6025         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6026         if (err) {
6027                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6028                         "failed 0x%0x\n", err);
6029                 /* non-fatal, continue */
6030         }
6031
6032         return result;
6033 }
6034
6035 /**
6036  * igb_io_resume - called when traffic can start flowing again.
6037  * @pdev: Pointer to PCI device
6038  *
6039  * This callback is called when the error recovery driver tells us that
6040  * its OK to resume normal operation. Implementation resembles the
6041  * second-half of the igb_resume routine.
6042  */
6043 static void igb_io_resume(struct pci_dev *pdev)
6044 {
6045         struct net_device *netdev = pci_get_drvdata(pdev);
6046         struct igb_adapter *adapter = netdev_priv(netdev);
6047
6048         if (netif_running(netdev)) {
6049                 if (igb_up(adapter)) {
6050                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6051                         return;
6052                 }
6053         }
6054
6055         netif_device_attach(netdev);
6056
6057         /* let the f/w know that the h/w is now under the control of the
6058          * driver. */
6059         igb_get_hw_control(adapter);
6060 }
6061
6062 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6063                              u8 qsel)
6064 {
6065         u32 rar_low, rar_high;
6066         struct e1000_hw *hw = &adapter->hw;
6067
6068         /* HW expects these in little endian so we reverse the byte order
6069          * from network order (big endian) to little endian
6070          */
6071         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6072                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6073         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6074
6075         /* Indicate to hardware the Address is Valid. */
6076         rar_high |= E1000_RAH_AV;
6077
6078         if (hw->mac.type == e1000_82575)
6079                 rar_high |= E1000_RAH_POOL_1 * qsel;
6080         else
6081                 rar_high |= E1000_RAH_POOL_1 << qsel;
6082
6083         wr32(E1000_RAL(index), rar_low);
6084         wrfl();
6085         wr32(E1000_RAH(index), rar_high);
6086         wrfl();
6087 }
6088
6089 static int igb_set_vf_mac(struct igb_adapter *adapter,
6090                           int vf, unsigned char *mac_addr)
6091 {
6092         struct e1000_hw *hw = &adapter->hw;
6093         /* VF MAC addresses start at end of receive addresses and moves
6094          * torwards the first, as a result a collision should not be possible */
6095         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6096
6097         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6098
6099         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6100
6101         return 0;
6102 }
6103
6104 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6105 {
6106         struct igb_adapter *adapter = netdev_priv(netdev);
6107         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6108                 return -EINVAL;
6109         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6110         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6111         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6112                                       " change effective.");
6113         if (test_bit(__IGB_DOWN, &adapter->state)) {
6114                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6115                          " but the PF device is not up.\n");
6116                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6117                          " attempting to use the VF device.\n");
6118         }
6119         return igb_set_vf_mac(adapter, vf, mac);
6120 }
6121
6122 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6123 {
6124         return -EOPNOTSUPP;
6125 }
6126
6127 static int igb_ndo_get_vf_config(struct net_device *netdev,
6128                                  int vf, struct ifla_vf_info *ivi)
6129 {
6130         struct igb_adapter *adapter = netdev_priv(netdev);
6131         if (vf >= adapter->vfs_allocated_count)
6132                 return -EINVAL;
6133         ivi->vf = vf;
6134         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6135         ivi->tx_rate = 0;
6136         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6137         ivi->qos = adapter->vf_data[vf].pf_qos;
6138         return 0;
6139 }
6140
6141 static void igb_vmm_control(struct igb_adapter *adapter)
6142 {
6143         struct e1000_hw *hw = &adapter->hw;
6144         u32 reg;
6145
6146         switch (hw->mac.type) {
6147         case e1000_82575:
6148         default:
6149                 /* replication is not supported for 82575 */
6150                 return;
6151         case e1000_82576:
6152                 /* notify HW that the MAC is adding vlan tags */
6153                 reg = rd32(E1000_DTXCTL);
6154                 reg |= E1000_DTXCTL_VLAN_ADDED;
6155                 wr32(E1000_DTXCTL, reg);
6156         case e1000_82580:
6157                 /* enable replication vlan tag stripping */
6158                 reg = rd32(E1000_RPLOLR);
6159                 reg |= E1000_RPLOLR_STRVLAN;
6160                 wr32(E1000_RPLOLR, reg);
6161         case e1000_i350:
6162                 /* none of the above registers are supported by i350 */
6163                 break;
6164         }
6165
6166         if (adapter->vfs_allocated_count) {
6167                 igb_vmdq_set_loopback_pf(hw, true);
6168                 igb_vmdq_set_replication_pf(hw, true);
6169         } else {
6170                 igb_vmdq_set_loopback_pf(hw, false);
6171                 igb_vmdq_set_replication_pf(hw, false);
6172         }
6173 }
6174
6175 /* igb_main.c */