1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
48 #ifdef CONFIG_IGB_DCA
49 #include <linux/dca.h>
50 #endif
51 #include "igb.h"
52
53 #define DRV_VERSION "2.1.0-k2"
54 char igb_driver_name[] = "igb";
55 char igb_driver_version[] = DRV_VERSION;
56 static const char igb_driver_string[] =
57                                 "Intel(R) Gigabit Ethernet Network Driver";
58 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
59
60 static const struct e1000_info *igb_info_tbl[] = {
61         [board_82575] = &e1000_82575_info,
62 };
63
64 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
85         /* required last entry */
86         {0, }
87 };
88
89 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
90
91 void igb_reset(struct igb_adapter *);
92 static int igb_setup_all_tx_resources(struct igb_adapter *);
93 static int igb_setup_all_rx_resources(struct igb_adapter *);
94 static void igb_free_all_tx_resources(struct igb_adapter *);
95 static void igb_free_all_rx_resources(struct igb_adapter *);
96 static void igb_setup_mrqc(struct igb_adapter *);
97 void igb_update_stats(struct igb_adapter *);
98 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
99 static void __devexit igb_remove(struct pci_dev *pdev);
100 static int igb_sw_init(struct igb_adapter *);
101 static int igb_open(struct net_device *);
102 static int igb_close(struct net_device *);
103 static void igb_configure_tx(struct igb_adapter *);
104 static void igb_configure_rx(struct igb_adapter *);
105 static void igb_clean_all_tx_rings(struct igb_adapter *);
106 static void igb_clean_all_rx_rings(struct igb_adapter *);
107 static void igb_clean_tx_ring(struct igb_ring *);
108 static void igb_clean_rx_ring(struct igb_ring *);
109 static void igb_set_rx_mode(struct net_device *);
110 static void igb_update_phy_info(unsigned long);
111 static void igb_watchdog(unsigned long);
112 static void igb_watchdog_task(struct work_struct *);
113 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
114 static struct net_device_stats *igb_get_stats(struct net_device *);
115 static int igb_change_mtu(struct net_device *, int);
116 static int igb_set_mac(struct net_device *, void *);
117 static void igb_set_uta(struct igb_adapter *adapter);
118 static irqreturn_t igb_intr(int irq, void *);
119 static irqreturn_t igb_intr_msi(int irq, void *);
120 static irqreturn_t igb_msix_other(int irq, void *);
121 static irqreturn_t igb_msix_ring(int irq, void *);
122 #ifdef CONFIG_IGB_DCA
123 static void igb_update_dca(struct igb_q_vector *);
124 static void igb_setup_dca(struct igb_adapter *);
125 #endif /* CONFIG_IGB_DCA */
126 static bool igb_clean_tx_irq(struct igb_q_vector *);
127 static int igb_poll(struct napi_struct *, int);
128 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
129 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
130 static void igb_tx_timeout(struct net_device *);
131 static void igb_reset_task(struct work_struct *);
132 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
133 static void igb_vlan_rx_add_vid(struct net_device *, u16);
134 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
135 static void igb_restore_vlan(struct igb_adapter *);
136 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
137 static void igb_ping_all_vfs(struct igb_adapter *);
138 static void igb_msg_task(struct igb_adapter *);
139 static void igb_vmm_control(struct igb_adapter *);
140 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
141 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
142 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
143 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
144                                int vf, u16 vlan, u8 qos);
145 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
146 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
147                                  struct ifla_vf_info *ivi);
148
149 #ifdef CONFIG_PM
150 static int igb_suspend(struct pci_dev *, pm_message_t);
151 static int igb_resume(struct pci_dev *);
152 #endif
153 static void igb_shutdown(struct pci_dev *);
154 #ifdef CONFIG_IGB_DCA
155 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
156 static struct notifier_block dca_notifier = {
157         .notifier_call  = igb_notify_dca,
158         .next           = NULL,
159         .priority       = 0
160 };
161 #endif
162 #ifdef CONFIG_NET_POLL_CONTROLLER
163 /* for netdump / net console */
164 static void igb_netpoll(struct net_device *);
165 #endif
166 #ifdef CONFIG_PCI_IOV
167 static unsigned int max_vfs = 0;
168 module_param(max_vfs, uint, 0);
169 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
170                  "per physical function");
171 #endif /* CONFIG_PCI_IOV */
172
173 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
174                      pci_channel_state_t);
175 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
176 static void igb_io_resume(struct pci_dev *);
177
178 static struct pci_error_handlers igb_err_handler = {
179         .error_detected = igb_io_error_detected,
180         .slot_reset = igb_io_slot_reset,
181         .resume = igb_io_resume,
182 };
183
184
185 static struct pci_driver igb_driver = {
186         .name     = igb_driver_name,
187         .id_table = igb_pci_tbl,
188         .probe    = igb_probe,
189         .remove   = __devexit_p(igb_remove),
190 #ifdef CONFIG_PM
191         /* Power Management Hooks */
192         .suspend  = igb_suspend,
193         .resume   = igb_resume,
194 #endif
195         .shutdown = igb_shutdown,
196         .err_handler = &igb_err_handler
197 };
198
199 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
200 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
201 MODULE_LICENSE("GPL");
202 MODULE_VERSION(DRV_VERSION);
203
204 /**
205  * igb_read_clock - read raw cycle counter (to be used by time counter)
206  */
207 static cycle_t igb_read_clock(const struct cyclecounter *tc)
208 {
209         struct igb_adapter *adapter =
210                 container_of(tc, struct igb_adapter, cycles);
211         struct e1000_hw *hw = &adapter->hw;
212         u64 stamp = 0;
213         int shift = 0;
214
215         /*
216          * The timestamp latches on lowest register read. For the 82580
217          * the lowest register is SYSTIMR instead of SYSTIML.  However, we never
218          * adjusted TIMINCA, so SYSTIMR will just read as all 0s and can be ignored.
219          */
220         if (hw->mac.type == e1000_82580) {
221                 stamp = rd32(E1000_SYSTIMR) >> 8;
222                 shift = IGB_82580_TSYNC_SHIFT;
223         }
224
225         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
226         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
227         return stamp;
228 }
229
230 /**
231  * igb_get_hw_dev - return device
232  * used by hardware layer to print debugging information
233  **/
234 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
235 {
236         struct igb_adapter *adapter = hw->back;
237         return adapter->netdev;
238 }
239
240 /**
241  * igb_init_module - Driver Registration Routine
242  *
243  * igb_init_module is the first routine called when the driver is
244  * loaded. All it does is register with the PCI subsystem.
245  **/
246 static int __init igb_init_module(void)
247 {
248         int ret;
249         printk(KERN_INFO "%s - version %s\n",
250                igb_driver_string, igb_driver_version);
251
252         printk(KERN_INFO "%s\n", igb_copyright);
253
254 #ifdef CONFIG_IGB_DCA
255         dca_register_notify(&dca_notifier);
256 #endif
257         ret = pci_register_driver(&igb_driver);
258         return ret;
259 }
260
261 module_init(igb_init_module);
262
263 /**
264  * igb_exit_module - Driver Exit Cleanup Routine
265  *
266  * igb_exit_module is called just before the driver is removed
267  * from memory.
268  **/
269 static void __exit igb_exit_module(void)
270 {
271 #ifdef CONFIG_IGB_DCA
272         dca_unregister_notify(&dca_notifier);
273 #endif
274         pci_unregister_driver(&igb_driver);
275 }
276
277 module_exit(igb_exit_module);
278
279 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
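    /*
     * Q_IDX_82576 interleaves ring index i into the 82576 hardware queue
     * numbering: ((i & 0x1) << 3) + (i >> 1) gives 0->0, 1->8, 2->1, 3->9,
     * 4->2, ... so consecutive rings land on the same low/high queue pairs
     * that the VFs use (VF n owns queues n and n + 8, per the comment below).
     */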
280 /**
281  * igb_cache_ring_register - Descriptor ring to register mapping
282  * @adapter: board private structure to initialize
283  *
284  * Once we know the feature-set enabled for the device, we'll cache
285  * the register offset the descriptor ring is assigned to.
286  **/
287 static void igb_cache_ring_register(struct igb_adapter *adapter)
288 {
289         int i = 0, j = 0;
290         u32 rbase_offset = adapter->vfs_allocated_count;
291
292         switch (adapter->hw.mac.type) {
293         case e1000_82576:
294                 /* The queues are allocated for virtualization such that VF 0
295                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
296                  * In order to avoid collision we start at the first free queue
297                  * and continue consuming queues in the same sequence
298                  */
299                 if (adapter->vfs_allocated_count) {
300                         for (; i < adapter->rss_queues; i++)
301                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
302                                                                Q_IDX_82576(i);
303                         for (; j < adapter->rss_queues; j++)
304                                 adapter->tx_ring[j]->reg_idx = rbase_offset +
305                                                                Q_IDX_82576(j);
306                 }
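                    /* Fall through: any remaining rings use the sequential mapping below */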
307         case e1000_82575:
308         case e1000_82580:
309         case e1000_i350:
310         default:
311                 for (; i < adapter->num_rx_queues; i++)
312                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
313                 for (; j < adapter->num_tx_queues; j++)
314                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
315                 break;
316         }
317 }
318
319 static void igb_free_queues(struct igb_adapter *adapter)
320 {
321         int i;
322
323         for (i = 0; i < adapter->num_tx_queues; i++) {
324                 kfree(adapter->tx_ring[i]);
325                 adapter->tx_ring[i] = NULL;
326         }
327         for (i = 0; i < adapter->num_rx_queues; i++) {
328                 kfree(adapter->rx_ring[i]);
329                 adapter->rx_ring[i] = NULL;
330         }
331         adapter->num_rx_queues = 0;
332         adapter->num_tx_queues = 0;
333 }
334
335 /**
336  * igb_alloc_queues - Allocate memory for all rings
337  * @adapter: board private structure to initialize
338  *
339  * We allocate one ring per queue at run-time since we don't know the
340  * number of queues at compile-time.
341  **/
342 static int igb_alloc_queues(struct igb_adapter *adapter)
343 {
344         struct igb_ring *ring;
345         int i;
346
347         for (i = 0; i < adapter->num_tx_queues; i++) {
348                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
349                 if (!ring)
350                         goto err;
351                 ring->count = adapter->tx_ring_count;
352                 ring->queue_index = i;
353                 ring->pdev = adapter->pdev;
354                 ring->netdev = adapter->netdev;
355                 /* For 82575, context index must be unique per ring. */
356                 if (adapter->hw.mac.type == e1000_82575)
357                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
358                 adapter->tx_ring[i] = ring;
359         }
360
361         for (i = 0; i < adapter->num_rx_queues; i++) {
362                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
363                 if (!ring)
364                         goto err;
365                 ring->count = adapter->rx_ring_count;
366                 ring->queue_index = i;
367                 ring->pdev = adapter->pdev;
368                 ring->netdev = adapter->netdev;
369                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
370                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
371                 /* set flag indicating ring supports SCTP checksum offload */
372                 if (adapter->hw.mac.type >= e1000_82576)
373                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
374                 adapter->rx_ring[i] = ring;
375         }
376
377         igb_cache_ring_register(adapter);
378
379         return 0;
380
381 err:
382         igb_free_queues(adapter);
383
384         return -ENOMEM;
385 }
386
387 #define IGB_N0_QUEUE -1
388 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
389 {
390         u32 msixbm = 0;
391         struct igb_adapter *adapter = q_vector->adapter;
392         struct e1000_hw *hw = &adapter->hw;
393         u32 ivar, index;
394         int rx_queue = IGB_N0_QUEUE;
395         int tx_queue = IGB_N0_QUEUE;
396
397         if (q_vector->rx_ring)
398                 rx_queue = q_vector->rx_ring->reg_idx;
399         if (q_vector->tx_ring)
400                 tx_queue = q_vector->tx_ring->reg_idx;
401
402         switch (hw->mac.type) {
403         case e1000_82575:
404                 /* The 82575 assigns vectors using a bitmask, which matches the
405                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
406                    or more queues to a vector, we write the appropriate bits
407                    into the MSIXBM register for that vector. */
408                 if (rx_queue > IGB_N0_QUEUE)
409                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
410                 if (tx_queue > IGB_N0_QUEUE)
411                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
412                 if (!adapter->msix_entries && msix_vector == 0)
413                         msixbm |= E1000_EIMS_OTHER;
414                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
415                 q_vector->eims_value = msixbm;
416                 break;
417         case e1000_82576:
418                 /* 82576 uses a table-based method for assigning vectors.
419                    Each queue has a single entry in the table to which we write
420                    a vector number along with a "valid" bit.  Sadly, the layout
421                    of the table is somewhat counterintuitive. */
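                    /* For reference, the IVAR0[queue & 0x7] byte layout implied by
                       the code below is:
                         byte 0 - Rx queue (queue)      byte 1 - Tx queue (queue)
                         byte 2 - Rx queue (queue + 8)  byte 3 - Tx queue (queue + 8)
                       with each byte holding the vector number plus a valid bit. */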
422                 if (rx_queue > IGB_N0_QUEUE) {
423                         index = (rx_queue & 0x7);
424                         ivar = array_rd32(E1000_IVAR0, index);
425                         if (rx_queue < 8) {
426                                 /* vector goes into low byte of register */
427                                 ivar = ivar & 0xFFFFFF00;
428                                 ivar |= msix_vector | E1000_IVAR_VALID;
429                         } else {
430                                 /* vector goes into third byte of register */
431                                 ivar = ivar & 0xFF00FFFF;
432                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
433                         }
434                         array_wr32(E1000_IVAR0, index, ivar);
435                 }
436                 if (tx_queue > IGB_N0_QUEUE) {
437                         index = (tx_queue & 0x7);
438                         ivar = array_rd32(E1000_IVAR0, index);
439                         if (tx_queue < 8) {
440                                 /* vector goes into second byte of register */
441                                 ivar = ivar & 0xFFFF00FF;
442                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
443                         } else {
444                                 /* vector goes into high byte of register */
445                                 ivar = ivar & 0x00FFFFFF;
446                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
447                         }
448                         array_wr32(E1000_IVAR0, index, ivar);
449                 }
450                 q_vector->eims_value = 1 << msix_vector;
451                 break;
452         case e1000_82580:
453         case e1000_i350:
454                 /* 82580 uses the same table-based approach as 82576 but has fewer
455                    entries; as a result we carry over for queues greater than 4. */
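                    /* Here index = (queue >> 1), so each IVAR0[index] packs two queues:
                         byte 0 - Rx queue 2*index      byte 1 - Tx queue 2*index
                         byte 2 - Rx queue 2*index + 1  byte 3 - Tx queue 2*index + 1 */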
456                 if (rx_queue > IGB_N0_QUEUE) {
457                         index = (rx_queue >> 1);
458                         ivar = array_rd32(E1000_IVAR0, index);
459                         if (rx_queue & 0x1) {
460                                 /* vector goes into third byte of register */
461                                 ivar = ivar & 0xFF00FFFF;
462                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
463                         } else {
464                                 /* vector goes into low byte of register */
465                                 ivar = ivar & 0xFFFFFF00;
466                                 ivar |= msix_vector | E1000_IVAR_VALID;
467                         }
468                         array_wr32(E1000_IVAR0, index, ivar);
469                 }
470                 if (tx_queue > IGB_N0_QUEUE) {
471                         index = (tx_queue >> 1);
472                         ivar = array_rd32(E1000_IVAR0, index);
473                         if (tx_queue & 0x1) {
474                                 /* vector goes into high byte of register */
475                                 ivar = ivar & 0x00FFFFFF;
476                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
477                         } else {
478                                 /* vector goes into second byte of register */
479                                 ivar = ivar & 0xFFFF00FF;
480                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
481                         }
482                         array_wr32(E1000_IVAR0, index, ivar);
483                 }
484                 q_vector->eims_value = 1 << msix_vector;
485                 break;
486         default:
487                 BUG();
488                 break;
489         }
490
491         /* add q_vector eims value to global eims_enable_mask */
492         adapter->eims_enable_mask |= q_vector->eims_value;
493
494         /* configure q_vector to set itr on first interrupt */
495         q_vector->set_itr = 1;
496 }
497
498 /**
499  * igb_configure_msix - Configure MSI-X hardware
500  *
501  * igb_configure_msix sets up the hardware to properly
502  * generate MSI-X interrupts.
503  **/
504 static void igb_configure_msix(struct igb_adapter *adapter)
505 {
506         u32 tmp;
507         int i, vector = 0;
508         struct e1000_hw *hw = &adapter->hw;
509
510         adapter->eims_enable_mask = 0;
511
512         /* set vector for other causes, i.e. link changes */
513         switch (hw->mac.type) {
514         case e1000_82575:
515                 tmp = rd32(E1000_CTRL_EXT);
516                 /* enable MSI-X PBA support*/
517                 tmp |= E1000_CTRL_EXT_PBA_CLR;
518
519                 /* Auto-Mask interrupts upon ICR read. */
520                 tmp |= E1000_CTRL_EXT_EIAME;
521                 tmp |= E1000_CTRL_EXT_IRCA;
522
523                 wr32(E1000_CTRL_EXT, tmp);
524
525                 /* enable msix_other interrupt */
526                 array_wr32(E1000_MSIXBM(0), vector++,
527                                       E1000_EIMS_OTHER);
528                 adapter->eims_other = E1000_EIMS_OTHER;
529
530                 break;
531
532         case e1000_82576:
533         case e1000_82580:
534         case e1000_i350:
535                 /* Turn on MSI-X capability first, or our settings
536                  * won't stick.  And it will take days to debug. */
537                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
538                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
539                                 E1000_GPIE_NSICR);
540
541                 /* enable msix_other interrupt */
542                 adapter->eims_other = 1 << vector;
543                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
544
545                 wr32(E1000_IVAR_MISC, tmp);
546                 break;
547         default:
548                 /* do nothing, since nothing else supports MSI-X */
549                 break;
550         } /* switch (hw->mac.type) */
551
552         adapter->eims_enable_mask |= adapter->eims_other;
553
554         for (i = 0; i < adapter->num_q_vectors; i++)
555                 igb_assign_vector(adapter->q_vector[i], vector++);
556
557         wrfl();
558 }
559
560 /**
561  * igb_request_msix - Initialize MSI-X interrupts
562  *
563  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
564  * kernel.
565  **/
566 static int igb_request_msix(struct igb_adapter *adapter)
567 {
568         struct net_device *netdev = adapter->netdev;
569         struct e1000_hw *hw = &adapter->hw;
570         int i, err = 0, vector = 0;
571
572         err = request_irq(adapter->msix_entries[vector].vector,
573                           igb_msix_other, 0, netdev->name, adapter);
574         if (err)
575                 goto out;
576         vector++;
577
578         for (i = 0; i < adapter->num_q_vectors; i++) {
579                 struct igb_q_vector *q_vector = adapter->q_vector[i];
580
581                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
582
583                 if (q_vector->rx_ring && q_vector->tx_ring)
584                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
585                                 q_vector->rx_ring->queue_index);
586                 else if (q_vector->tx_ring)
587                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
588                                 q_vector->tx_ring->queue_index);
589                 else if (q_vector->rx_ring)
590                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
591                                 q_vector->rx_ring->queue_index);
592                 else
593                         sprintf(q_vector->name, "%s-unused", netdev->name);
594
595                 err = request_irq(adapter->msix_entries[vector].vector,
596                                   igb_msix_ring, 0, q_vector->name,
597                                   q_vector);
598                 if (err)
599                         goto out;
600                 vector++;
601         }
602
603         igb_configure_msix(adapter);
604         return 0;
605 out:
606         return err;
607 }
608
609 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
610 {
611         if (adapter->msix_entries) {
612                 pci_disable_msix(adapter->pdev);
613                 kfree(adapter->msix_entries);
614                 adapter->msix_entries = NULL;
615         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
616                 pci_disable_msi(adapter->pdev);
617         }
618 }
619
620 /**
621  * igb_free_q_vectors - Free memory allocated for interrupt vectors
622  * @adapter: board private structure to initialize
623  *
624  * This function frees the memory allocated to the q_vectors.  In addition if
625  * NAPI is enabled it will delete any references to the NAPI struct prior
626  * to freeing the q_vector.
627  **/
628 static void igb_free_q_vectors(struct igb_adapter *adapter)
629 {
630         int v_idx;
631
632         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
633                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
634                 adapter->q_vector[v_idx] = NULL;
635                 if (!q_vector)
636                         continue;
637                 netif_napi_del(&q_vector->napi);
638                 kfree(q_vector);
639         }
640         adapter->num_q_vectors = 0;
641 }
642
643 /**
644  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
645  *
646  * This function resets the device so that it has 0 rx queues, tx queues, and
647  * MSI-X interrupts allocated.
648  */
649 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
650 {
651         igb_free_queues(adapter);
652         igb_free_q_vectors(adapter);
653         igb_reset_interrupt_capability(adapter);
654 }
655
656 /**
657  * igb_set_interrupt_capability - set MSI or MSI-X if supported
658  *
659  * Attempt to configure interrupts using the best available
660  * capabilities of the hardware and kernel.
661  **/
662 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
663 {
664         int err;
665         int numvecs, i;
666
667         /* Number of supported queues. */
668         adapter->num_rx_queues = adapter->rss_queues;
669         adapter->num_tx_queues = adapter->rss_queues;
670
671         /* start with one vector for every rx queue */
672         numvecs = adapter->num_rx_queues;
673
674         /* if tx handler is separate add 1 for every tx queue */
675         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
676                 numvecs += adapter->num_tx_queues;
677
678         /* store the number of vectors reserved for queues */
679         adapter->num_q_vectors = numvecs;
680
681         /* add 1 vector for link status interrupts */
682         numvecs++;
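            /* e.g. 4 RSS queues without queue pairing: 4 Rx + 4 Tx + 1 link = 9 vectors */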
683         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
684                                         GFP_KERNEL);
685         if (!adapter->msix_entries)
686                 goto msi_only;
687
688         for (i = 0; i < numvecs; i++)
689                 adapter->msix_entries[i].entry = i;
690
691         err = pci_enable_msix(adapter->pdev,
692                               adapter->msix_entries,
693                               numvecs);
694         if (err == 0)
695                 goto out;
696
697         igb_reset_interrupt_capability(adapter);
698
699         /* If we can't do MSI-X, try MSI */
700 msi_only:
701 #ifdef CONFIG_PCI_IOV
702         /* disable SR-IOV for non MSI-X configurations */
703         if (adapter->vf_data) {
704                 struct e1000_hw *hw = &adapter->hw;
705                 /* disable iov and allow time for transactions to clear */
706                 pci_disable_sriov(adapter->pdev);
707                 msleep(500);
708
709                 kfree(adapter->vf_data);
710                 adapter->vf_data = NULL;
711                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
712                 msleep(100);
713                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
714         }
715 #endif
716         adapter->vfs_allocated_count = 0;
717         adapter->rss_queues = 1;
718         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
719         adapter->num_rx_queues = 1;
720         adapter->num_tx_queues = 1;
721         adapter->num_q_vectors = 1;
722         if (!pci_enable_msi(adapter->pdev))
723                 adapter->flags |= IGB_FLAG_HAS_MSI;
724 out:
725         /* Notify the stack of the (possibly) reduced Tx Queue count. */
726         adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
727         return;
728 }
729
730 /**
731  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
732  * @adapter: board private structure to initialize
733  *
734  * We allocate one q_vector per queue interrupt.  If allocation fails we
735  * return -ENOMEM.
736  **/
737 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
738 {
739         struct igb_q_vector *q_vector;
740         struct e1000_hw *hw = &adapter->hw;
741         int v_idx;
742
743         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
744                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
745                 if (!q_vector)
746                         goto err_out;
747                 q_vector->adapter = adapter;
748                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
749                 q_vector->itr_val = IGB_START_ITR;
750                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
751                 adapter->q_vector[v_idx] = q_vector;
752         }
753         return 0;
754
755 err_out:
756         igb_free_q_vectors(adapter);
757         return -ENOMEM;
758 }
759
760 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
761                                       int ring_idx, int v_idx)
762 {
763         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
764
765         q_vector->rx_ring = adapter->rx_ring[ring_idx];
766         q_vector->rx_ring->q_vector = q_vector;
767         q_vector->itr_val = adapter->rx_itr_setting;
768         if (q_vector->itr_val && q_vector->itr_val <= 3)
769                 q_vector->itr_val = IGB_START_ITR;
770 }
771
772 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
773                                       int ring_idx, int v_idx)
774 {
775         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
776
777         q_vector->tx_ring = adapter->tx_ring[ring_idx];
778         q_vector->tx_ring->q_vector = q_vector;
779         q_vector->itr_val = adapter->tx_itr_setting;
780         if (q_vector->itr_val && q_vector->itr_val <= 3)
781                 q_vector->itr_val = IGB_START_ITR;
782 }
783
784 /**
785  * igb_map_ring_to_vector - maps allocated queues to vectors
786  *
787  * This function maps the recently allocated queues to vectors.
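     * When there is a q_vector for every ring, Rx rings take the first vectors
     * and Tx rings the remaining ones; otherwise Tx ring i is paired with Rx
     * ring i on the same vector and any leftover Tx rings get their own.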
788  **/
789 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
790 {
791         int i;
792         int v_idx = 0;
793
794         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
795             (adapter->num_q_vectors < adapter->num_tx_queues))
796                 return -ENOMEM;
797
798         if (adapter->num_q_vectors >=
799             (adapter->num_rx_queues + adapter->num_tx_queues)) {
800                 for (i = 0; i < adapter->num_rx_queues; i++)
801                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
802                 for (i = 0; i < adapter->num_tx_queues; i++)
803                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
804         } else {
805                 for (i = 0; i < adapter->num_rx_queues; i++) {
806                         if (i < adapter->num_tx_queues)
807                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
808                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
809                 }
810                 for (; i < adapter->num_tx_queues; i++)
811                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
812         }
813         return 0;
814 }
815
816 /**
817  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
818  *
819  * This function initializes the interrupts and allocates all of the queues.
820  **/
821 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
822 {
823         struct pci_dev *pdev = adapter->pdev;
824         int err;
825
826         igb_set_interrupt_capability(adapter);
827
828         err = igb_alloc_q_vectors(adapter);
829         if (err) {
830                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
831                 goto err_alloc_q_vectors;
832         }
833
834         err = igb_alloc_queues(adapter);
835         if (err) {
836                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
837                 goto err_alloc_queues;
838         }
839
840         err = igb_map_ring_to_vector(adapter);
841         if (err) {
842                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
843                 goto err_map_queues;
844         }
845
846
847         return 0;
848 err_map_queues:
849         igb_free_queues(adapter);
850 err_alloc_queues:
851         igb_free_q_vectors(adapter);
852 err_alloc_q_vectors:
853         igb_reset_interrupt_capability(adapter);
854         return err;
855 }
856
857 /**
858  * igb_request_irq - initialize interrupts
859  *
860  * Attempts to configure interrupts using the best available
861  * capabilities of the hardware and kernel.
862  **/
863 static int igb_request_irq(struct igb_adapter *adapter)
864 {
865         struct net_device *netdev = adapter->netdev;
866         struct pci_dev *pdev = adapter->pdev;
867         int err = 0;
868
869         if (adapter->msix_entries) {
870                 err = igb_request_msix(adapter);
871                 if (!err)
872                         goto request_done;
873                 /* fall back to MSI */
874                 igb_clear_interrupt_scheme(adapter);
875                 if (!pci_enable_msi(adapter->pdev))
876                         adapter->flags |= IGB_FLAG_HAS_MSI;
877                 igb_free_all_tx_resources(adapter);
878                 igb_free_all_rx_resources(adapter);
879                 adapter->num_tx_queues = 1;
880                 adapter->num_rx_queues = 1;
881                 adapter->num_q_vectors = 1;
882                 err = igb_alloc_q_vectors(adapter);
883                 if (err) {
884                         dev_err(&pdev->dev,
885                                 "Unable to allocate memory for vectors\n");
886                         goto request_done;
887                 }
888                 err = igb_alloc_queues(adapter);
889                 if (err) {
890                         dev_err(&pdev->dev,
891                                 "Unable to allocate memory for queues\n");
892                         igb_free_q_vectors(adapter);
893                         goto request_done;
894                 }
895                 igb_setup_all_tx_resources(adapter);
896                 igb_setup_all_rx_resources(adapter);
897         } else {
898                 igb_assign_vector(adapter->q_vector[0], 0);
899         }
900
901         if (adapter->flags & IGB_FLAG_HAS_MSI) {
902                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
903                                   netdev->name, adapter);
904                 if (!err)
905                         goto request_done;
906
907                 /* fall back to legacy interrupts */
908                 igb_reset_interrupt_capability(adapter);
909                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
910         }
911
912         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
913                           netdev->name, adapter);
914
915         if (err)
916                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
917                         err);
918
919 request_done:
920         return err;
921 }
922
923 static void igb_free_irq(struct igb_adapter *adapter)
924 {
925         if (adapter->msix_entries) {
926                 int vector = 0, i;
927
928                 free_irq(adapter->msix_entries[vector++].vector, adapter);
929
930                 for (i = 0; i < adapter->num_q_vectors; i++) {
931                         struct igb_q_vector *q_vector = adapter->q_vector[i];
932                         free_irq(adapter->msix_entries[vector++].vector,
933                                  q_vector);
934                 }
935         } else {
936                 free_irq(adapter->pdev->irq, adapter);
937         }
938 }
939
940 /**
941  * igb_irq_disable - Mask off interrupt generation on the NIC
942  * @adapter: board private structure
943  **/
944 static void igb_irq_disable(struct igb_adapter *adapter)
945 {
946         struct e1000_hw *hw = &adapter->hw;
947
948         /*
949          * we need to be careful when disabling interrupts.  The VFs are also
950          * mapped into these registers and so clearing the bits can cause
951          * issues for the VF drivers, so we only clear the bits that we set
952          */
953         if (adapter->msix_entries) {
954                 u32 regval = rd32(E1000_EIAM);
955                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
956                 wr32(E1000_EIMC, adapter->eims_enable_mask);
957                 regval = rd32(E1000_EIAC);
958                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
959         }
960
961         wr32(E1000_IAM, 0);
962         wr32(E1000_IMC, ~0);
963         wrfl();
964         synchronize_irq(adapter->pdev->irq);
965 }
966
967 /**
968  * igb_irq_enable - Enable default interrupt generation settings
969  * @adapter: board private structure
970  **/
971 static void igb_irq_enable(struct igb_adapter *adapter)
972 {
973         struct e1000_hw *hw = &adapter->hw;
974
975         if (adapter->msix_entries) {
976                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
977                 u32 regval = rd32(E1000_EIAC);
978                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
979                 regval = rd32(E1000_EIAM);
980                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
981                 wr32(E1000_EIMS, adapter->eims_enable_mask);
982                 if (adapter->vfs_allocated_count) {
983                         wr32(E1000_MBVFIMR, 0xFF);
984                         ims |= E1000_IMS_VMMB;
985                 }
986                 if (adapter->hw.mac.type == e1000_82580)
987                         ims |= E1000_IMS_DRSTA;
988
989                 wr32(E1000_IMS, ims);
990         } else {
991                 wr32(E1000_IMS, IMS_ENABLE_MASK |
992                                 E1000_IMS_DRSTA);
993                 wr32(E1000_IAM, IMS_ENABLE_MASK |
994                                 E1000_IMS_DRSTA);
995         }
996 }
997
998 static void igb_update_mng_vlan(struct igb_adapter *adapter)
999 {
1000         struct e1000_hw *hw = &adapter->hw;
1001         u16 vid = adapter->hw.mng_cookie.vlan_id;
1002         u16 old_vid = adapter->mng_vlan_id;
1003
1004         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1005                 /* add VID to filter table */
1006                 igb_vfta_set(hw, vid, true);
1007                 adapter->mng_vlan_id = vid;
1008         } else {
1009                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1010         }
1011
1012         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1013             (vid != old_vid) &&
1014             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1015                 /* remove VID from filter table */
1016                 igb_vfta_set(hw, old_vid, false);
1017         }
1018 }
1019
1020 /**
1021  * igb_release_hw_control - release control of the h/w to f/w
1022  * @adapter: address of board private structure
1023  *
1024  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1025  * For ASF and Pass Through versions of f/w this means that the
1026  * driver is no longer loaded.
1027  *
1028  **/
1029 static void igb_release_hw_control(struct igb_adapter *adapter)
1030 {
1031         struct e1000_hw *hw = &adapter->hw;
1032         u32 ctrl_ext;
1033
1034         /* Let firmware take over control of h/w */
1035         ctrl_ext = rd32(E1000_CTRL_EXT);
1036         wr32(E1000_CTRL_EXT,
1037                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1038 }
1039
1040 /**
1041  * igb_get_hw_control - get control of the h/w from f/w
1042  * @adapter: address of board private structure
1043  *
1044  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1045  * For ASF and Pass Through versions of f/w this means that
1046  * the driver is loaded.
1047  *
1048  **/
1049 static void igb_get_hw_control(struct igb_adapter *adapter)
1050 {
1051         struct e1000_hw *hw = &adapter->hw;
1052         u32 ctrl_ext;
1053
1054         /* Let firmware know the driver has taken over */
1055         ctrl_ext = rd32(E1000_CTRL_EXT);
1056         wr32(E1000_CTRL_EXT,
1057                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1058 }
1059
1060 /**
1061  * igb_configure - configure the hardware for RX and TX
1062  * @adapter: private board structure
1063  **/
1064 static void igb_configure(struct igb_adapter *adapter)
1065 {
1066         struct net_device *netdev = adapter->netdev;
1067         int i;
1068
1069         igb_get_hw_control(adapter);
1070         igb_set_rx_mode(netdev);
1071
1072         igb_restore_vlan(adapter);
1073
1074         igb_setup_tctl(adapter);
1075         igb_setup_mrqc(adapter);
1076         igb_setup_rctl(adapter);
1077
1078         igb_configure_tx(adapter);
1079         igb_configure_rx(adapter);
1080
1081         igb_rx_fifo_flush_82575(&adapter->hw);
1082
1083         /* call igb_desc_unused which always leaves
1084          * at least 1 descriptor unused to make sure
1085          * next_to_use != next_to_clean */
1086         for (i = 0; i < adapter->num_rx_queues; i++) {
1087                 struct igb_ring *ring = adapter->rx_ring[i];
1088                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1089         }
1090 }
1091
1092 /**
1093  * igb_power_up_link - Power up the phy/serdes link
1094  * @adapter: address of board private structure
1095  **/
1096 void igb_power_up_link(struct igb_adapter *adapter)
1097 {
1098         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1099                 igb_power_up_phy_copper(&adapter->hw);
1100         else
1101                 igb_power_up_serdes_link_82575(&adapter->hw);
1102 }
1103
1104 /**
1105  * igb_power_down_link - Power down the phy/serdes link
1106  * @adapter: address of board private structure
1107  */
1108 static void igb_power_down_link(struct igb_adapter *adapter)
1109 {
1110         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1111                 igb_power_down_phy_copper_82575(&adapter->hw);
1112         else
1113                 igb_shutdown_serdes_link_82575(&adapter->hw);
1114 }
1115
1116 /**
1117  * igb_up - Open the interface and prepare it to handle traffic
1118  * @adapter: board private structure
1119  **/
1120 int igb_up(struct igb_adapter *adapter)
1121 {
1122         struct e1000_hw *hw = &adapter->hw;
1123         int i;
1124
1125         /* hardware has been reset, we need to reload some things */
1126         igb_configure(adapter);
1127
1128         clear_bit(__IGB_DOWN, &adapter->state);
1129
1130         for (i = 0; i < adapter->num_q_vectors; i++) {
1131                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1132                 napi_enable(&q_vector->napi);
1133         }
1134         if (adapter->msix_entries)
1135                 igb_configure_msix(adapter);
1136         else
1137                 igb_assign_vector(adapter->q_vector[0], 0);
1138
1139         /* Clear any pending interrupts. */
1140         rd32(E1000_ICR);
1141         igb_irq_enable(adapter);
1142
1143         /* notify VFs that reset has been completed */
1144         if (adapter->vfs_allocated_count) {
1145                 u32 reg_data = rd32(E1000_CTRL_EXT);
1146                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1147                 wr32(E1000_CTRL_EXT, reg_data);
1148         }
1149
1150         netif_tx_start_all_queues(adapter->netdev);
1151
1152         /* start the watchdog. */
1153         hw->mac.get_link_status = 1;
1154         schedule_work(&adapter->watchdog_task);
1155
1156         return 0;
1157 }
1158
1159 void igb_down(struct igb_adapter *adapter)
1160 {
1161         struct net_device *netdev = adapter->netdev;
1162         struct e1000_hw *hw = &adapter->hw;
1163         u32 tctl, rctl;
1164         int i;
1165
1166         /* signal that we're down so the interrupt handler does not
1167          * reschedule our watchdog timer */
1168         set_bit(__IGB_DOWN, &adapter->state);
1169
1170         /* disable receives in the hardware */
1171         rctl = rd32(E1000_RCTL);
1172         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1173         /* flush and sleep below */
1174
1175         netif_tx_stop_all_queues(netdev);
1176
1177         /* disable transmits in the hardware */
1178         tctl = rd32(E1000_TCTL);
1179         tctl &= ~E1000_TCTL_EN;
1180         wr32(E1000_TCTL, tctl);
1181         /* flush both disables and wait for them to finish */
1182         wrfl();
1183         msleep(10);
1184
1185         for (i = 0; i < adapter->num_q_vectors; i++) {
1186                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1187                 napi_disable(&q_vector->napi);
1188         }
1189
1190         igb_irq_disable(adapter);
1191
1192         del_timer_sync(&adapter->watchdog_timer);
1193         del_timer_sync(&adapter->phy_info_timer);
1194
1195         netif_carrier_off(netdev);
1196
1197         /* record the stats before reset */
1198         igb_update_stats(adapter);
1199
1200         adapter->link_speed = 0;
1201         adapter->link_duplex = 0;
1202
1203         if (!pci_channel_offline(adapter->pdev))
1204                 igb_reset(adapter);
1205         igb_clean_all_tx_rings(adapter);
1206         igb_clean_all_rx_rings(adapter);
1207 #ifdef CONFIG_IGB_DCA
1208
1209         /* since we reset the hardware DCA settings were cleared */
1210         igb_setup_dca(adapter);
1211 #endif
1212 }
1213
1214 void igb_reinit_locked(struct igb_adapter *adapter)
1215 {
1216         WARN_ON(in_interrupt());
1217         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1218                 msleep(1);
1219         igb_down(adapter);
1220         igb_up(adapter);
1221         clear_bit(__IGB_RESETTING, &adapter->state);
1222 }
1223
1224 void igb_reset(struct igb_adapter *adapter)
1225 {
1226         struct pci_dev *pdev = adapter->pdev;
1227         struct e1000_hw *hw = &adapter->hw;
1228         struct e1000_mac_info *mac = &hw->mac;
1229         struct e1000_fc_info *fc = &hw->fc;
1230         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1231         u16 hwm;
1232
1233         /* Repartition PBA for greater than 9k MTU.
1234          * To take effect CTRL.RST is required.
1235          */
1236         switch (mac->type) {
1237         case e1000_i350:
1238         case e1000_82580:
1239                 pba = rd32(E1000_RXPBS);
1240                 pba = igb_rxpbs_adjust_82580(pba);
1241                 break;
1242         case e1000_82576:
1243                 pba = rd32(E1000_RXPBS);
1244                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1245                 break;
1246         case e1000_82575:
1247         default:
1248                 pba = E1000_PBA_34K;
1249                 break;
1250         }
1251
1252         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1253             (mac->type < e1000_82576)) {
1254                 /* adjust PBA for jumbo frames */
1255                 wr32(E1000_PBA, pba);
1256
1257                 /* To maintain wire speed transmits, the Tx FIFO should be
1258                  * large enough to accommodate two full transmit packets,
1259                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1260                  * the Rx FIFO should be large enough to accommodate at least
1261                  * one full receive packet and is similarly rounded up and
1262                  * expressed in KB. */
1263                 pba = rd32(E1000_PBA);
1264                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1265                 tx_space = pba >> 16;
1266                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1267                 pba &= 0xffff;
1268                 /* the Tx FIFO also stores 16 bytes of information about each Tx
1269                  * packet; don't include the Ethernet FCS because hardware appends it */
1270                 min_tx_space = (adapter->max_frame_size +
1271                                 sizeof(union e1000_adv_tx_desc) -
1272                                 ETH_FCS_LEN) * 2;
1273                 min_tx_space = ALIGN(min_tx_space, 1024);
1274                 min_tx_space >>= 10;
1275                 /* software strips receive CRC, so leave room for it */
1276                 min_rx_space = adapter->max_frame_size;
1277                 min_rx_space = ALIGN(min_rx_space, 1024);
1278                 min_rx_space >>= 10;
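                     /* e.g. for a hypothetical 9018-byte jumbo frame (16-byte adv Tx
                      * descriptor): min_tx_space = (9018 + 16 - 4) * 2 = 18060 -> 18KB
                      * after rounding up, min_rx_space = 9018 -> 9KB after rounding up */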
1279
1280                 /* If current Tx allocation is less than the min Tx FIFO size,
1281                  * and the min Tx FIFO size is less than the current Rx FIFO
1282                  * allocation, take space away from current Rx allocation */
1283                 if (tx_space < min_tx_space &&
1284                     ((min_tx_space - tx_space) < pba)) {
1285                         pba = pba - (min_tx_space - tx_space);
1286
1287                         /* if short on rx space, rx wins and must trump tx
1288                          * adjustment */
1289                         if (pba < min_rx_space)
1290                                 pba = min_rx_space;
1291                 }
1292                 wr32(E1000_PBA, pba);
1293         }
1294
1295         /* flow control settings */
1296         /* The high water mark must be low enough to fit one full frame
1297          * (or the size used for early receive) above it in the Rx FIFO.
1298          * Set it to the lower of:
1299          * - 90% of the Rx FIFO size, or
1300          * - the full Rx FIFO size minus one full frame */
1301         hwm = min(((pba << 10) * 9 / 10),
1302                         ((pba << 10) - 2 * adapter->max_frame_size));
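             /* e.g. with a 34KB Rx PBA and a 1522-byte max frame this picks
              * min(31334, 31772) = 31334, masked down to 31328 (16-byte units) below */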
1303
1304         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1305         fc->low_water = fc->high_water - 16;
1306         fc->pause_time = 0xFFFF;
1307         fc->send_xon = 1;
1308         fc->current_mode = fc->requested_mode;
1309
1310         /* disable receive for all VFs and wait one second */
1311         if (adapter->vfs_allocated_count) {
1312                 int i;
1313                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1314                         adapter->vf_data[i].flags = 0;
1315
1316                 /* ping all the active vfs to let them know we are going down */
1317                 igb_ping_all_vfs(adapter);
1318
1319                 /* disable transmits and receives */
1320                 wr32(E1000_VFRE, 0);
1321                 wr32(E1000_VFTE, 0);
1322         }
1323
1324         /* Allow time for pending master requests to run */
1325         hw->mac.ops.reset_hw(hw);
1326         wr32(E1000_WUC, 0);
1327
1328         if (hw->mac.ops.init_hw(hw))
1329                 dev_err(&pdev->dev, "Hardware Error\n");
1330
1331         if (hw->mac.type == e1000_82580) {
1332                 u32 reg = rd32(E1000_PCIEMISC);
1333                 wr32(E1000_PCIEMISC,
1334                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1335         }
1336         if (!netif_running(adapter->netdev))
1337                 igb_power_down_link(adapter);
1338
1339         igb_update_mng_vlan(adapter);
1340
1341         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1342         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1343
1344         igb_get_phy_info(hw);
1345 }
1346
1347 static const struct net_device_ops igb_netdev_ops = {
1348         .ndo_open               = igb_open,
1349         .ndo_stop               = igb_close,
1350         .ndo_start_xmit         = igb_xmit_frame_adv,
1351         .ndo_get_stats          = igb_get_stats,
1352         .ndo_set_rx_mode        = igb_set_rx_mode,
1353         .ndo_set_multicast_list = igb_set_rx_mode,
1354         .ndo_set_mac_address    = igb_set_mac,
1355         .ndo_change_mtu         = igb_change_mtu,
1356         .ndo_do_ioctl           = igb_ioctl,
1357         .ndo_tx_timeout         = igb_tx_timeout,
1358         .ndo_validate_addr      = eth_validate_addr,
1359         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1360         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1361         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1362         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1363         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1364         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1365         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1366 #ifdef CONFIG_NET_POLL_CONTROLLER
1367         .ndo_poll_controller    = igb_netpoll,
1368 #endif
1369 };
1370
1371 /**
1372  * igb_probe - Device Initialization Routine
1373  * @pdev: PCI device information struct
1374  * @ent: entry in igb_pci_tbl
1375  *
1376  * Returns 0 on success, negative on failure
1377  *
1378  * igb_probe initializes an adapter identified by a pci_dev structure.
1379  * The OS initialization, configuring of the adapter private structure,
1380  * and a hardware reset occur.
1381  **/
1382 static int __devinit igb_probe(struct pci_dev *pdev,
1383                                const struct pci_device_id *ent)
1384 {
1385         struct net_device *netdev;
1386         struct igb_adapter *adapter;
1387         struct e1000_hw *hw;
1388         u16 eeprom_data = 0;
1389         static int global_quad_port_a; /* global quad port a indication */
1390         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1391         unsigned long mmio_start, mmio_len;
1392         int err, pci_using_dac;
1393         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1394         u32 part_num;
1395
1396         err = pci_enable_device_mem(pdev);
1397         if (err)
1398                 return err;
1399
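             /* try 64-bit DMA addressing first and fall back to 32-bit;
              * pci_using_dac records whether buffers above 4GB may be
              * handed to the device */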
1400         pci_using_dac = 0;
1401         err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
1402         if (!err) {
1403                 err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
1404                 if (!err)
1405                         pci_using_dac = 1;
1406         } else {
1407                 err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
1408                 if (err) {
1409                         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
1410                         if (err) {
1411                                 dev_err(&pdev->dev, "No usable DMA "
1412                                         "configuration, aborting\n");
1413                                 goto err_dma;
1414                         }
1415                 }
1416         }
1417
1418         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1419                                            IORESOURCE_MEM),
1420                                            igb_driver_name);
1421         if (err)
1422                 goto err_pci_reg;
1423
1424         pci_enable_pcie_error_reporting(pdev);
1425
1426         pci_set_master(pdev);
1427         pci_save_state(pdev);
1428
1429         err = -ENOMEM;
1430         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1431                                    IGB_ABS_MAX_TX_QUEUES);
1432         if (!netdev)
1433                 goto err_alloc_etherdev;
1434
1435         SET_NETDEV_DEV(netdev, &pdev->dev);
1436
1437         pci_set_drvdata(pdev, netdev);
1438         adapter = netdev_priv(netdev);
1439         adapter->netdev = netdev;
1440         adapter->pdev = pdev;
1441         hw = &adapter->hw;
1442         hw->back = adapter;
1443         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1444
1445         mmio_start = pci_resource_start(pdev, 0);
1446         mmio_len = pci_resource_len(pdev, 0);
1447
1448         err = -EIO;
1449         hw->hw_addr = ioremap(mmio_start, mmio_len);
1450         if (!hw->hw_addr)
1451                 goto err_ioremap;
1452
1453         netdev->netdev_ops = &igb_netdev_ops;
1454         igb_set_ethtool_ops(netdev);
1455         netdev->watchdog_timeo = 5 * HZ;
1456
1457         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1458
1459         netdev->mem_start = mmio_start;
1460         netdev->mem_end = mmio_start + mmio_len;
1461
1462         /* PCI config space info */
1463         hw->vendor_id = pdev->vendor;
1464         hw->device_id = pdev->device;
1465         hw->revision_id = pdev->revision;
1466         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1467         hw->subsystem_device_id = pdev->subsystem_device;
1468
1469         /* Copy the default MAC, PHY and NVM function pointers */
1470         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1471         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1472         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1473         /* Initialize skew-specific constants */
1474         err = ei->get_invariants(hw);
1475         if (err)
1476                 goto err_sw_init;
1477
1478         /* setup the private structure */
1479         err = igb_sw_init(adapter);
1480         if (err)
1481                 goto err_sw_init;
1482
1483         igb_get_bus_info_pcie(hw);
1484
1485         hw->phy.autoneg_wait_to_complete = false;
1486
1487         /* Copper options */
1488         if (hw->phy.media_type == e1000_media_type_copper) {
1489                 hw->phy.mdix = AUTO_ALL_MODES;
1490                 hw->phy.disable_polarity_correction = false;
1491                 hw->phy.ms_type = e1000_ms_hw_default;
1492         }
1493
1494         if (igb_check_reset_block(hw))
1495                 dev_info(&pdev->dev,
1496                         "PHY reset is blocked due to SOL/IDER session.\n");
1497
1498         netdev->features = NETIF_F_SG |
1499                            NETIF_F_IP_CSUM |
1500                            NETIF_F_HW_VLAN_TX |
1501                            NETIF_F_HW_VLAN_RX |
1502                            NETIF_F_HW_VLAN_FILTER;
1503
1504         netdev->features |= NETIF_F_IPV6_CSUM;
1505         netdev->features |= NETIF_F_TSO;
1506         netdev->features |= NETIF_F_TSO6;
1507         netdev->features |= NETIF_F_GRO;
1508
1509         netdev->vlan_features |= NETIF_F_TSO;
1510         netdev->vlan_features |= NETIF_F_TSO6;
1511         netdev->vlan_features |= NETIF_F_IP_CSUM;
1512         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1513         netdev->vlan_features |= NETIF_F_SG;
1514
1515         if (pci_using_dac)
1516                 netdev->features |= NETIF_F_HIGHDMA;
1517
1518         if (hw->mac.type >= e1000_82576)
1519                 netdev->features |= NETIF_F_SCTP_CSUM;
1520
1521         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1522
1523         /* before reading the NVM, reset the controller to put the device in a
1524          * known good starting state */
1525         hw->mac.ops.reset_hw(hw);
1526
1527         /* make sure the NVM is good */
1528         if (igb_validate_nvm_checksum(hw) < 0) {
1529                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1530                 err = -EIO;
1531                 goto err_eeprom;
1532         }
1533
1534         /* copy the MAC address out of the NVM */
1535         if (hw->mac.ops.read_mac_addr(hw))
1536                 dev_err(&pdev->dev, "NVM Read Error\n");
1537
1538         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1539         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1540
1541         if (!is_valid_ether_addr(netdev->perm_addr)) {
1542                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1543                 err = -EIO;
1544                 goto err_eeprom;
1545         }
1546
1547         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1548                     (unsigned long) adapter);
1549         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1550                     (unsigned long) adapter);
1551
1552         INIT_WORK(&adapter->reset_task, igb_reset_task);
1553         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1554
1555         /* Initialize link properties that are user-changeable */
1556         adapter->fc_autoneg = true;
1557         hw->mac.autoneg = true;
1558         hw->phy.autoneg_advertised = 0x2f;
1559
1560         hw->fc.requested_mode = e1000_fc_default;
1561         hw->fc.current_mode = e1000_fc_default;
1562
1563         igb_validate_mdi_setting(hw);
1564
1565         /* Initial Wake on LAN setting: if APM wake is enabled in the EEPROM,
1566          * enable the ACPI Magic Packet filter
1567          */
1568
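             /* the initialization control word that carries the APM enable bit
              * lives at a different NVM offset for each LAN function, so pick
              * the right word for this port */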
1569         if (hw->bus.func == 0)
1570                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1571         else if (hw->mac.type == e1000_82580)
1572                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1573                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1574                                  &eeprom_data);
1575         else if (hw->bus.func == 1)
1576                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1577
1578         if (eeprom_data & eeprom_apme_mask)
1579                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1580
1581         /* now that we have the eeprom settings, apply the special cases where
1582          * the eeprom may be wrong or the board simply won't support wake on
1583          * lan on a particular port */
1584         switch (pdev->device) {
1585         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1586                 adapter->eeprom_wol = 0;
1587                 break;
1588         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1589         case E1000_DEV_ID_82576_FIBER:
1590         case E1000_DEV_ID_82576_SERDES:
1591                 /* Wake events only supported on port A for dual fiber
1592                  * regardless of eeprom setting */
1593                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1594                         adapter->eeprom_wol = 0;
1595                 break;
1596         case E1000_DEV_ID_82576_QUAD_COPPER:
1597         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
1598                 /* if quad port adapter, disable WoL on all but port A */
1599                 if (global_quad_port_a != 0)
1600                         adapter->eeprom_wol = 0;
1601                 else
1602                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1603                 /* Reset for multiple quad port adapters */
1604                 if (++global_quad_port_a == 4)
1605                         global_quad_port_a = 0;
1606                 break;
1607         }
1608
1609         /* initialize the wol settings based on the eeprom settings */
1610         adapter->wol = adapter->eeprom_wol;
1611         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1612
1613         /* reset the hardware with the new settings */
1614         igb_reset(adapter);
1615
1616         /* let the f/w know that the h/w is now under the control of the
1617          * driver. */
1618         igb_get_hw_control(adapter);
1619
1620         strcpy(netdev->name, "eth%d");
1621         err = register_netdev(netdev);
1622         if (err)
1623                 goto err_register;
1624
1625         /* carrier off reporting is important to ethtool even BEFORE open */
1626         netif_carrier_off(netdev);
1627
1628 #ifdef CONFIG_IGB_DCA
1629         if (dca_add_requester(&pdev->dev) == 0) {
1630                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1631                 dev_info(&pdev->dev, "DCA enabled\n");
1632                 igb_setup_dca(adapter);
1633         }
1634
1635 #endif
1636         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1637         /* print bus type/speed/width info */
1638         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1639                  netdev->name,
1640                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1641                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
1642                                                             "unknown"),
1643                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1644                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1645                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1646                    "unknown"),
1647                  netdev->dev_addr);
1648
1649         igb_read_part_num(hw, &part_num);
1650         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1651                 (part_num >> 8), (part_num & 0xff));
1652
1653         dev_info(&pdev->dev,
1654                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1655                 adapter->msix_entries ? "MSI-X" :
1656                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1657                 adapter->num_rx_queues, adapter->num_tx_queues);
1658
1659         return 0;
1660
1661 err_register:
1662         igb_release_hw_control(adapter);
1663 err_eeprom:
1664         if (!igb_check_reset_block(hw))
1665                 igb_reset_phy(hw);
1666
1667         if (hw->flash_address)
1668                 iounmap(hw->flash_address);
1669 err_sw_init:
1670         igb_clear_interrupt_scheme(adapter);
1671         iounmap(hw->hw_addr);
1672 err_ioremap:
1673         free_netdev(netdev);
1674 err_alloc_etherdev:
1675         pci_release_selected_regions(pdev,
1676                                      pci_select_bars(pdev, IORESOURCE_MEM));
1677 err_pci_reg:
1678 err_dma:
1679         pci_disable_device(pdev);
1680         return err;
1681 }
1682
1683 /**
1684  * igb_remove - Device Removal Routine
1685  * @pdev: PCI device information struct
1686  *
1687  * igb_remove is called by the PCI subsystem to alert the driver
1688  * that it should release a PCI device.  This could be caused by a
1689  * Hot-Plug event, or because the driver is going to be removed from
1690  * memory.
1691  **/
1692 static void __devexit igb_remove(struct pci_dev *pdev)
1693 {
1694         struct net_device *netdev = pci_get_drvdata(pdev);
1695         struct igb_adapter *adapter = netdev_priv(netdev);
1696         struct e1000_hw *hw = &adapter->hw;
1697
1698         /* flush_scheduled_work() may reschedule our watchdog task, so
1699          * explicitly disable watchdog tasks from being rescheduled  */
1700         set_bit(__IGB_DOWN, &adapter->state);
1701         del_timer_sync(&adapter->watchdog_timer);
1702         del_timer_sync(&adapter->phy_info_timer);
1703
1704         flush_scheduled_work();
1705
1706 #ifdef CONFIG_IGB_DCA
1707         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
1708                 dev_info(&pdev->dev, "DCA disabled\n");
1709                 dca_remove_requester(&pdev->dev);
1710                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
1711                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
1712         }
1713 #endif
1714
1715         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
1716          * would have already happened in close and is redundant. */
1717         igb_release_hw_control(adapter);
1718
1719         unregister_netdev(netdev);
1720
1721         igb_clear_interrupt_scheme(adapter);
1722
1723 #ifdef CONFIG_PCI_IOV
1724         /* reclaim resources allocated to VFs */
1725         if (adapter->vf_data) {
1726                 /* disable iov and allow time for transactions to clear */
1727                 pci_disable_sriov(pdev);
1728                 msleep(500);
1729
1730                 kfree(adapter->vf_data);
1731                 adapter->vf_data = NULL;
1732                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1733                 msleep(100);
1734                 dev_info(&pdev->dev, "IOV Disabled\n");
1735         }
1736 #endif
1737
1738         iounmap(hw->hw_addr);
1739         if (hw->flash_address)
1740                 iounmap(hw->flash_address);
1741         pci_release_selected_regions(pdev,
1742                                      pci_select_bars(pdev, IORESOURCE_MEM));
1743
1744         free_netdev(netdev);
1745
1746         pci_disable_pcie_error_reporting(pdev);
1747
1748         pci_disable_device(pdev);
1749 }
1750
1751 /**
1752  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
1753  * @adapter: board private structure to initialize
1754  *
1755  * This function initializes the vf specific data storage and then attempts to
1756  * allocate the VFs.  The reason for this ordering is that it is much
1757  * more expensive time-wise to disable SR-IOV than it is to allocate and free
1758  * the memory for the VFs.
1759  **/
1760 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
1761 {
1762 #ifdef CONFIG_PCI_IOV
1763         struct pci_dev *pdev = adapter->pdev;
1764
1765         if (adapter->vfs_allocated_count > 7)
1766                 adapter->vfs_allocated_count = 7;
1767
1768         if (adapter->vfs_allocated_count) {
1769                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
1770                                            sizeof(struct vf_data_storage),
1771                                            GFP_KERNEL);
1772                 /* if allocation failed then we do not support SR-IOV */
1773                 if (!adapter->vf_data) {
1774                         adapter->vfs_allocated_count = 0;
1775                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
1776                                 "Data Storage\n");
1777                 }
1778         }
1779
1780         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
1781                 kfree(adapter->vf_data);
1782                 adapter->vf_data = NULL;
1783 #endif /* CONFIG_PCI_IOV */
1784                 adapter->vfs_allocated_count = 0;
1785 #ifdef CONFIG_PCI_IOV
1786         } else {
1787                 unsigned char mac_addr[ETH_ALEN];
1788                 int i;
1789                 dev_info(&pdev->dev, "%d vfs allocated\n",
1790                          adapter->vfs_allocated_count);
1791                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
1792                         random_ether_addr(mac_addr);
1793                         igb_set_vf_mac(adapter, i, mac_addr);
1794                 }
1795         }
1796 #endif /* CONFIG_PCI_IOV */
1797 }
1798
1799
1800 /**
1801  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
1802  * @adapter: board private structure to initialize
1803  *
1804  * igb_init_hw_timer initializes the function pointer and values for the hw
1805  * timer found in hardware.
1806  **/
1807 static void igb_init_hw_timer(struct igb_adapter *adapter)
1808 {
1809         struct e1000_hw *hw = &adapter->hw;
1810
1811         switch (hw->mac.type) {
1812         case e1000_i350:
1813         case e1000_82580:
1814                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1815                 adapter->cycles.read = igb_read_clock;
1816                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1817                 adapter->cycles.mult = 1;
1818                 /*
1819                  * The 82580 timesync hardware increments the system timer by 8ns
1820                  * every 8ns, and the value cannot be shifted.  Instead we need to shift
1821                  * the registers to generate a 64bit timer value.  As a result
1822                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
1823                  * 24 in order to generate a larger value for synchronization.
1824                  */
1825                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
1826                 /* disable system timer temporarily by setting bit 31 */
1827                 wr32(E1000_TSAUXC, 0x80000000);
1828                 wrfl();
1829
1830                 /* Set registers so that rollover occurs soon to test this. */
1831                 wr32(E1000_SYSTIMR, 0x00000000);
1832                 wr32(E1000_SYSTIML, 0x80000000);
1833                 wr32(E1000_SYSTIMH, 0x000000FF);
1834                 wrfl();
1835
1836                 /* enable system timer by clearing bit 31 */
1837                 wr32(E1000_TSAUXC, 0x0);
1838                 wrfl();
1839
1840                 timecounter_init(&adapter->clock,
1841                                  &adapter->cycles,
1842                                  ktime_to_ns(ktime_get_real()));
1843                 /*
1844                  * Synchronize our NIC clock against system wall clock. NIC
1845                  * time stamp reading requires ~3us per sample, each sample
1846                  * was pretty stable even under load => only require 10
1847                  * samples for each offset comparison.
1848                  */
1849                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1850                 adapter->compare.source = &adapter->clock;
1851                 adapter->compare.target = ktime_get_real;
1852                 adapter->compare.num_samples = 10;
1853                 timecompare_update(&adapter->compare, 0);
1854                 break;
1855         case e1000_82576:
1856                 /*
1857                  * Initialize hardware timer: we keep it running just in case
1858                  * that some program needs it later on.
1859                  */
1860                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
1861                 adapter->cycles.read = igb_read_clock;
1862                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
1863                 adapter->cycles.mult = 1;
1864                 /*
1865                  * Scale the NIC clock cycle by a large factor so that
1866                  * relatively small clock corrections can be added or
1867                  * subtracted at each clock tick. The drawbacks of a large
1868                  * factor are a) that the clock register overflows more quickly
1869                  * (not such a big deal) and b) that the increment per tick has
1870                  * to fit into 24 bits.  As a result we need to use a shift of
1871                  * 19 so we can fit a value of 16 into the TIMINCA register.
1872                  */
1873                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
1874                 wr32(E1000_TIMINCA,
1875                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
1876                                 (16 << IGB_82576_TSYNC_SHIFT));
1877
1878                 /* Set registers so that rollover occurs soon to test this. */
1879                 wr32(E1000_SYSTIML, 0x00000000);
1880                 wr32(E1000_SYSTIMH, 0xFF800000);
1881                 wrfl();
1882
1883                 timecounter_init(&adapter->clock,
1884                                  &adapter->cycles,
1885                                  ktime_to_ns(ktime_get_real()));
1886                 /*
1887                  * Synchronize our NIC clock against system wall clock. NIC
1888                  * time stamp reading requires ~3us per sample, each sample
1889                  * was pretty stable even under load => only require 10
1890                  * samples for each offset comparison.
1891                  */
1892                 memset(&adapter->compare, 0, sizeof(adapter->compare));
1893                 adapter->compare.source = &adapter->clock;
1894                 adapter->compare.target = ktime_get_real;
1895                 adapter->compare.num_samples = 10;
1896                 timecompare_update(&adapter->compare, 0);
1897                 break;
1898         case e1000_82575:
1899                 /* 82575 does not support timesync */
1900         default:
1901                 break;
1902         }
1903
1904 }
1905
1906 /**
1907  * igb_sw_init - Initialize general software structures (struct igb_adapter)
1908  * @adapter: board private structure to initialize
1909  *
1910  * igb_sw_init initializes the Adapter private data structure.
1911  * Fields are initialized based on PCI device information and
1912  * OS network device settings (MTU size).
1913  **/
1914 static int __devinit igb_sw_init(struct igb_adapter *adapter)
1915 {
1916         struct e1000_hw *hw = &adapter->hw;
1917         struct net_device *netdev = adapter->netdev;
1918         struct pci_dev *pdev = adapter->pdev;
1919
1920         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
1921
1922         adapter->tx_ring_count = IGB_DEFAULT_TXD;
1923         adapter->rx_ring_count = IGB_DEFAULT_RXD;
1924         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
1925         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
1926
1927         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
1928         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
1929
1930 #ifdef CONFIG_PCI_IOV
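             /* max_vfs is the VF count requested when the module was loaded;
              * SR-IOV is only configured here for 82576-based adapters */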
1931         if (hw->mac.type == e1000_82576)
1932                 adapter->vfs_allocated_count = max_vfs;
1933
1934 #endif /* CONFIG_PCI_IOV */
1935         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
1936
1937         /*
1938          * if rss_queues > 4 or vfs are going to be allocated with rss_queues
1939          * then we should combine the queues into a queue pair in order to
1940          * conserve interrupts due to limited supply
1941          */
1942         if ((adapter->rss_queues > 4) ||
1943             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
1944                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1945
1946         /* This call may decrease the number of queues */
1947         if (igb_init_interrupt_scheme(adapter)) {
1948                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1949                 return -ENOMEM;
1950         }
1951
1952         igb_init_hw_timer(adapter);
1953         igb_probe_vfs(adapter);
1954
1955         /* Explicitly disable IRQ since the NIC can be in any state. */
1956         igb_irq_disable(adapter);
1957
1958         set_bit(__IGB_DOWN, &adapter->state);
1959         return 0;
1960 }
1961
1962 /**
1963  * igb_open - Called when a network interface is made active
1964  * @netdev: network interface device structure
1965  *
1966  * Returns 0 on success, negative value on failure
1967  *
1968  * The open entry point is called when a network interface is made
1969  * active by the system (IFF_UP).  At this point all resources needed
1970  * for transmit and receive operations are allocated, the interrupt
1971  * handler is registered with the OS, the watchdog timer is started,
1972  * and the stack is notified that the interface is ready.
1973  **/
1974 static int igb_open(struct net_device *netdev)
1975 {
1976         struct igb_adapter *adapter = netdev_priv(netdev);
1977         struct e1000_hw *hw = &adapter->hw;
1978         int err;
1979         int i;
1980
1981         /* disallow open during test */
1982         if (test_bit(__IGB_TESTING, &adapter->state))
1983                 return -EBUSY;
1984
1985         netif_carrier_off(netdev);
1986
1987         /* allocate transmit descriptors */
1988         err = igb_setup_all_tx_resources(adapter);
1989         if (err)
1990                 goto err_setup_tx;
1991
1992         /* allocate receive descriptors */
1993         err = igb_setup_all_rx_resources(adapter);
1994         if (err)
1995                 goto err_setup_rx;
1996
1997         igb_power_up_link(adapter);
1998
1999         /* before we allocate an interrupt, we must be ready to handle it.
2000          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2001          * as soon as we call pci_request_irq, so we have to set up our
2002          * clean_rx handler before we do so.  */
2003         igb_configure(adapter);
2004
2005         err = igb_request_irq(adapter);
2006         if (err)
2007                 goto err_req_irq;
2008
2009         /* From here on the code is the same as igb_up() */
2010         clear_bit(__IGB_DOWN, &adapter->state);
2011
2012         for (i = 0; i < adapter->num_q_vectors; i++) {
2013                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2014                 napi_enable(&q_vector->napi);
2015         }
2016
2017         /* Clear any pending interrupts. */
2018         rd32(E1000_ICR);
2019
2020         igb_irq_enable(adapter);
2021
2022         /* notify VFs that reset has been completed */
2023         if (adapter->vfs_allocated_count) {
2024                 u32 reg_data = rd32(E1000_CTRL_EXT);
2025                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2026                 wr32(E1000_CTRL_EXT, reg_data);
2027         }
2028
2029         netif_tx_start_all_queues(netdev);
2030
2031         /* start the watchdog. */
2032         hw->mac.get_link_status = 1;
2033         schedule_work(&adapter->watchdog_task);
2034
2035         return 0;
2036
2037 err_req_irq:
2038         igb_release_hw_control(adapter);
2039         igb_power_down_link(adapter);
2040         igb_free_all_rx_resources(adapter);
2041 err_setup_rx:
2042         igb_free_all_tx_resources(adapter);
2043 err_setup_tx:
2044         igb_reset(adapter);
2045
2046         return err;
2047 }
2048
2049 /**
2050  * igb_close - Disables a network interface
2051  * @netdev: network interface device structure
2052  *
2053  * Returns 0, this is not allowed to fail
2054  *
2055  * The close entry point is called when an interface is de-activated
2056  * by the OS.  The hardware is still under the driver's control, but
2057  * needs to be disabled.  A global MAC reset is issued to stop the
2058  * hardware, and all transmit and receive resources are freed.
2059  **/
2060 static int igb_close(struct net_device *netdev)
2061 {
2062         struct igb_adapter *adapter = netdev_priv(netdev);
2063
2064         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2065         igb_down(adapter);
2066
2067         igb_free_irq(adapter);
2068
2069         igb_free_all_tx_resources(adapter);
2070         igb_free_all_rx_resources(adapter);
2071
2072         return 0;
2073 }
2074
2075 /**
2076  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2077  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2078  *
2079  * Return 0 on success, negative on failure
2080  **/
2081 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2082 {
2083         struct pci_dev *pdev = tx_ring->pdev;
2084         int size;
2085
2086         size = sizeof(struct igb_buffer) * tx_ring->count;
2087         tx_ring->buffer_info = vmalloc(size);
2088         if (!tx_ring->buffer_info)
2089                 goto err;
2090         memset(tx_ring->buffer_info, 0, size);
2091
2092         /* round up to nearest 4K */
2093         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2094         tx_ring->size = ALIGN(tx_ring->size, 4096);
2095
2096         tx_ring->desc = pci_alloc_consistent(pdev,
2097                                              tx_ring->size,
2098                                              &tx_ring->dma);
2099
2100         if (!tx_ring->desc)
2101                 goto err;
2102
2103         tx_ring->next_to_use = 0;
2104         tx_ring->next_to_clean = 0;
2105         return 0;
2106
2107 err:
2108         vfree(tx_ring->buffer_info);
2109         dev_err(&pdev->dev,
2110                 "Unable to allocate memory for the transmit descriptor ring\n");
2111         return -ENOMEM;
2112 }
2113
2114 /**
2115  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2116  *                                (Descriptors) for all queues
2117  * @adapter: board private structure
2118  *
2119  * Return 0 on success, negative on failure
2120  **/
2121 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2122 {
2123         struct pci_dev *pdev = adapter->pdev;
2124         int i, err = 0;
2125
2126         for (i = 0; i < adapter->num_tx_queues; i++) {
2127                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2128                 if (err) {
2129                         dev_err(&pdev->dev,
2130                                 "Allocation for Tx Queue %u failed\n", i);
2131                         for (i--; i >= 0; i--)
2132                                 igb_free_tx_resources(adapter->tx_ring[i]);
2133                         break;
2134                 }
2135         }
2136
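             /* map every possible stack Tx queue index onto one of the
              * allocated rings, wrapping round-robin when there are fewer
              * rings than IGB_ABS_MAX_TX_QUEUES */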
2137         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2138                 int r_idx = i % adapter->num_tx_queues;
2139                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2140         }
2141         return err;
2142 }
2143
2144 /**
2145  * igb_setup_tctl - configure the transmit control registers
2146  * @adapter: Board private structure
2147  **/
2148 void igb_setup_tctl(struct igb_adapter *adapter)
2149 {
2150         struct e1000_hw *hw = &adapter->hw;
2151         u32 tctl;
2152
2153         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2154         wr32(E1000_TXDCTL(0), 0);
2155
2156         /* Program the Transmit Control Register */
2157         tctl = rd32(E1000_TCTL);
2158         tctl &= ~E1000_TCTL_CT;
2159         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2160                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2161
2162         igb_config_collision_dist(hw);
2163
2164         /* Enable transmits */
2165         tctl |= E1000_TCTL_EN;
2166
2167         wr32(E1000_TCTL, tctl);
2168 }
2169
2170 /**
2171  * igb_configure_tx_ring - Configure transmit ring after Reset
2172  * @adapter: board private structure
2173  * @ring: tx ring to configure
2174  *
2175  * Configure a transmit ring after a reset.
2176  **/
2177 void igb_configure_tx_ring(struct igb_adapter *adapter,
2178                            struct igb_ring *ring)
2179 {
2180         struct e1000_hw *hw = &adapter->hw;
2181         u32 txdctl;
2182         u64 tdba = ring->dma;
2183         int reg_idx = ring->reg_idx;
2184
2185         /* disable the queue */
2186         txdctl = rd32(E1000_TXDCTL(reg_idx));
2187         wr32(E1000_TXDCTL(reg_idx),
2188                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2189         wrfl();
2190         mdelay(10);
2191
2192         wr32(E1000_TDLEN(reg_idx),
2193                         ring->count * sizeof(union e1000_adv_tx_desc));
2194         wr32(E1000_TDBAL(reg_idx),
2195                         tdba & 0x00000000ffffffffULL);
2196         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2197
2198         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2199         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2200         writel(0, ring->head);
2201         writel(0, ring->tail);
2202
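             /* set the Tx descriptor prefetch, host, and write-back
              * thresholds before re-enabling the queue */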
2203         txdctl |= IGB_TX_PTHRESH;
2204         txdctl |= IGB_TX_HTHRESH << 8;
2205         txdctl |= IGB_TX_WTHRESH << 16;
2206
2207         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2208         wr32(E1000_TXDCTL(reg_idx), txdctl);
2209 }
2210
2211 /**
2212  * igb_configure_tx - Configure transmit Unit after Reset
2213  * @adapter: board private structure
2214  *
2215  * Configure the Tx unit of the MAC after a reset.
2216  **/
2217 static void igb_configure_tx(struct igb_adapter *adapter)
2218 {
2219         int i;
2220
2221         for (i = 0; i < adapter->num_tx_queues; i++)
2222                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2223 }
2224
2225 /**
2226  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2227  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2228  *
2229  * Returns 0 on success, negative on failure
2230  **/
2231 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2232 {
2233         struct pci_dev *pdev = rx_ring->pdev;
2234         int size, desc_len;
2235
2236         size = sizeof(struct igb_buffer) * rx_ring->count;
2237         rx_ring->buffer_info = vmalloc(size);
2238         if (!rx_ring->buffer_info)
2239                 goto err;
2240         memset(rx_ring->buffer_info, 0, size);
2241
2242         desc_len = sizeof(union e1000_adv_rx_desc);
2243
2244         /* Round up to nearest 4K */
2245         rx_ring->size = rx_ring->count * desc_len;
2246         rx_ring->size = ALIGN(rx_ring->size, 4096);
2247
2248         rx_ring->desc = pci_alloc_consistent(pdev, rx_ring->size,
2249                                              &rx_ring->dma);
2250
2251         if (!rx_ring->desc)
2252                 goto err;
2253
2254         rx_ring->next_to_clean = 0;
2255         rx_ring->next_to_use = 0;
2256
2257         return 0;
2258
2259 err:
2260         vfree(rx_ring->buffer_info);
2261         rx_ring->buffer_info = NULL;
2262         dev_err(&pdev->dev, "Unable to allocate memory for "
2263                 "the receive descriptor ring\n");
2264         return -ENOMEM;
2265 }
2266
2267 /**
2268  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2269  *                                (Descriptors) for all queues
2270  * @adapter: board private structure
2271  *
2272  * Return 0 on success, negative on failure
2273  **/
2274 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2275 {
2276         struct pci_dev *pdev = adapter->pdev;
2277         int i, err = 0;
2278
2279         for (i = 0; i < adapter->num_rx_queues; i++) {
2280                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2281                 if (err) {
2282                         dev_err(&pdev->dev,
2283                                 "Allocation for Rx Queue %u failed\n", i);
2284                         for (i--; i >= 0; i--)
2285                                 igb_free_rx_resources(adapter->rx_ring[i]);
2286                         break;
2287                 }
2288         }
2289
2290         return err;
2291 }
2292
2293 /**
2294  * igb_setup_mrqc - configure the multiple receive queue control registers
2295  * @adapter: Board private structure
2296  **/
2297 static void igb_setup_mrqc(struct igb_adapter *adapter)
2298 {
2299         struct e1000_hw *hw = &adapter->hw;
2300         u32 mrqc, rxcsum;
2301         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2302         union e1000_reta {
2303                 u32 dword;
2304                 u8  bytes[4];
2305         } reta;
2306         static const u8 rsshash[40] = {
2307                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2308                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2309                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2310                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2311
2312         /* Fill out hash function seeds */
2313         for (j = 0; j < 10; j++) {
2314                 u32 rsskey = rsshash[(j * 4)];
2315                 rsskey |= rsshash[(j * 4) + 1] << 8;
2316                 rsskey |= rsshash[(j * 4) + 2] << 16;
2317                 rsskey |= rsshash[(j * 4) + 3] << 24;
2318                 array_wr32(E1000_RSSRK(0), j, rsskey);
2319         }
2320
2321         num_rx_queues = adapter->rss_queues;
2322
2323         if (adapter->vfs_allocated_count) {
2324                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2325                 switch (hw->mac.type) {
2326                 case e1000_i350:
2327                 case e1000_82580:
2328                         num_rx_queues = 1;
2329                         shift = 0;
2330                         break;
2331                 case e1000_82576:
2332                         shift = 3;
2333                         num_rx_queues = 2;
2334                         break;
2335                 case e1000_82575:
2336                         shift = 2;
2337                         shift2 = 6;
2338                 default:
2339                         break;
2340                 }
2341         } else {
2342                 if (hw->mac.type == e1000_82575)
2343                         shift = 6;
2344         }
2345
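             /* program the 128-entry receive redirection table (RETA), four
              * bytes at a time; each entry maps a hash result to an RSS queue */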
2346         for (j = 0; j < (32 * 4); j++) {
2347                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2348                 if (shift2)
2349                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2350                 if ((j & 3) == 3)
2351                         wr32(E1000_RETA(j >> 2), reta.dword);
2352         }
2353
2354         /*
2355          * Disable raw packet checksumming so that RSS hash is placed in
2356          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2357          * offloads as they are enabled by default
2358          */
2359         rxcsum = rd32(E1000_RXCSUM);
2360         rxcsum |= E1000_RXCSUM_PCSD;
2361
2362         if (adapter->hw.mac.type >= e1000_82576)
2363                 /* Enable Receive Checksum Offload for SCTP */
2364                 rxcsum |= E1000_RXCSUM_CRCOFL;
2365
2366         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2367         wr32(E1000_RXCSUM, rxcsum);
2368
2369         /* If VMDq is enabled then we set the appropriate mode for that, else
2370          * we default to RSS so that an RSS hash is calculated per packet even
2371          * if we are only using one queue */
2372         if (adapter->vfs_allocated_count) {
2373                 if (hw->mac.type > e1000_82575) {
2374                         /* Set the default pool for the PF's first queue */
2375                         u32 vtctl = rd32(E1000_VT_CTL);
2376                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2377                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2378                         vtctl |= adapter->vfs_allocated_count <<
2379                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2380                         wr32(E1000_VT_CTL, vtctl);
2381                 }
2382                 if (adapter->rss_queues > 1)
2383                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2384                 else
2385                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2386         } else {
2387                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2388         }
2389         igb_vmm_control(adapter);
2390
2391         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2392                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2393         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2394                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2395         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2396                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2397         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2398                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2399
2400         wr32(E1000_MRQC, mrqc);
2401 }
2402
2403 /**
2404  * igb_setup_rctl - configure the receive control registers
2405  * @adapter: Board private structure
2406  **/
2407 void igb_setup_rctl(struct igb_adapter *adapter)
2408 {
2409         struct e1000_hw *hw = &adapter->hw;
2410         u32 rctl;
2411
2412         rctl = rd32(E1000_RCTL);
2413
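             /* clear the multicast offset and loopback bits, then enable the
              * receiver, accept broadcast frames, and set the Rx descriptor
              * minimum threshold to half the ring */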
2414         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2415         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2416
2417         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2418                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2419
2420         /*
2421          * enable stripping of CRC. It's unlikely this will break BMC
2422          * redirection as it did with e1000. Newer features require
2423          * that the HW strips the CRC.
2424          */
2425         rctl |= E1000_RCTL_SECRC;
2426
2427         /* disable store bad packets and clear size bits. */
2428         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2429
2430         /* enable LPE to prevent packets larger than max_frame_size */
2431         rctl |= E1000_RCTL_LPE;
2432
2433         /* disable queue 0 to prevent tail write w/o re-config */
2434         wr32(E1000_RXDCTL(0), 0);
2435
2436         /* Attention!!!  For SR-IOV PF driver operations you must enable
2437          * queue drop for all VF and PF queues to prevent head of line blocking
2438          * if an un-trusted VF does not provide descriptors to hardware.
2439          */
2440         if (adapter->vfs_allocated_count) {
2441                 /* set all queue drop enable bits */
2442                 wr32(E1000_QDE, ALL_QUEUES);
2443         }
2444
2445         wr32(E1000_RCTL, rctl);
2446 }
2447
2448 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2449                                    int vfn)
2450 {
2451         struct e1000_hw *hw = &adapter->hw;
2452         u32 vmolr;
2453
2454         /* if it isn't the PF, check to see if VFs are enabled and
2455          * increase the size to support vlan tags */
2456         if (vfn < adapter->vfs_allocated_count &&
2457             adapter->vf_data[vfn].vlans_enabled)
2458                 size += VLAN_TAG_SIZE;
2459
2460         vmolr = rd32(E1000_VMOLR(vfn));
2461         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2462         vmolr |= size | E1000_VMOLR_LPE;
2463         wr32(E1000_VMOLR(vfn), vmolr);
2464
2465         return 0;
2466 }
2467
2468 /**
2469  * igb_rlpml_set - set maximum receive packet size
2470  * @adapter: board private structure
2471  *
2472  * Configure maximum receivable packet size.
2473  **/
2474 static void igb_rlpml_set(struct igb_adapter *adapter)
2475 {
2476         u32 max_frame_size = adapter->max_frame_size;
2477         struct e1000_hw *hw = &adapter->hw;
2478         u16 pf_id = adapter->vfs_allocated_count;
2479
2480         if (adapter->vlgrp)
2481                 max_frame_size += VLAN_TAG_SIZE;
2482
2483         /* if vfs are enabled we set RLPML to the largest possible request
2484          * size and set the VMOLR RLPML to the size we need */
2485         if (pf_id) {
2486                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2487                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2488         }
2489
2490         wr32(E1000_RLPML, max_frame_size);
2491 }
2492
2493 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2494                                  int vfn, bool aupe)
2495 {
2496         struct e1000_hw *hw = &adapter->hw;
2497         u32 vmolr;
2498
2499         /*
2500          * This register exists only on 82576 and newer, so on older hardware
2501          * we should exit and do nothing
2502          */
2503         if (hw->mac.type < e1000_82576)
2504                 return;
2505
2506         vmolr = rd32(E1000_VMOLR(vfn));
2507         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2508         if (aupe)
2509                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2510         else
2511                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2512
2513         /* clear all bits that might not be set */
2514         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2515
2516         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2517                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2518         /*
2519          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2520          * multicast packets
2521          */
2522         if (vfn <= adapter->vfs_allocated_count)
2523                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2524
2525         wr32(E1000_VMOLR(vfn), vmolr);
2526 }
2527
2528 /**
2529  * igb_configure_rx_ring - Configure a receive ring after Reset
2530  * @adapter: board private structure
2531  * @ring: receive ring to be configured
2532  *
2533  * Configure a receive ring of the MAC after a reset.
2534  **/
2535 void igb_configure_rx_ring(struct igb_adapter *adapter,
2536                            struct igb_ring *ring)
2537 {
2538         struct e1000_hw *hw = &adapter->hw;
2539         u64 rdba = ring->dma;
2540         int reg_idx = ring->reg_idx;
2541         u32 srrctl, rxdctl;
2542
2543         /* disable the queue */
2544         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2545         wr32(E1000_RXDCTL(reg_idx),
2546                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2547
2548         /* Set DMA base address registers */
2549         wr32(E1000_RDBAL(reg_idx),
2550              rdba & 0x00000000ffffffffULL);
2551         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2552         wr32(E1000_RDLEN(reg_idx),
2553                        ring->count * sizeof(union e1000_adv_rx_desc));
2554
2555         /* initialize head and tail */
2556         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2557         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2558         writel(0, ring->head);
2559         writel(0, ring->tail);
2560
2561         /* set descriptor configuration */
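             /* small buffer sizes use always-split descriptors (headers into
              * the small buffer, payload into a half page); larger buffers use
              * a single advanced one-buffer descriptor */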
2562         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2563                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2564                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2565 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2566                 srrctl |= IGB_RXBUFFER_16384 >>
2567                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2568 #else
2569                 srrctl |= (PAGE_SIZE / 2) >>
2570                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2571 #endif
2572                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2573         } else {
2574                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2575                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2576                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2577         }
2578         if (hw->mac.type == e1000_82580)
2579                 srrctl |= E1000_SRRCTL_TIMESTAMP;
2580         /* Only set Drop Enable if we are supporting multiple queues */
2581         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2582                 srrctl |= E1000_SRRCTL_DROP_EN;
2583
2584         wr32(E1000_SRRCTL(reg_idx), srrctl);
2585
2586         /* set filtering for VMDQ pools */
2587         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2588
2589         /* enable receive descriptor fetching */
2590         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2591         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2592         rxdctl &= 0xFFF00000;
2593         rxdctl |= IGB_RX_PTHRESH;
2594         rxdctl |= IGB_RX_HTHRESH << 8;
2595         rxdctl |= IGB_RX_WTHRESH << 16;
2596         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2597 }
2598
2599 /**
2600  * igb_configure_rx - Configure receive Unit after Reset
2601  * @adapter: board private structure
2602  *
2603  * Configure the Rx unit of the MAC after a reset.
2604  **/
2605 static void igb_configure_rx(struct igb_adapter *adapter)
2606 {
2607         int i;
2608
2609         /* set UTA to appropriate mode */
2610         igb_set_uta(adapter);
2611
2612         /* set the correct pool for the PF default MAC address in entry 0 */
2613         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2614                          adapter->vfs_allocated_count);
2615
2616         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2617          * the Base and Length of the Rx Descriptor Ring */
2618         for (i = 0; i < adapter->num_rx_queues; i++)
2619                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2620 }
2621
2622 /**
2623  * igb_free_tx_resources - Free Tx Resources per Queue
2624  * @tx_ring: Tx descriptor ring for a specific queue
2625  *
2626  * Free all transmit software resources
2627  **/
2628 void igb_free_tx_resources(struct igb_ring *tx_ring)
2629 {
2630         igb_clean_tx_ring(tx_ring);
2631
2632         vfree(tx_ring->buffer_info);
2633         tx_ring->buffer_info = NULL;
2634
2635         /* if not set, then don't free */
2636         if (!tx_ring->desc)
2637                 return;
2638
2639         pci_free_consistent(tx_ring->pdev, tx_ring->size,
2640                             tx_ring->desc, tx_ring->dma);
2641
2642         tx_ring->desc = NULL;
2643 }
2644
2645 /**
2646  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2647  * @adapter: board private structure
2648  *
2649  * Free all transmit software resources
2650  **/
2651 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2652 {
2653         int i;
2654
2655         for (i = 0; i < adapter->num_tx_queues; i++)
2656                 igb_free_tx_resources(adapter->tx_ring[i]);
2657 }
2658
2659 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
2660                                     struct igb_buffer *buffer_info)
2661 {
2662         if (buffer_info->dma) {
2663                 if (buffer_info->mapped_as_page)
2664                         pci_unmap_page(tx_ring->pdev,
2665                                         buffer_info->dma,
2666                                         buffer_info->length,
2667                                         PCI_DMA_TODEVICE);
2668                 else
2669                         pci_unmap_single(tx_ring->pdev,
2670                                         buffer_info->dma,
2671                                         buffer_info->length,
2672                                         PCI_DMA_TODEVICE);
2673                 buffer_info->dma = 0;
2674         }
2675         if (buffer_info->skb) {
2676                 dev_kfree_skb_any(buffer_info->skb);
2677                 buffer_info->skb = NULL;
2678         }
2679         buffer_info->time_stamp = 0;
2680         buffer_info->length = 0;
2681         buffer_info->next_to_watch = 0;
2682         buffer_info->mapped_as_page = false;
2683 }
2684
2685 /**
2686  * igb_clean_tx_ring - Free Tx Buffers
2687  * @tx_ring: ring to be cleaned
2688  **/
2689 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
2690 {
2691         struct igb_buffer *buffer_info;
2692         unsigned long size;
2693         unsigned int i;
2694
2695         if (!tx_ring->buffer_info)
2696                 return;
2697         /* Free all the Tx ring sk_buffs */
2698
2699         for (i = 0; i < tx_ring->count; i++) {
2700                 buffer_info = &tx_ring->buffer_info[i];
2701                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
2702         }
2703
2704         size = sizeof(struct igb_buffer) * tx_ring->count;
2705         memset(tx_ring->buffer_info, 0, size);
2706
2707         /* Zero out the descriptor ring */
2708         memset(tx_ring->desc, 0, tx_ring->size);
2709
2710         tx_ring->next_to_use = 0;
2711         tx_ring->next_to_clean = 0;
2712 }
2713
2714 /**
2715  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
2716  * @adapter: board private structure
2717  **/
2718 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
2719 {
2720         int i;
2721
2722         for (i = 0; i < adapter->num_tx_queues; i++)
2723                 igb_clean_tx_ring(adapter->tx_ring[i]);
2724 }
2725
2726 /**
2727  * igb_free_rx_resources - Free Rx Resources
2728  * @rx_ring: ring to clean the resources from
2729  *
2730  * Free all receive software resources
2731  **/
2732 void igb_free_rx_resources(struct igb_ring *rx_ring)
2733 {
2734         igb_clean_rx_ring(rx_ring);
2735
2736         vfree(rx_ring->buffer_info);
2737         rx_ring->buffer_info = NULL;
2738
2739         /* if not set, then don't free */
2740         if (!rx_ring->desc)
2741                 return;
2742
2743         pci_free_consistent(rx_ring->pdev, rx_ring->size,
2744                             rx_ring->desc, rx_ring->dma);
2745
2746         rx_ring->desc = NULL;
2747 }
2748
2749 /**
2750  * igb_free_all_rx_resources - Free Rx Resources for All Queues
2751  * @adapter: board private structure
2752  *
2753  * Free all receive software resources
2754  **/
2755 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
2756 {
2757         int i;
2758
2759         for (i = 0; i < adapter->num_rx_queues; i++)
2760                 igb_free_rx_resources(adapter->rx_ring[i]);
2761 }
2762
2763 /**
2764  * igb_clean_rx_ring - Free Rx Buffers per Queue
2765  * @rx_ring: ring to free buffers from
2766  **/
2767 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
2768 {
2769         struct igb_buffer *buffer_info;
2770         unsigned long size;
2771         unsigned int i;
2772
2773         if (!rx_ring->buffer_info)
2774                 return;
2775
2776         /* Free all the Rx ring sk_buffs */
2777         for (i = 0; i < rx_ring->count; i++) {
2778                 buffer_info = &rx_ring->buffer_info[i];
2779                 if (buffer_info->dma) {
2780                         pci_unmap_single(rx_ring->pdev,
2781                                          buffer_info->dma,
2782                                          rx_ring->rx_buffer_len,
2783                                          PCI_DMA_FROMDEVICE);
2784                         buffer_info->dma = 0;
2785                 }
2786
2787                 if (buffer_info->skb) {
2788                         dev_kfree_skb(buffer_info->skb);
2789                         buffer_info->skb = NULL;
2790                 }
2791                 if (buffer_info->page_dma) {
2792                         pci_unmap_page(rx_ring->pdev,
2793                                        buffer_info->page_dma,
2794                                        PAGE_SIZE / 2,
2795                                        PCI_DMA_FROMDEVICE);
2796                         buffer_info->page_dma = 0;
2797                 }
2798                 if (buffer_info->page) {
2799                         put_page(buffer_info->page);
2800                         buffer_info->page = NULL;
2801                         buffer_info->page_offset = 0;
2802                 }
2803         }
2804
2805         size = sizeof(struct igb_buffer) * rx_ring->count;
2806         memset(rx_ring->buffer_info, 0, size);
2807
2808         /* Zero out the descriptor ring */
2809         memset(rx_ring->desc, 0, rx_ring->size);
2810
2811         rx_ring->next_to_clean = 0;
2812         rx_ring->next_to_use = 0;
2813 }
2814
2815 /**
2816  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
2817  * @adapter: board private structure
2818  **/
2819 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
2820 {
2821         int i;
2822
2823         for (i = 0; i < adapter->num_rx_queues; i++)
2824                 igb_clean_rx_ring(adapter->rx_ring[i]);
2825 }
2826
2827 /**
2828  * igb_set_mac - Change the Ethernet Address of the NIC
2829  * @netdev: network interface device structure
2830  * @p: pointer to an address structure
2831  *
2832  * Returns 0 on success, negative on failure
2833  **/
2834 static int igb_set_mac(struct net_device *netdev, void *p)
2835 {
2836         struct igb_adapter *adapter = netdev_priv(netdev);
2837         struct e1000_hw *hw = &adapter->hw;
2838         struct sockaddr *addr = p;
2839
2840         if (!is_valid_ether_addr(addr->sa_data))
2841                 return -EADDRNOTAVAIL;
2842
2843         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
2844         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
2845
2846         /* set the correct pool for the new PF MAC address in entry 0 */
2847         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
2848                          adapter->vfs_allocated_count);
2849
2850         return 0;
2851 }
2852
2853 /**
2854  * igb_write_mc_addr_list - write multicast addresses to MTA
2855  * @netdev: network interface device structure
2856  *
2857  * Writes multicast address list to the MTA hash table.
2858  * Returns: -ENOMEM on failure
2859  *                0 on no addresses written
2860  *                X on writing X addresses to MTA
2861  **/
2862 static int igb_write_mc_addr_list(struct net_device *netdev)
2863 {
2864         struct igb_adapter *adapter = netdev_priv(netdev);
2865         struct e1000_hw *hw = &adapter->hw;
2866         struct netdev_hw_addr *ha;
2867         u8  *mta_list;
2868         int i;
2869
2870         if (netdev_mc_empty(netdev)) {
2871                 /* nothing to program, so clear mc list */
2872                 igb_update_mc_addr_list(hw, NULL, 0);
2873                 igb_restore_vf_multicasts(adapter);
2874                 return 0;
2875         }
2876
2877         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
2878         if (!mta_list)
2879                 return -ENOMEM;
2880
2881         /* The shared function expects a packed array of only addresses. */
2882         i = 0;
2883         netdev_for_each_mc_addr(ha, netdev)
2884                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
2885
2886         igb_update_mc_addr_list(hw, mta_list, i);
2887         kfree(mta_list);
2888
2889         return netdev_mc_count(netdev);
2890 }
2891
2892 /**
2893  * igb_write_uc_addr_list - write unicast addresses to RAR table
2894  * @netdev: network interface device structure
2895  *
2896  * Writes unicast address list to the RAR table.
2897  * Returns: -ENOMEM on failure/insufficient address space
2898  *                0 on no addresses written
2899  *                X on writing X addresses to the RAR table
2900  **/
2901 static int igb_write_uc_addr_list(struct net_device *netdev)
2902 {
2903         struct igb_adapter *adapter = netdev_priv(netdev);
2904         struct e1000_hw *hw = &adapter->hw;
2905         unsigned int vfn = adapter->vfs_allocated_count;
2906         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
2907         int count = 0;
2908
2909         /* return -ENOMEM to indicate insufficient memory for the addresses */
2910         if (netdev_uc_count(netdev) > rar_entries)
2911                 return -ENOMEM;
2912
2913         if (!netdev_uc_empty(netdev) && rar_entries) {
2914                 struct netdev_hw_addr *ha;
2915
2916                 netdev_for_each_uc_addr(ha, netdev) {
2917                         if (!rar_entries)
2918                                 break;
2919                         igb_rar_set_qsel(adapter, ha->addr,
2920                                          rar_entries--,
2921                                          vfn);
2922                         count++;
2923                 }
2924         }
2925         /* zero out the remaining RAR entries in reverse order to avoid write combining */
2926         for (; rar_entries > 0 ; rar_entries--) {
2927                 wr32(E1000_RAH(rar_entries), 0);
2928                 wr32(E1000_RAL(rar_entries), 0);
2929         }
2930         wrfl();
2931
2932         return count;
2933 }
2934
2935 /**
2936  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
2937  * @netdev: network interface device structure
2938  *
2939  * The set_rx_mode entry point is called whenever the unicast or multicast
2940  * address lists or the network interface flags are updated.  This routine is
2941  * responsible for configuring the hardware for proper unicast, multicast,
2942  * promiscuous mode, and all-multi behavior.
2943  **/
2944 static void igb_set_rx_mode(struct net_device *netdev)
2945 {
2946         struct igb_adapter *adapter = netdev_priv(netdev);
2947         struct e1000_hw *hw = &adapter->hw;
2948         unsigned int vfn = adapter->vfs_allocated_count;
2949         u32 rctl, vmolr = 0;
2950         int count;
2951
2952         /* Check for Promiscuous and All Multicast modes */
2953         rctl = rd32(E1000_RCTL);
2954
2955         /* clear the affected bits */
2956         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
2957
2958         if (netdev->flags & IFF_PROMISC) {
2959                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
2960                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
2961         } else {
2962                 if (netdev->flags & IFF_ALLMULTI) {
2963                         rctl |= E1000_RCTL_MPE;
2964                         vmolr |= E1000_VMOLR_MPME;
2965                 } else {
2966                         /*
2967                          * Write addresses to the MTA; if the attempt fails
2968                          * then we should just turn on promiscuous mode so
2969                          * that we can at least receive multicast traffic
2970                          */
2971                         count = igb_write_mc_addr_list(netdev);
2972                         if (count < 0) {
2973                                 rctl |= E1000_RCTL_MPE;
2974                                 vmolr |= E1000_VMOLR_MPME;
2975                         } else if (count) {
2976                                 vmolr |= E1000_VMOLR_ROMPE;
2977                         }
2978                 }
2979                 /*
2980                  * Write addresses to available RAR registers; if there is not
2981                  * sufficient space to store all the addresses then enable
2982                  * unicast promiscuous mode
2983                  */
2984                 count = igb_write_uc_addr_list(netdev);
2985                 if (count < 0) {
2986                         rctl |= E1000_RCTL_UPE;
2987                         vmolr |= E1000_VMOLR_ROPE;
2988                 }
2989                 rctl |= E1000_RCTL_VFE;
2990         }
2991         wr32(E1000_RCTL, rctl);
2992
2993         /*
2994          * In order to support SR-IOV and eventually VMDq it is necessary to set
2995          * the VMOLR to enable the appropriate modes.  Without this workaround
2996          * we will have issues with VLAN tag stripping not being done for frames
2997          * that are only arriving because we are the default pool
2998          */
2999         if (hw->mac.type < e1000_82576)
3000                 return;
3001
3002         vmolr |= rd32(E1000_VMOLR(vfn)) &
3003                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3004         wr32(E1000_VMOLR(vfn), vmolr);
3005         igb_restore_vf_multicasts(adapter);
3006 }
3007
3008 /* Need to wait a few seconds after link up to get diagnostic information from
3009  * the phy */
3010 static void igb_update_phy_info(unsigned long data)
3011 {
3012         struct igb_adapter *adapter = (struct igb_adapter *) data;
3013         igb_get_phy_info(&adapter->hw);
3014 }
3015
3016 /**
3017  * igb_has_link - check shared code for link and determine up/down
3018  * @adapter: pointer to driver private info
3019  **/
3020 bool igb_has_link(struct igb_adapter *adapter)
3021 {
3022         struct e1000_hw *hw = &adapter->hw;
3023         bool link_active = false;
3024         s32 ret_val = 0;
3025
3026         /* get_link_status is set on LSC (link status) interrupt or
3027          * rx sequence error interrupt.  get_link_status will stay
3028          * set until e1000_check_for_link establishes link
3029          * for copper adapters ONLY
3030          */
3031         switch (hw->phy.media_type) {
3032         case e1000_media_type_copper:
3033                 if (hw->mac.get_link_status) {
3034                         ret_val = hw->mac.ops.check_for_link(hw);
3035                         link_active = !hw->mac.get_link_status;
3036                 } else {
3037                         link_active = true;
3038                 }
3039                 break;
3040         case e1000_media_type_internal_serdes:
3041                 ret_val = hw->mac.ops.check_for_link(hw);
3042                 link_active = hw->mac.serdes_has_link;
3043                 break;
3044         default:
3045         case e1000_media_type_unknown:
3046                 break;
3047         }
3048
3049         return link_active;
3050 }
3051
3052 /**
3053  * igb_watchdog - Timer Call-back
3054  * @data: pointer to adapter cast into an unsigned long
3055  **/
3056 static void igb_watchdog(unsigned long data)
3057 {
3058         struct igb_adapter *adapter = (struct igb_adapter *)data;
3059         /* Do the rest outside of interrupt context */
3060         schedule_work(&adapter->watchdog_task);
3061 }
3062
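/* igb_watchdog_task - periodic link and Tx health check.  Reports link
 * up/down transitions, updates adapter statistics, schedules a reset if a
 * Tx ring still holds work while the link is down, fires a software
 * interrupt so the rings get cleaned, and rearms the watchdog timer. */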
3063 static void igb_watchdog_task(struct work_struct *work)
3064 {
3065         struct igb_adapter *adapter = container_of(work,
3066                                                    struct igb_adapter,
3067                                                    watchdog_task);
3068         struct e1000_hw *hw = &adapter->hw;
3069         struct net_device *netdev = adapter->netdev;
3070         u32 link;
3071         int i;
3072
3073         link = igb_has_link(adapter);
3074         if (link) {
3075                 if (!netif_carrier_ok(netdev)) {
3076                         u32 ctrl;
3077                         hw->mac.ops.get_speed_and_duplex(hw,
3078                                                          &adapter->link_speed,
3079                                                          &adapter->link_duplex);
3080
3081                         ctrl = rd32(E1000_CTRL);
3082                         /* Link status message must follow this format */
3083                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3084                                  "Flow Control: %s\n",
3085                                netdev->name,
3086                                adapter->link_speed,
3087                                adapter->link_duplex == FULL_DUPLEX ?
3088                                  "Full Duplex" : "Half Duplex",
3089                                ((ctrl & E1000_CTRL_TFCE) &&
3090                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3091                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3092                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3093
3094                         /* adjust timeout factor according to speed/duplex */
3095                         adapter->tx_timeout_factor = 1;
3096                         switch (adapter->link_speed) {
3097                         case SPEED_10:
3098                                 adapter->tx_timeout_factor = 14;
3099                                 break;
3100                         case SPEED_100:
3101                                 /* maybe add some timeout factor ? */
3102                                 break;
3103                         }
3104
3105                         netif_carrier_on(netdev);
3106
3107                         igb_ping_all_vfs(adapter);
3108
3109                         /* link state has changed, schedule phy info update */
3110                         if (!test_bit(__IGB_DOWN, &adapter->state))
3111                                 mod_timer(&adapter->phy_info_timer,
3112                                           round_jiffies(jiffies + 2 * HZ));
3113                 }
3114         } else {
3115                 if (netif_carrier_ok(netdev)) {
3116                         adapter->link_speed = 0;
3117                         adapter->link_duplex = 0;
3118                         /* Link status message must follow this format */
3119                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3120                                netdev->name);
3121                         netif_carrier_off(netdev);
3122
3123                         igb_ping_all_vfs(adapter);
3124
3125                         /* link state has changed, schedule phy info update */
3126                         if (!test_bit(__IGB_DOWN, &adapter->state))
3127                                 mod_timer(&adapter->phy_info_timer,
3128                                           round_jiffies(jiffies + 2 * HZ));
3129                 }
3130         }
3131
3132         igb_update_stats(adapter);
3133
3134         for (i = 0; i < adapter->num_tx_queues; i++) {
3135                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3136                 if (!netif_carrier_ok(netdev)) {
3137                         /* We've lost link, so the controller stops DMA,
3138                          * but we've got queued Tx work that's never going
3139                          * to get done, so reset controller to flush Tx.
3140                          * (Do the reset outside of interrupt context). */
3141                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3142                                 adapter->tx_timeout_count++;
3143                                 schedule_work(&adapter->reset_task);
3144                                 /* return immediately since reset is imminent */
3145                                 return;
3146                         }
3147                 }
3148
3149                 /* Force detection of hung controller every watchdog period */
3150                 tx_ring->detect_tx_hung = true;
3151         }
3152
3153         /* Cause software interrupt to ensure rx ring is cleaned */
3154         if (adapter->msix_entries) {
3155                 u32 eics = 0;
3156                 for (i = 0; i < adapter->num_q_vectors; i++) {
3157                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3158                         eics |= q_vector->eims_value;
3159                 }
3160                 wr32(E1000_EICS, eics);
3161         } else {
3162                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3163         }
3164
3165         /* Reset the timer */
3166         if (!test_bit(__IGB_DOWN, &adapter->state))
3167                 mod_timer(&adapter->watchdog_timer,
3168                           round_jiffies(jiffies + 2 * HZ));
3169 }
3170
3171 enum latency_range {
3172         lowest_latency = 0,
3173         low_latency = 1,
3174         bulk_latency = 2,
3175         latency_invalid = 255
3176 };
3177
3178 /**
3179  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3180  * @q_vector: pointer to q_vector
3181  *
3182  *      Stores a new ITR value based strictly on packet size.  This
3183  *      algorithm is less sophisticated than that used in igb_update_itr,
3184  *      due to the difficulty of synchronizing statistics across multiple
3185  *      receive rings.  The divisors and thresholds used by this function
3186  *      were determined based on theoretical maximum wire speed and testing
3187  *      data, in order to minimize response time while increasing bulk
3188  *      throughput.
3189  *      This functionality is controlled by the InterruptThrottleRate module
3190  *      parameter (see igb_param.c)
3191  *      NOTE:  This function is called only when operating in a multiqueue
3192  *             receive environment.
3193  **/
3194 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3195 {
3196         int new_val = q_vector->itr_val;
3197         int avg_wire_size = 0;
3198         struct igb_adapter *adapter = q_vector->adapter;
3199
3200         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3201          * ints/sec - an ITR value of 976 (~250 usecs).
3202          */
3203         if (adapter->link_speed != SPEED_1000) {
3204                 new_val = 976;
3205                 goto set_itr_val;
3206         }
3207
3208         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3209                 struct igb_ring *ring = q_vector->rx_ring;
3210                 avg_wire_size = ring->total_bytes / ring->total_packets;
3211         }
3212
3213         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3214                 struct igb_ring *ring = q_vector->tx_ring;
3215                 avg_wire_size = max_t(u32, avg_wire_size,
3216                                       (ring->total_bytes /
3217                                        ring->total_packets));
3218         }
3219
3220         /* if avg_wire_size isn't set no work was done */
3221         if (!avg_wire_size)
3222                 goto clear_counts;
3223
3224         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3225         avg_wire_size += 24;
3226
3227         /* Don't starve jumbo frames */
3228         avg_wire_size = min(avg_wire_size, 3000);
3229
3230         /* Give a little boost to mid-size frames */
3231         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3232                 new_val = avg_wire_size / 3;
3233         else
3234                 new_val = avg_wire_size / 2;
3235
3236         /* when in itr mode 3 do not exceed 20K ints/sec */
3237         if (adapter->rx_itr_setting == 3 && new_val < 196)
3238                 new_val = 196;
3239
3240 set_itr_val:
3241         if (new_val != q_vector->itr_val) {
3242                 q_vector->itr_val = new_val;
3243                 q_vector->set_itr = 1;
3244         }
3245 clear_counts:
3246         if (q_vector->rx_ring) {
3247                 q_vector->rx_ring->total_bytes = 0;
3248                 q_vector->rx_ring->total_packets = 0;
3249         }
3250         if (q_vector->tx_ring) {
3251                 q_vector->tx_ring->total_bytes = 0;
3252                 q_vector->tx_ring->total_packets = 0;
3253         }
3254 }
3255
3256 /**
3257  * igb_update_itr - update the dynamic ITR value based on statistics
3258  * @adapter: pointer to adapter
3259  * @itr_setting: current q_vector->itr_val
3260  * @packets: the number of packets during this measurement interval
3261  * @bytes: the number of bytes during this measurement interval
3262  *      Stores a new ITR value based on packets and byte
3263  *      counts during the last interrupt.  The advantage of per interrupt
3264  *      computation is faster updates and more accurate ITR for the current
3265  *      traffic pattern.  Constants in this function were computed
3266  *      based on theoretical maximum wire speed and thresholds were set based
3267  *      on testing data as well as attempting to minimize response time
3268  *      while increasing bulk throughput.
3269  *      This functionality is controlled by the InterruptThrottleRate module
3270  *      parameter (see igb_param.c)
3271  *      NOTE:  These calculations are only valid when operating in a single-
3272  *             queue environment.
3273  **/
3274 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3275                                    int packets, int bytes)
3276 {
3277         unsigned int retval = itr_setting;
3278
3279         if (packets == 0)
3280                 goto update_itr_done;
3281
3282         switch (itr_setting) {
3283         case lowest_latency:
3284                 /* handle TSO and jumbo frames */
3285                 if (bytes/packets > 8000)
3286                         retval = bulk_latency;
3287                 else if ((packets < 5) && (bytes > 512))
3288                         retval = low_latency;
3289                 break;
3290         case low_latency:  /* 50 usec aka 20000 ints/s */
3291                 if (bytes > 10000) {
3292                         /* this if handles the TSO accounting */
3293                         if (bytes/packets > 8000) {
3294                                 retval = bulk_latency;
3295                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3296                                 retval = bulk_latency;
3297                         } else if ((packets > 35)) {
3298                                 retval = lowest_latency;
3299                         }
3300                 } else if (bytes/packets > 2000) {
3301                         retval = bulk_latency;
3302                 } else if (packets <= 2 && bytes < 512) {
3303                         retval = lowest_latency;
3304                 }
3305                 break;
3306         case bulk_latency: /* 250 usec aka 4000 ints/s */
3307                 if (bytes > 25000) {
3308                         if (packets > 35)
3309                                 retval = low_latency;
3310                 } else if (bytes < 1500) {
3311                         retval = low_latency;
3312                 }
3313                 break;
3314         }
3315
3316 update_itr_done:
3317         return retval;
3318 }
3319
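/* Single-queue counterpart of igb_update_ring_itr: combine the Rx and Tx
 * latency classes from igb_update_itr and translate the result into a new
 * ITR value for q_vector[0], to be written on the next interrupt. */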
3320 static void igb_set_itr(struct igb_adapter *adapter)
3321 {
3322         struct igb_q_vector *q_vector = adapter->q_vector[0];
3323         u16 current_itr;
3324         u32 new_itr = q_vector->itr_val;
3325
3326         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3327         if (adapter->link_speed != SPEED_1000) {
3328                 current_itr = 0;
3329                 new_itr = 4000;
3330                 goto set_itr_now;
3331         }
3332
3333         adapter->rx_itr = igb_update_itr(adapter,
3334                                     adapter->rx_itr,
3335                                     q_vector->rx_ring->total_packets,
3336                                     q_vector->rx_ring->total_bytes);
3337
3338         adapter->tx_itr = igb_update_itr(adapter,
3339                                     adapter->tx_itr,
3340                                     q_vector->tx_ring->total_packets,
3341                                     q_vector->tx_ring->total_bytes);
3342         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3343
3344         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3345         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3346                 current_itr = low_latency;
3347
3348         switch (current_itr) {
3349         /* counts and packets in update_itr are dependent on these numbers */
3350         case lowest_latency:
3351                 new_itr = 56;  /* aka 70,000 ints/sec */
3352                 break;
3353         case low_latency:
3354                 new_itr = 196; /* aka 20,000 ints/sec */
3355                 break;
3356         case bulk_latency:
3357                 new_itr = 980; /* aka 4,000 ints/sec */
3358                 break;
3359         default:
3360                 break;
3361         }
3362
3363 set_itr_now:
3364         q_vector->rx_ring->total_bytes = 0;
3365         q_vector->rx_ring->total_packets = 0;
3366         q_vector->tx_ring->total_bytes = 0;
3367         q_vector->tx_ring->total_packets = 0;
3368
3369         if (new_itr != q_vector->itr_val) {
3370                 /* this attempts to bias the interrupt rate towards Bulk
3371                  * by adding intermediate steps when interrupt rate is
3372                  * increasing */
3373                 new_itr = new_itr > q_vector->itr_val ?
3374                              max((new_itr * q_vector->itr_val) /
3375                                  (new_itr + (q_vector->itr_val >> 2)),
3376                                  new_itr) :
3377                              new_itr;
3378                 /* Don't write the value here; it resets the adapter's
3379                  * internal timer, and causes us to delay far longer than
3380                  * we should between interrupts.  Instead, we write the ITR
3381                  * value at the beginning of the next interrupt so the timing
3382                  * ends up being correct.
3383                  */
3384                 q_vector->itr_val = new_itr;
3385                 q_vector->set_itr = 1;
3386         }
3387
3388         return;
3389 }
3390
3391 #define IGB_TX_FLAGS_CSUM               0x00000001
3392 #define IGB_TX_FLAGS_VLAN               0x00000002
3393 #define IGB_TX_FLAGS_TSO                0x00000004
3394 #define IGB_TX_FLAGS_IPV4               0x00000008
3395 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3396 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3397 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3398
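/* Build an advanced context descriptor for TSO: reset the IP length fields,
 * seed the TCP checksum with the pseudo-header, and record the MAC/IP/L4
 * header lengths and MSS.  The accumulated header length is returned through
 * *hdr_len. */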
3399 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3400                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3401 {
3402         struct e1000_adv_tx_context_desc *context_desc;
3403         unsigned int i;
3404         int err;
3405         struct igb_buffer *buffer_info;
3406         u32 info = 0, tu_cmd = 0;
3407         u32 mss_l4len_idx;
3408         u8 l4len;
3409
3410         if (skb_header_cloned(skb)) {
3411                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3412                 if (err)
3413                         return err;
3414         }
3415
3416         l4len = tcp_hdrlen(skb);
3417         *hdr_len += l4len;
3418
3419         if (skb->protocol == htons(ETH_P_IP)) {
3420                 struct iphdr *iph = ip_hdr(skb);
3421                 iph->tot_len = 0;
3422                 iph->check = 0;
3423                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3424                                                          iph->daddr, 0,
3425                                                          IPPROTO_TCP,
3426                                                          0);
3427         } else if (skb_is_gso_v6(skb)) {
3428                 ipv6_hdr(skb)->payload_len = 0;
3429                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3430                                                        &ipv6_hdr(skb)->daddr,
3431                                                        0, IPPROTO_TCP, 0);
3432         }
3433
3434         i = tx_ring->next_to_use;
3435
3436         buffer_info = &tx_ring->buffer_info[i];
3437         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3438         /* VLAN MACLEN IPLEN */
3439         if (tx_flags & IGB_TX_FLAGS_VLAN)
3440                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3441         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3442         *hdr_len += skb_network_offset(skb);
3443         info |= skb_network_header_len(skb);
3444         *hdr_len += skb_network_header_len(skb);
3445         context_desc->vlan_macip_lens = cpu_to_le32(info);
3446
3447         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3448         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3449
3450         if (skb->protocol == htons(ETH_P_IP))
3451                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3452         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3453
3454         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3455
3456         /* MSS L4LEN IDX */
3457         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3458         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3459
3460         /* For 82575, context index must be unique per ring. */
3461         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3462                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3463
3464         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3465         context_desc->seqnum_seed = 0;
3466
3467         buffer_info->time_stamp = jiffies;
3468         buffer_info->next_to_watch = i;
3469         buffer_info->dma = 0;
3470         i++;
3471         if (i == tx_ring->count)
3472                 i = 0;
3473
3474         tx_ring->next_to_use = i;
3475
3476         return true;
3477 }
3478
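/* Build a context descriptor for checksum offload and/or VLAN insertion.
 * Returns true if a context descriptor was queued, false otherwise. */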
3479 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3480                                    struct sk_buff *skb, u32 tx_flags)
3481 {
3482         struct e1000_adv_tx_context_desc *context_desc;
3483         struct pci_dev *pdev = tx_ring->pdev;
3484         struct igb_buffer *buffer_info;
3485         u32 info = 0, tu_cmd = 0;
3486         unsigned int i;
3487
3488         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3489             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3490                 i = tx_ring->next_to_use;
3491                 buffer_info = &tx_ring->buffer_info[i];
3492                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3493
3494                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3495                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3496
3497                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3498                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3499                         info |= skb_network_header_len(skb);
3500
3501                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3502
3503                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3504
3505                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3506                         __be16 protocol;
3507
3508                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3509                                 const struct vlan_ethhdr *vhdr =
3510                                           (const struct vlan_ethhdr*)skb->data;
3511
3512                                 protocol = vhdr->h_vlan_encapsulated_proto;
3513                         } else {
3514                                 protocol = skb->protocol;
3515                         }
3516
3517                         switch (protocol) {
3518                         case cpu_to_be16(ETH_P_IP):
3519                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3520                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3521                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3522                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3523                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3524                                 break;
3525                         case cpu_to_be16(ETH_P_IPV6):
3526                                 /* XXX what about other V6 headers?? */
3527                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3528                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3529                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3530                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3531                                 break;
3532                         default:
3533                                 if (unlikely(net_ratelimit()))
3534                                         dev_warn(&pdev->dev,
3535                                             "partial checksum but proto=%x!\n",
3536                                             skb->protocol);
3537                                 break;
3538                         }
3539                 }
3540
3541                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3542                 context_desc->seqnum_seed = 0;
3543                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3544                         context_desc->mss_l4len_idx =
3545                                 cpu_to_le32(tx_ring->reg_idx << 4);
3546
3547                 buffer_info->time_stamp = jiffies;
3548                 buffer_info->next_to_watch = i;
3549                 buffer_info->dma = 0;
3550
3551                 i++;
3552                 if (i == tx_ring->count)
3553                         i = 0;
3554                 tx_ring->next_to_use = i;
3555
3556                 return true;
3557         }
3558         return false;
3559 }
3560
3561 #define IGB_MAX_TXD_PWR 16
3562 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3563
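/* Map skb->data and all paged fragments for DMA and fill in the matching
 * buffer_info entries.  Returns the number of descriptors used, or 0 if a
 * DMA mapping failed (in which case the mappings already made are unwound). */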
3564 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3565                                  unsigned int first)
3566 {
3567         struct igb_buffer *buffer_info;
3568         struct pci_dev *pdev = tx_ring->pdev;
3569         unsigned int len = skb_headlen(skb);
3570         unsigned int count = 0, i;
3571         unsigned int f;
3572
3573         i = tx_ring->next_to_use;
3574
3575         buffer_info = &tx_ring->buffer_info[i];
3576         BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3577         buffer_info->length = len;
3578         /* set time_stamp *before* dma to help avoid a possible race */
3579         buffer_info->time_stamp = jiffies;
3580         buffer_info->next_to_watch = i;
3581         buffer_info->dma = pci_map_single(pdev, skb->data, len,
3582                                           PCI_DMA_TODEVICE);
3583         if (pci_dma_mapping_error(pdev, buffer_info->dma))
3584                 goto dma_error;
3585
3586         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3587                 struct skb_frag_struct *frag;
3588
3589                 count++;
3590                 i++;
3591                 if (i == tx_ring->count)
3592                         i = 0;
3593
3594                 frag = &skb_shinfo(skb)->frags[f];
3595                 len = frag->size;
3596
3597                 buffer_info = &tx_ring->buffer_info[i];
3598                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3599                 buffer_info->length = len;
3600                 buffer_info->time_stamp = jiffies;
3601                 buffer_info->next_to_watch = i;
3602                 buffer_info->mapped_as_page = true;
3603                 buffer_info->dma = pci_map_page(pdev,
3604                                                 frag->page,
3605                                                 frag->page_offset,
3606                                                 len,
3607                                                 PCI_DMA_TODEVICE);
3608                 if (pci_dma_mapping_error(pdev, buffer_info->dma))
3609                         goto dma_error;
3610
3611         }
3612
3613         tx_ring->buffer_info[i].skb = skb;
3614         tx_ring->buffer_info[i].gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3615         tx_ring->buffer_info[first].next_to_watch = i;
3616
3617         return ++count;
3618
3619 dma_error:
3620         dev_err(&pdev->dev, "TX DMA map failed\n");
3621
3622         /* clear timestamp and dma mappings for failed buffer_info mapping */
3623         buffer_info->dma = 0;
3624         buffer_info->time_stamp = 0;
3625         buffer_info->length = 0;
3626         buffer_info->next_to_watch = 0;
3627         buffer_info->mapped_as_page = false;
3628
3629         /* clear timestamp and dma mappings for remaining portion of packet */
3630         while (count--) {
3631                 if (i == 0)
3632                         i = tx_ring->count;
3633                 i--;
3634                 buffer_info = &tx_ring->buffer_info[i];
3635                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3636         }
3637
3638         return 0;
3639 }
3640
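/* Build the advanced data descriptors for an already-mapped frame, set the
 * per-packet offload bits in olinfo_status, and bump the tail register to
 * hand the descriptors to hardware. */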
3641 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3642                                     u32 tx_flags, int count, u32 paylen,
3643                                     u8 hdr_len)
3644 {
3645         union e1000_adv_tx_desc *tx_desc;
3646         struct igb_buffer *buffer_info;
3647         u32 olinfo_status = 0, cmd_type_len;
3648         unsigned int i = tx_ring->next_to_use;
3649
3650         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3651                         E1000_ADVTXD_DCMD_DEXT);
3652
3653         if (tx_flags & IGB_TX_FLAGS_VLAN)
3654                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3655
3656         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3657                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3658
3659         if (tx_flags & IGB_TX_FLAGS_TSO) {
3660                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
3661
3662                 /* insert tcp checksum */
3663                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3664
3665                 /* insert ip checksum */
3666                 if (tx_flags & IGB_TX_FLAGS_IPV4)
3667                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
3668
3669         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
3670                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
3671         }
3672
3673         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
3674             (tx_flags & (IGB_TX_FLAGS_CSUM |
3675                          IGB_TX_FLAGS_TSO |
3676                          IGB_TX_FLAGS_VLAN)))
3677                 olinfo_status |= tx_ring->reg_idx << 4;
3678
3679         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
3680
3681         do {
3682                 buffer_info = &tx_ring->buffer_info[i];
3683                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
3684                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
3685                 tx_desc->read.cmd_type_len =
3686                         cpu_to_le32(cmd_type_len | buffer_info->length);
3687                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
3688                 count--;
3689                 i++;
3690                 if (i == tx_ring->count)
3691                         i = 0;
3692         } while (count > 0);
3693
3694         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
3695         /* Force memory writes to complete before letting h/w
3696          * know there are new descriptors to fetch.  (Only
3697          * applicable for weak-ordered memory model archs,
3698          * such as IA-64). */
3699         wmb();
3700
3701         tx_ring->next_to_use = i;
3702         writel(i, tx_ring->tail);
3703         /* we need this if more than one processor can write to our tail
3704          * at a time; it synchronizes IO on IA64/Altix systems */
3705         mmiowb();
3706 }
3707
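/* Stop the subqueue when fewer than 'size' descriptors are free; wake it
 * again if another CPU has made room between the check and the stop. */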
3708 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3709 {
3710         struct net_device *netdev = tx_ring->netdev;
3711
3712         netif_stop_subqueue(netdev, tx_ring->queue_index);
3713
3714         /* Herbert's original patch had:
3715          *  smp_mb__after_netif_stop_queue();
3716          * but since that doesn't exist yet, just open code it. */
3717         smp_mb();
3718
3719         /* We need to check again in case another CPU has just
3720          * made room available. */
3721         if (igb_desc_unused(tx_ring) < size)
3722                 return -EBUSY;
3723
3724         /* A reprieve! */
3725         netif_wake_subqueue(netdev, tx_ring->queue_index);
3726         tx_ring->tx_stats.restart_queue++;
3727         return 0;
3728 }
3729
3730 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
3731 {
3732         if (igb_desc_unused(tx_ring) >= size)
3733                 return 0;
3734         return __igb_maybe_stop_tx(tx_ring, size);
3735 }
3736
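/* Per-ring transmit path: reserve descriptors, apply timestamp/VLAN flags,
 * set up TSO or checksum offload, map the skb for DMA, queue the
 * descriptors, and make sure there is room left for the next frame. */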
3737 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
3738                                     struct igb_ring *tx_ring)
3739 {
3740         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
3741         int tso = 0, count;
3742         u32 tx_flags = 0;
3743         u16 first;
3744         u8 hdr_len = 0;
3745         union skb_shared_tx *shtx = skb_tx(skb);
3746
3747         /* need: 1 descriptor per page,
3748          *       + 2 desc gap to keep tail from touching head,
3749          *       + 1 desc for skb->data,
3750          *       + 1 desc for context descriptor,
3751          * otherwise try next time */
3752         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
3753                 /* this is a hard error */
3754                 return NETDEV_TX_BUSY;
3755         }
3756
3757         if (unlikely(shtx->hardware)) {
3758                 shtx->in_progress = 1;
3759                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
3760         }
3761
3762         if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
3763                 tx_flags |= IGB_TX_FLAGS_VLAN;
3764                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
3765         }
3766
3767         if (skb->protocol == htons(ETH_P_IP))
3768                 tx_flags |= IGB_TX_FLAGS_IPV4;
3769
3770         first = tx_ring->next_to_use;
3771         if (skb_is_gso(skb)) {
3772                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
3773
3774                 if (tso < 0) {
3775                         dev_kfree_skb_any(skb);
3776                         return NETDEV_TX_OK;
3777                 }
3778         }
3779
3780         if (tso)
3781                 tx_flags |= IGB_TX_FLAGS_TSO;
3782         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
3783                  (skb->ip_summed == CHECKSUM_PARTIAL))
3784                 tx_flags |= IGB_TX_FLAGS_CSUM;
3785
3786         /*
3787          * count reflects descriptors mapped; if 0 or less then a mapping error
3788          * has occurred and we need to rewind the descriptor queue
3789          */
3790         count = igb_tx_map_adv(tx_ring, skb, first);
3791         if (!count) {
3792                 dev_kfree_skb_any(skb);
3793                 tx_ring->buffer_info[first].time_stamp = 0;
3794                 tx_ring->next_to_use = first;
3795                 return NETDEV_TX_OK;
3796         }
3797
3798         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
3799
3800         /* Make sure there is space in the ring for the next send. */
3801         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
3802
3803         return NETDEV_TX_OK;
3804 }
3805
3806 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
3807                                       struct net_device *netdev)
3808 {
3809         struct igb_adapter *adapter = netdev_priv(netdev);
3810         struct igb_ring *tx_ring;
3811         int r_idx = 0;
3812
3813         if (test_bit(__IGB_DOWN, &adapter->state)) {
3814                 dev_kfree_skb_any(skb);
3815                 return NETDEV_TX_OK;
3816         }
3817
3818         if (skb->len <= 0) {
3819                 dev_kfree_skb_any(skb);
3820                 return NETDEV_TX_OK;
3821         }
3822
3823         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
3824         tx_ring = adapter->multi_tx_table[r_idx];
3825
3826         /* This goes back to the question of how to logically map a tx queue
3827          * to a flow.  Right now, performance is impacted slightly negatively
3828          * if using multiple tx queues.  If the stack breaks away from a
3829          * single qdisc implementation, we can look at this again. */
3830         return igb_xmit_frame_ring_adv(skb, tx_ring);
3831 }
3832
3833 /**
3834  * igb_tx_timeout - Respond to a Tx Hang
3835  * @netdev: network interface device structure
3836  **/
3837 static void igb_tx_timeout(struct net_device *netdev)
3838 {
3839         struct igb_adapter *adapter = netdev_priv(netdev);
3840         struct e1000_hw *hw = &adapter->hw;
3841
3842         /* Do the reset outside of interrupt context */
3843         adapter->tx_timeout_count++;
3844
3845         if (hw->mac.type == e1000_82580)
3846                 hw->dev_spec._82575.global_device_reset = true;
3847
3848         schedule_work(&adapter->reset_task);
3849         wr32(E1000_EICS,
3850              (adapter->eims_enable_mask & ~adapter->eims_other));
3851 }
3852
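/* Deferred reset handler scheduled from the watchdog and Tx timeout paths;
 * runs in process context so the adapter can be brought down and back up
 * safely. */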
3853 static void igb_reset_task(struct work_struct *work)
3854 {
3855         struct igb_adapter *adapter;
3856         adapter = container_of(work, struct igb_adapter, reset_task);
3857
3858         igb_reinit_locked(adapter);
3859 }
3860
3861 /**
3862  * igb_get_stats - Get System Network Statistics
3863  * @netdev: network interface device structure
3864  *
3865  * Returns the address of the device statistics structure.
3866  * The statistics are actually updated from the timer callback.
3867  **/
3868 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
3869 {
3870         /* only return the current stats */
3871         return &netdev->stats;
3872 }
3873
3874 /**
3875  * igb_change_mtu - Change the Maximum Transfer Unit
3876  * @netdev: network interface device structure
3877  * @new_mtu: new value for maximum frame size
3878  *
3879  * Returns 0 on success, negative on failure
3880  **/
3881 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
3882 {
3883         struct igb_adapter *adapter = netdev_priv(netdev);
3884         struct pci_dev *pdev = adapter->pdev;
3885         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
3886         u32 rx_buffer_len, i;
3887
3888         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
3889                 dev_err(&pdev->dev, "Invalid MTU setting\n");
3890                 return -EINVAL;
3891         }
3892
3893         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
3894                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
3895                 return -EINVAL;
3896         }
3897
3898         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
3899                 msleep(1);
3900
3901         /* igb_down has a dependency on max_frame_size */
3902         adapter->max_frame_size = max_frame;
3903
3904         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
3905          * means we reserve 2 more; this pushes us to allocate from the next
3906          * larger slab size.
3907          * i.e. RXBUFFER_2048 --> size-4096 slab
3908          */
3909
3910         if (adapter->hw.mac.type == e1000_82580)
3911                 max_frame += IGB_TS_HDR_LEN;
3912
3913         if (max_frame <= IGB_RXBUFFER_1024)
3914                 rx_buffer_len = IGB_RXBUFFER_1024;
3915         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
3916                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
3917         else
3918                 rx_buffer_len = IGB_RXBUFFER_128;
3919
3920         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
3921              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
3922                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
3923
3924         if ((adapter->hw.mac.type == e1000_82580) &&
3925             (rx_buffer_len == IGB_RXBUFFER_128))
3926                 rx_buffer_len += IGB_RXBUFFER_64;
3927
3928         if (netif_running(netdev))
3929                 igb_down(adapter);
3930
3931         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
3932                  netdev->mtu, new_mtu);
3933         netdev->mtu = new_mtu;
3934
3935         for (i = 0; i < adapter->num_rx_queues; i++)
3936                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
3937
3938         if (netif_running(netdev))
3939                 igb_up(adapter);
3940         else
3941                 igb_reset(adapter);
3942
3943         clear_bit(__IGB_RESETTING, &adapter->state);
3944
3945         return 0;
3946 }
3947
3948 /**
3949  * igb_update_stats - Update the board statistics counters
3950  * @adapter: board private structure
3951  **/
3952
3953 void igb_update_stats(struct igb_adapter *adapter)
3954 {
3955         struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
3956         struct e1000_hw *hw = &adapter->hw;
3957         struct pci_dev *pdev = adapter->pdev;
3958         u32 reg, mpc;
3959         u16 phy_tmp;
3960         int i;
3961         u64 bytes, packets;
3962
3963 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
3964
3965         /*
3966          * Prevent stats update while adapter is being reset, or if the pci
3967          * connection is down.
3968          */
3969         if (adapter->link_speed == 0)
3970                 return;
3971         if (pci_channel_offline(pdev))
3972                 return;
3973
3974         bytes = 0;
3975         packets = 0;
3976         for (i = 0; i < adapter->num_rx_queues; i++) {
3977                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
3978                 struct igb_ring *ring = adapter->rx_ring[i];
3979                 ring->rx_stats.drops += rqdpc_tmp;
3980                 net_stats->rx_fifo_errors += rqdpc_tmp;
3981                 bytes += ring->rx_stats.bytes;
3982                 packets += ring->rx_stats.packets;
3983         }
3984
3985         net_stats->rx_bytes = bytes;
3986         net_stats->rx_packets = packets;
3987
3988         bytes = 0;
3989         packets = 0;
3990         for (i = 0; i < adapter->num_tx_queues; i++) {
3991                 struct igb_ring *ring = adapter->tx_ring[i];
3992                 bytes += ring->tx_stats.bytes;
3993                 packets += ring->tx_stats.packets;
3994         }
3995         net_stats->tx_bytes = bytes;
3996         net_stats->tx_packets = packets;
3997
3998         /* read stats registers */
3999         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4000         adapter->stats.gprc += rd32(E1000_GPRC);
4001         adapter->stats.gorc += rd32(E1000_GORCL);
4002         rd32(E1000_GORCH); /* clear GORCL */
4003         adapter->stats.bprc += rd32(E1000_BPRC);
4004         adapter->stats.mprc += rd32(E1000_MPRC);
4005         adapter->stats.roc += rd32(E1000_ROC);
4006
4007         adapter->stats.prc64 += rd32(E1000_PRC64);
4008         adapter->stats.prc127 += rd32(E1000_PRC127);
4009         adapter->stats.prc255 += rd32(E1000_PRC255);
4010         adapter->stats.prc511 += rd32(E1000_PRC511);
4011         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4012         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4013         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4014         adapter->stats.sec += rd32(E1000_SEC);
4015
4016         mpc = rd32(E1000_MPC);
4017         adapter->stats.mpc += mpc;
4018         net_stats->rx_fifo_errors += mpc;
4019         adapter->stats.scc += rd32(E1000_SCC);
4020         adapter->stats.ecol += rd32(E1000_ECOL);
4021         adapter->stats.mcc += rd32(E1000_MCC);
4022         adapter->stats.latecol += rd32(E1000_LATECOL);
4023         adapter->stats.dc += rd32(E1000_DC);
4024         adapter->stats.rlec += rd32(E1000_RLEC);
4025         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4026         adapter->stats.xontxc += rd32(E1000_XONTXC);
4027         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4028         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4029         adapter->stats.fcruc += rd32(E1000_FCRUC);
4030         adapter->stats.gptc += rd32(E1000_GPTC);
4031         adapter->stats.gotc += rd32(E1000_GOTCL);
4032         rd32(E1000_GOTCH); /* clear GOTCL */
4033         adapter->stats.rnbc += rd32(E1000_RNBC);
4034         adapter->stats.ruc += rd32(E1000_RUC);
4035         adapter->stats.rfc += rd32(E1000_RFC);
4036         adapter->stats.rjc += rd32(E1000_RJC);
4037         adapter->stats.tor += rd32(E1000_TORH);
4038         adapter->stats.tot += rd32(E1000_TOTH);
4039         adapter->stats.tpr += rd32(E1000_TPR);
4040
4041         adapter->stats.ptc64 += rd32(E1000_PTC64);
4042         adapter->stats.ptc127 += rd32(E1000_PTC127);
4043         adapter->stats.ptc255 += rd32(E1000_PTC255);
4044         adapter->stats.ptc511 += rd32(E1000_PTC511);
4045         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4046         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4047
4048         adapter->stats.mptc += rd32(E1000_MPTC);
4049         adapter->stats.bptc += rd32(E1000_BPTC);
4050
4051         adapter->stats.tpt += rd32(E1000_TPT);
4052         adapter->stats.colc += rd32(E1000_COLC);
4053
4054         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4055         /* read internal phy specific stats */
4056         reg = rd32(E1000_CTRL_EXT);
4057         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4058                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4059                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4060         }
4061
4062         adapter->stats.tsctc += rd32(E1000_TSCTC);
4063         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4064
4065         adapter->stats.iac += rd32(E1000_IAC);
4066         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4067         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4068         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4069         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4070         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4071         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4072         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4073         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4074
4075         /* Fill out the OS statistics structure */
4076         net_stats->multicast = adapter->stats.mprc;
4077         net_stats->collisions = adapter->stats.colc;
4078
4079         /* Rx Errors */
4080
4081         /* RLEC on some newer hardware can be incorrect so build
4082          * our own version based on RUC and ROC */
4083         net_stats->rx_errors = adapter->stats.rxerrc +
4084                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4085                 adapter->stats.ruc + adapter->stats.roc +
4086                 adapter->stats.cexterr;
4087         net_stats->rx_length_errors = adapter->stats.ruc +
4088                                       adapter->stats.roc;
4089         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4090         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4091         net_stats->rx_missed_errors = adapter->stats.mpc;
4092
4093         /* Tx Errors */
4094         net_stats->tx_errors = adapter->stats.ecol +
4095                                adapter->stats.latecol;
4096         net_stats->tx_aborted_errors = adapter->stats.ecol;
4097         net_stats->tx_window_errors = adapter->stats.latecol;
4098         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4099
4100         /* Tx Dropped needs to be maintained elsewhere */
4101
4102         /* Phy Stats */
4103         if (hw->phy.media_type == e1000_media_type_copper) {
4104                 if ((adapter->link_speed == SPEED_1000) &&
4105                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4106                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4107                         adapter->phy_stats.idle_errors += phy_tmp;
4108                 }
4109         }
4110
4111         /* Management Stats */
4112         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4113         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4114         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4115 }
4116
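/* MSI-X "other" interrupt handler: handles device reset requests, DMA
 * out-of-sync events, VF mailbox messages and link status changes, then
 * re-arms the relevant interrupt causes. */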
4117 static irqreturn_t igb_msix_other(int irq, void *data)
4118 {
4119         struct igb_adapter *adapter = data;
4120         struct e1000_hw *hw = &adapter->hw;
4121         u32 icr = rd32(E1000_ICR);
4122         /* reading ICR causes bit 31 of EICR to be cleared */
4123
4124         if (icr & E1000_ICR_DRSTA)
4125                 schedule_work(&adapter->reset_task);
4126
4127         if (icr & E1000_ICR_DOUTSYNC) {
4128                 /* HW is reporting DMA is out of sync */
4129                 adapter->stats.doosync++;
4130         }
4131
4132         /* Check for a mailbox event */
4133         if (icr & E1000_ICR_VMMB)
4134                 igb_msg_task(adapter);
4135
4136         if (icr & E1000_ICR_LSC) {
4137                 hw->mac.get_link_status = 1;
4138                 /* guard against interrupt when we're going down */
4139                 if (!test_bit(__IGB_DOWN, &adapter->state))
4140                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4141         }
4142
4143         if (adapter->vfs_allocated_count)
4144                 wr32(E1000_IMS, E1000_IMS_LSC |
4145                                 E1000_IMS_VMMB |
4146                                 E1000_IMS_DOUTSYNC);
4147         else
4148                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4149         wr32(E1000_EIMS, adapter->eims_other);
4150
4151         return IRQ_HANDLED;
4152 }
4153
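/* Write the ITR value calculated by the dynamic ITR code to this vector's
 * EITR register, using the 82575-specific layout where required, and clear
 * the set_itr flag. */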
4154 static void igb_write_itr(struct igb_q_vector *q_vector)
4155 {
4156         struct igb_adapter *adapter = q_vector->adapter;
4157         u32 itr_val = q_vector->itr_val & 0x7FFC;
4158
4159         if (!q_vector->set_itr)
4160                 return;
4161
4162         if (!itr_val)
4163                 itr_val = 0x4;
4164
4165         if (adapter->hw.mac.type == e1000_82575)
4166                 itr_val |= itr_val << 16;
4167         else
4168                 itr_val |= 0x8000000;
4169
4170         writel(itr_val, q_vector->itr_register);
4171         q_vector->set_itr = 0;
4172 }
4173
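/* Per-queue MSI-X handler: latch the new ITR value and schedule NAPI. */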
4174 static irqreturn_t igb_msix_ring(int irq, void *data)
4175 {
4176         struct igb_q_vector *q_vector = data;
4177
4178         /* Write the ITR value calculated from the previous interrupt. */
4179         igb_write_itr(q_vector);
4180
4181         napi_schedule(&q_vector->napi);
4182
4183         return IRQ_HANDLED;
4184 }
4185
4186 #ifdef CONFIG_IGB_DCA
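/* Point the DCA tags for this vector's Tx and Rx rings at the CPU currently
 * running the handler so descriptor (and Rx header/payload) writes are
 * steered toward that CPU's cache. */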
4187 static void igb_update_dca(struct igb_q_vector *q_vector)
4188 {
4189         struct igb_adapter *adapter = q_vector->adapter;
4190         struct e1000_hw *hw = &adapter->hw;
4191         int cpu = get_cpu();
4192
4193         if (q_vector->cpu == cpu)
4194                 goto out_no_update;
4195
4196         if (q_vector->tx_ring) {
4197                 int q = q_vector->tx_ring->reg_idx;
4198                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4199                 if (hw->mac.type == e1000_82575) {
4200                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4201                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4202                 } else {
4203                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4204                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4205                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4206                 }
4207                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4208                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4209         }
4210         if (q_vector->rx_ring) {
4211                 int q = q_vector->rx_ring->reg_idx;
4212                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4213                 if (hw->mac.type == e1000_82575) {
4214                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4215                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4216                 } else {
4217                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4218                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4219                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4220                 }
4221                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4222                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4223                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4224                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4225         }
4226         q_vector->cpu = cpu;
4227 out_no_update:
4228         put_cpu();
4229 }
4230
4231 static void igb_setup_dca(struct igb_adapter *adapter)
4232 {
4233         struct e1000_hw *hw = &adapter->hw;
4234         int i;
4235
4236         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4237                 return;
4238
4239         /* Always use CB2 mode, difference is masked in the CB driver. */
4240         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4241
4242         for (i = 0; i < adapter->num_q_vectors; i++) {
4243                 adapter->q_vector[i]->cpu = -1;
4244                 igb_update_dca(adapter->q_vector[i]);
4245         }
4246 }
4247
4248 static int __igb_notify_dca(struct device *dev, void *data)
4249 {
4250         struct net_device *netdev = dev_get_drvdata(dev);
4251         struct igb_adapter *adapter = netdev_priv(netdev);
4252         struct pci_dev *pdev = adapter->pdev;
4253         struct e1000_hw *hw = &adapter->hw;
4254         unsigned long event = *(unsigned long *)data;
4255
4256         switch (event) {
4257         case DCA_PROVIDER_ADD:
4258                 /* if already enabled, don't do it again */
4259                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4260                         break;
4261                 if (dca_add_requester(dev) == 0) {
4262                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4263                         dev_info(&pdev->dev, "DCA enabled\n");
4264                         igb_setup_dca(adapter);
4265                         break;
4266                 }
4267                 /* Fall Through since DCA is disabled. */
4268         case DCA_PROVIDER_REMOVE:
4269                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4270                         /* without this a class_device is left
4271                          * hanging around in the sysfs model */
4272                         dca_remove_requester(dev);
4273                         dev_info(&pdev->dev, "DCA disabled\n");
4274                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4275                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4276                 }
4277                 break;
4278         }
4279
4280         return 0;
4281 }
4282
4283 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4284                           void *p)
4285 {
4286         int ret_val;
4287
4288         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4289                                          __igb_notify_dca);
4290
4291         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4292 }
4293 #endif /* CONFIG_IGB_DCA */
4294
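     /**
      * igb_ping_all_vfs - send a control ("ping") message to every allocated VF
      * @adapter: board private structure
      **/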
4295 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4296 {
4297         struct e1000_hw *hw = &adapter->hw;
4298         u32 ping;
4299         int i;
4300
4301         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4302                 ping = E1000_PF_CONTROL_MSG;
4303                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4304                         ping |= E1000_VT_MSGTYPE_CTS;
4305                 igb_write_mbx(hw, &ping, 1, i);
4306         }
4307 }
4308
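     /**
      * igb_set_vf_promisc - handle a VF request to change its promiscuous mode
      * @adapter: board private structure
      * @msgbuf: mailbox message received from the VF
      * @vf: VF index the message came from
      **/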
4309 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4310 {
4311         struct e1000_hw *hw = &adapter->hw;
4312         u32 vmolr = rd32(E1000_VMOLR(vf));
4313         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4314
4315         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4316                             IGB_VF_FLAG_MULTI_PROMISC);
4317         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4318
4319         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4320                 vmolr |= E1000_VMOLR_MPME;
4321                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4322         } else {
4323                 /*
4324                  * if we have hashes and we are clearing a multicast promisc
4325                  * flag we need to write the hashes to the MTA as this step
4326                  * was previously skipped
4327                  */
4328                 if (vf_data->num_vf_mc_hashes > 30) {
4329                         vmolr |= E1000_VMOLR_MPME;
4330                 } else if (vf_data->num_vf_mc_hashes) {
4331                         int j;
4332                         vmolr |= E1000_VMOLR_ROMPE;
4333                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4334                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4335                 }
4336         }
4337
4338         wr32(E1000_VMOLR(vf), vmolr);
4339
4340         /* there are flags left unprocessed, likely not supported */
4341         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4342                 return -EINVAL;
4343
4344         return 0;
4345
4346 }
4347
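     /**
      * igb_set_vf_multicasts - store and apply a VF's multicast hash list
      * @adapter: board private structure
      * @msgbuf: mailbox message containing the hash list
      * @vf: VF index the message came from
      **/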
4348 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4349                                   u32 *msgbuf, u32 vf)
4350 {
4351         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4352         u16 *hash_list = (u16 *)&msgbuf[1];
4353         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4354         int i;
4355
4356         /* salt away the number of multicast addresses assigned
4357          * to this VF for later use to restore when the PF multicast
4358          * list changes
4359          */
4360         vf_data->num_vf_mc_hashes = n;
4361
4362         /* only up to 30 hash values supported */
4363         if (n > 30)
4364                 n = 30;
4365
4366         /* store the hashes for later use */
4367         for (i = 0; i < n; i++)
4368                 vf_data->vf_mc_hashes[i] = hash_list[i];
4369
4370         /* Flush and reset the mta with the new values */
4371         igb_set_rx_mode(adapter->netdev);
4372
4373         return 0;
4374 }
4375
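     /**
      * igb_restore_vf_multicasts - rewrite the stored VF multicast hashes
      * @adapter: board private structure
      *
      * Replays each VF's saved hash list into the MTA so VF multicast
      * filtering survives changes to the PF multicast list.
      **/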
4376 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4377 {
4378         struct e1000_hw *hw = &adapter->hw;
4379         struct vf_data_storage *vf_data;
4380         int i, j;
4381
4382         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4383                 u32 vmolr = rd32(E1000_VMOLR(i));
4384                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4385
4386                 vf_data = &adapter->vf_data[i];
4387
4388                 if ((vf_data->num_vf_mc_hashes > 30) ||
4389                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4390                         vmolr |= E1000_VMOLR_MPME;
4391                 } else if (vf_data->num_vf_mc_hashes) {
4392                         vmolr |= E1000_VMOLR_ROMPE;
4393                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4394                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4395                 }
4396                 wr32(E1000_VMOLR(i), vmolr);
4397         }
4398 }
4399
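     /**
      * igb_clear_vf_vfta - remove a VF from every VLAN pool it belongs to
      * @adapter: board private structure
      * @vf: VF index to remove
      *
      * Any VLVF entry whose pool becomes empty also has its VLAN ID removed
      * from the VFTA.
      **/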
4400 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4401 {
4402         struct e1000_hw *hw = &adapter->hw;
4403         u32 pool_mask, reg, vid;
4404         int i;
4405
4406         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4407
4408         /* Find the vlan filter for this id */
4409         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4410                 reg = rd32(E1000_VLVF(i));
4411
4412                 /* remove the vf from the pool */
4413                 reg &= ~pool_mask;
4414
4415                 /* if pool is empty then remove entry from vfta */
4416                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4417                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4418                         vid = reg & E1000_VLVF_VLANID_MASK;
4419                         igb_vfta_set(hw, vid, false);
4420                         reg = 0;
4421                 }
4422
4423                 wr32(E1000_VLVF(i), reg);
4424         }
4425
4426         adapter->vf_data[vf].vlans_enabled = 0;
4427 }
4428
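     /**
      * igb_vlvf_set - add or remove a VLAN filter for a given pool
      * @adapter: board private structure
      * @vid: VLAN ID to add or remove
      * @add: true to add the filter, false to remove it
      * @vf: pool (VF) index the filter applies to
      **/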
4429 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4430 {
4431         struct e1000_hw *hw = &adapter->hw;
4432         u32 reg, i;
4433
4434         /* The vlvf table only exists on 82576 hardware and newer */
4435         if (hw->mac.type < e1000_82576)
4436                 return -1;
4437
4438         /* we only need to do this if VMDq is enabled */
4439         if (!adapter->vfs_allocated_count)
4440                 return -1;
4441
4442         /* Find the vlan filter for this id */
4443         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4444                 reg = rd32(E1000_VLVF(i));
4445                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4446                     vid == (reg & E1000_VLVF_VLANID_MASK))
4447                         break;
4448         }
4449
4450         if (add) {
4451                 if (i == E1000_VLVF_ARRAY_SIZE) {
4452                         /* Did not find a matching VLAN ID entry that was
4453                          * enabled.  Search for a free filter entry, i.e.
4454                          * one without the enable bit set
4455                          */
4456                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4457                                 reg = rd32(E1000_VLVF(i));
4458                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4459                                         break;
4460                         }
4461                 }
4462                 if (i < E1000_VLVF_ARRAY_SIZE) {
4463                         /* Found an enabled/available entry */
4464                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4465
4466                         /* if !enabled we need to set this up in vfta */
4467                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4468                                 /* add VID to filter table */
4469                                 igb_vfta_set(hw, vid, true);
4470                                 reg |= E1000_VLVF_VLANID_ENABLE;
4471                         }
4472                         reg &= ~E1000_VLVF_VLANID_MASK;
4473                         reg |= vid;
4474                         wr32(E1000_VLVF(i), reg);
4475
4476                         /* do not modify RLPML for PF devices */
4477                         if (vf >= adapter->vfs_allocated_count)
4478                                 return 0;
4479
4480                         if (!adapter->vf_data[vf].vlans_enabled) {
4481                                 u32 size;
4482                                 reg = rd32(E1000_VMOLR(vf));
4483                                 size = reg & E1000_VMOLR_RLPML_MASK;
4484                                 size += 4;
4485                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4486                                 reg |= size;
4487                                 wr32(E1000_VMOLR(vf), reg);
4488                         }
4489
4490                         adapter->vf_data[vf].vlans_enabled++;
4491                         return 0;
4492                 }
4493         } else {
4494                 if (i < E1000_VLVF_ARRAY_SIZE) {
4495                         /* remove vf from the pool */
4496                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4497                         /* if pool is empty then remove entry from vfta */
4498                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4499                                 reg = 0;
4500                                 igb_vfta_set(hw, vid, false);
4501                         }
4502                         wr32(E1000_VLVF(i), reg);
4503
4504                         /* do not modify RLPML for PF devices */
4505                         if (vf >= adapter->vfs_allocated_count)
4506                                 return 0;
4507
4508                         adapter->vf_data[vf].vlans_enabled--;
4509                         if (!adapter->vf_data[vf].vlans_enabled) {
4510                                 u32 size;
4511                                 reg = rd32(E1000_VMOLR(vf));
4512                                 size = reg & E1000_VMOLR_RLPML_MASK;
4513                                 size -= 4;
4514                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4515                                 reg |= size;
4516                                 wr32(E1000_VMOLR(vf), reg);
4517                         }
4518                 }
4519         }
4520         return 0;
4521 }
4522
4523 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4524 {
4525         struct e1000_hw *hw = &adapter->hw;
4526
4527         if (vid)
4528                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4529         else
4530                 wr32(E1000_VMVIR(vf), 0);
4531 }
4532
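     /**
      * igb_ndo_set_vf_vlan - assign a port VLAN and QoS priority to a VF
      * @netdev: network interface device structure
      * @vf: VF index to configure
      * @vlan: VLAN ID to assign (0 clears the port VLAN)
      * @qos: priority bits to insert into the VLAN tag
      **/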
4533 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4534                                int vf, u16 vlan, u8 qos)
4535 {
4536         int err = 0;
4537         struct igb_adapter *adapter = netdev_priv(netdev);
4538
4539         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4540                 return -EINVAL;
4541         if (vlan || qos) {
4542                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4543                 if (err)
4544                         goto out;
4545                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4546                 igb_set_vmolr(adapter, vf, !vlan);
4547                 adapter->vf_data[vf].pf_vlan = vlan;
4548                 adapter->vf_data[vf].pf_qos = qos;
4549                 dev_info(&adapter->pdev->dev,
4550                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4551                 if (test_bit(__IGB_DOWN, &adapter->state)) {
4552                         dev_warn(&adapter->pdev->dev,
4553                                  "The VF VLAN has been set,"
4554                                  " but the PF device is not up.\n");
4555                         dev_warn(&adapter->pdev->dev,
4556                                  "Bring the PF device up before"
4557                                  " attempting to use the VF device.\n");
4558                 }
4559         } else {
4560                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4561                                    false, vf);
4562                 igb_set_vmvir(adapter, vlan, vf);
4563                 igb_set_vmolr(adapter, vf, true);
4564                 adapter->vf_data[vf].pf_vlan = 0;
4565                 adapter->vf_data[vf].pf_qos = 0;
4566        }
4567 out:
4568        return err;
4569 }
4570
4571 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4572 {
4573         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4574         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4575
4576         return igb_vlvf_set(adapter, vid, add, vf);
4577 }
4578
4579 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4580 {
4581         /* clear flags */
4582         adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4583         adapter->vf_data[vf].last_nack = jiffies;
4584
4585         /* reset offloads to defaults */
4586         igb_set_vmolr(adapter, vf, true);
4587
4588         /* reset vlans for device */
4589         igb_clear_vf_vfta(adapter, vf);
4590         if (adapter->vf_data[vf].pf_vlan)
4591                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4592                                     adapter->vf_data[vf].pf_vlan,
4593                                     adapter->vf_data[vf].pf_qos);
4594         else
4595                 igb_clear_vf_vfta(adapter, vf);
4596
4597         /* reset multicast table array for vf */
4598         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4599
4600         /* Flush and reset the mta with the new values */
4601         igb_set_rx_mode(adapter->netdev);
4602 }
4603
4604 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4605 {
4606         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4607
4608         /* generate a new mac address as we were hotplug removed/added */
4609         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4610                 random_ether_addr(vf_mac);
4611
4612         /* process remaining reset events */
4613         igb_vf_reset(adapter, vf);
4614 }
4615
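     /**
      * igb_vf_reset_msg - complete a VF-initiated reset request
      * @adapter: board private structure
      * @vf: VF index that requested the reset
      *
      * Restores the VF's MAC filter, re-enables its Tx/Rx queues and replies
      * with an ACK carrying the VF's MAC address.
      **/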
4616 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4617 {
4618         struct e1000_hw *hw = &adapter->hw;
4619         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4620         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4621         u32 reg, msgbuf[3];
4622         u8 *addr = (u8 *)(&msgbuf[1]);
4623
4624         /* process all the same items cleared in a function level reset */
4625         igb_vf_reset(adapter, vf);
4626
4627         /* set vf mac address */
4628         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4629
4630         /* enable transmit and receive for vf */
4631         reg = rd32(E1000_VFTE);
4632         wr32(E1000_VFTE, reg | (1 << vf));
4633         reg = rd32(E1000_VFRE);
4634         wr32(E1000_VFRE, reg | (1 << vf));
4635
4636         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4637
4638         /* reply to reset with ack and vf mac address */
4639         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4640         memcpy(addr, vf_mac, 6);
4641         igb_write_mbx(hw, msgbuf, 3, vf);
4642 }
4643
4644 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4645 {
4646         unsigned char *addr = (unsigned char *)&msg[1];
4647         int err = -1;
4648
4649         if (is_valid_ether_addr(addr))
4650                 err = igb_set_vf_mac(adapter, vf, addr);
4651
4652         return err;
4653 }
4654
4655 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4656 {
4657         struct e1000_hw *hw = &adapter->hw;
4658         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4659         u32 msg = E1000_VT_MSGTYPE_NACK;
4660
4661         /* if device isn't clear to send it shouldn't be reading either */
4662         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
4663             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
4664                 igb_write_mbx(hw, &msg, 1, vf);
4665                 vf_data->last_nack = jiffies;
4666         }
4667 }
4668
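     /**
      * igb_rcv_msg_from_vf - read and dispatch a pending VF mailbox message
      * @adapter: board private structure
      * @vf: VF index whose mailbox should be read
      **/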
4669 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
4670 {
4671         struct pci_dev *pdev = adapter->pdev;
4672         u32 msgbuf[E1000_VFMAILBOX_SIZE];
4673         struct e1000_hw *hw = &adapter->hw;
4674         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4675         s32 retval;
4676
4677         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
4678
4679         if (retval) {
4680                 /* if receive failed revoke the VF CTS status and restart init */
4681                 dev_err(&pdev->dev, "Error receiving message from VF\n");
4682                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
4683                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4684                         return;
4685                 goto out;
4686         }
4687
4688         /* this is a message we already processed, do nothing */
4689         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
4690                 return;
4691
4692         /*
4693          * until the vf completes a reset it should not be
4694          * allowed to start any configuration.
4695          */
4696
4697         if (msgbuf[0] == E1000_VF_RESET) {
4698                 igb_vf_reset_msg(adapter, vf);
4699                 return;
4700         }
4701
4702         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
4703                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
4704                         return;
4705                 retval = -1;
4706                 goto out;
4707         }
4708
4709         switch ((msgbuf[0] & 0xFFFF)) {
4710         case E1000_VF_SET_MAC_ADDR:
4711                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
4712                 break;
4713         case E1000_VF_SET_PROMISC:
4714                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
4715                 break;
4716         case E1000_VF_SET_MULTICAST:
4717                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
4718                 break;
4719         case E1000_VF_SET_LPE:
4720                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
4721                 break;
4722         case E1000_VF_SET_VLAN:
4723                 if (adapter->vf_data[vf].pf_vlan)
4724                         retval = -1;
4725                 else
4726                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
4727                 break;
4728         default:
4729                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
4730                 retval = -1;
4731                 break;
4732         }
4733
4734         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
4735 out:
4736         /* notify the VF of the results of what it sent us */
4737         if (retval)
4738                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
4739         else
4740                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
4741
4742         igb_write_mbx(hw, msgbuf, 1, vf);
4743 }
4744
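     /**
      * igb_msg_task - service pending reset, message and ack events for all VFs
      * @adapter: board private structure
      **/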
4745 static void igb_msg_task(struct igb_adapter *adapter)
4746 {
4747         struct e1000_hw *hw = &adapter->hw;
4748         u32 vf;
4749
4750         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
4751                 /* process any reset requests */
4752                 if (!igb_check_for_rst(hw, vf))
4753                         igb_vf_reset_event(adapter, vf);
4754
4755                 /* process any messages pending */
4756                 if (!igb_check_for_msg(hw, vf))
4757                         igb_rcv_msg_from_vf(adapter, vf);
4758
4759                 /* process any acks */
4760                 if (!igb_check_for_ack(hw, vf))
4761                         igb_rcv_ack_from_vf(adapter, vf);
4762         }
4763 }
4764
4765 /**
4766  *  igb_set_uta - Set unicast filter table address
4767  *  @adapter: board private structure
4768  *
4769  *  The unicast table address is a register array of 32-bit registers.
4770  *  The table is meant to be used in a way similar to how the MTA is used
4771  *  however, due to certain limitations in the hardware, it is necessary to
4772  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
4773  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
4774  **/
4775 static void igb_set_uta(struct igb_adapter *adapter)
4776 {
4777         struct e1000_hw *hw = &adapter->hw;
4778         int i;
4779
4780         /* The UTA table only exists on 82576 hardware and newer */
4781         if (hw->mac.type < e1000_82576)
4782                 return;
4783
4784         /* we only need to do this if VMDq is enabled */
4785         if (!adapter->vfs_allocated_count)
4786                 return;
4787
4788         for (i = 0; i < hw->mac.uta_reg_count; i++)
4789                 array_wr32(E1000_UTA, i, ~0);
4790 }
4791
4792 /**
4793  * igb_intr_msi - Interrupt Handler
4794  * @irq: interrupt number
4795  * @data: pointer to a network interface device structure
4796  **/
4797 static irqreturn_t igb_intr_msi(int irq, void *data)
4798 {
4799         struct igb_adapter *adapter = data;
4800         struct igb_q_vector *q_vector = adapter->q_vector[0];
4801         struct e1000_hw *hw = &adapter->hw;
4802         /* read ICR disables interrupts using IAM */
4803         u32 icr = rd32(E1000_ICR);
4804
4805         igb_write_itr(q_vector);
4806
4807         if (icr & E1000_ICR_DRSTA)
4808                 schedule_work(&adapter->reset_task);
4809
4810         if (icr & E1000_ICR_DOUTSYNC) {
4811                 /* HW is reporting DMA is out of sync */
4812                 adapter->stats.doosync++;
4813         }
4814
4815         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4816                 hw->mac.get_link_status = 1;
4817                 if (!test_bit(__IGB_DOWN, &adapter->state))
4818                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4819         }
4820
4821         napi_schedule(&q_vector->napi);
4822
4823         return IRQ_HANDLED;
4824 }
4825
4826 /**
4827  * igb_intr - Legacy Interrupt Handler
4828  * @irq: interrupt number
4829  * @data: pointer to a network interface device structure
4830  **/
4831 static irqreturn_t igb_intr(int irq, void *data)
4832 {
4833         struct igb_adapter *adapter = data;
4834         struct igb_q_vector *q_vector = adapter->q_vector[0];
4835         struct e1000_hw *hw = &adapter->hw;
4836         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
4837          * need for the IMC write */
4838         u32 icr = rd32(E1000_ICR);
4839         if (!icr)
4840                 return IRQ_NONE;  /* Not our interrupt */
4841
4842         igb_write_itr(q_vector);
4843
4844         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
4845          * not set, then the adapter didn't send an interrupt */
4846         if (!(icr & E1000_ICR_INT_ASSERTED))
4847                 return IRQ_NONE;
4848
4849         if (icr & E1000_ICR_DRSTA)
4850                 schedule_work(&adapter->reset_task);
4851
4852         if (icr & E1000_ICR_DOUTSYNC) {
4853                 /* HW is reporting DMA is out of sync */
4854                 adapter->stats.doosync++;
4855         }
4856
4857         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
4858                 hw->mac.get_link_status = 1;
4859                 /* guard against interrupt when we're going down */
4860                 if (!test_bit(__IGB_DOWN, &adapter->state))
4861                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4862         }
4863
4864         napi_schedule(&q_vector->napi);
4865
4866         return IRQ_HANDLED;
4867 }
4868
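     /* Re-arm the interrupt source for a queue vector, refreshing the ITR
      * first when dynamic interrupt moderation is enabled.
      */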
4869 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
4870 {
4871         struct igb_adapter *adapter = q_vector->adapter;
4872         struct e1000_hw *hw = &adapter->hw;
4873
4874         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
4875             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
4876                 if (!adapter->msix_entries)
4877                         igb_set_itr(adapter);
4878                 else
4879                         igb_update_ring_itr(q_vector);
4880         }
4881
4882         if (!test_bit(__IGB_DOWN, &adapter->state)) {
4883                 if (adapter->msix_entries)
4884                         wr32(E1000_EIMS, q_vector->eims_value);
4885                 else
4886                         igb_irq_enable(adapter);
4887         }
4888 }
4889
4890 /**
4891  * igb_poll - NAPI Rx polling callback
4892  * @napi: napi polling structure
4893  * @budget: count of how many packets we should handle
4894  **/
4895 static int igb_poll(struct napi_struct *napi, int budget)
4896 {
4897         struct igb_q_vector *q_vector = container_of(napi,
4898                                                      struct igb_q_vector,
4899                                                      napi);
4900         int tx_clean_complete = 1, work_done = 0;
4901
4902 #ifdef CONFIG_IGB_DCA
4903         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
4904                 igb_update_dca(q_vector);
4905 #endif
4906         if (q_vector->tx_ring)
4907                 tx_clean_complete = igb_clean_tx_irq(q_vector);
4908
4909         if (q_vector->rx_ring)
4910                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
4911
4912         if (!tx_clean_complete)
4913                 work_done = budget;
4914
4915         /* If not enough Rx work done, exit the polling mode */
4916         if (work_done < budget) {
4917                 napi_complete(napi);
4918                 igb_ring_irq_enable(q_vector);
4919         }
4920
4921         return work_done;
4922 }
4923
4924 /**
4925  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
4926  * @adapter: board private structure
4927  * @shhwtstamps: timestamp structure to update
4928  * @regval: unsigned 64bit system time value.
4929  *
4930  * We need to convert the system time value stored in the RX/TXSTMP registers
4931  * into a hwtstamp which can be used by the upper level timestamping functions
4932  */
4933 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
4934                                    struct skb_shared_hwtstamps *shhwtstamps,
4935                                    u64 regval)
4936 {
4937         u64 ns;
4938
4939         /*
4940          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, so shift this up
4941          * by 24 bits to match the clock shift we set up earlier.
4942          */
4943         if (adapter->hw.mac.type == e1000_82580)
4944                 regval <<= IGB_82580_TSYNC_SHIFT;
4945
4946         ns = timecounter_cyc2time(&adapter->clock, regval);
4947         timecompare_update(&adapter->compare, ns);
4948         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
4949         shhwtstamps->hwtstamp = ns_to_ktime(ns);
4950         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
4951 }
4952
4953 /**
4954  * igb_tx_hwtstamp - utility function which checks for TX time stamp
4955  * @q_vector: pointer to q_vector containing needed info
4956  * @skb: packet that was just sent
4957  *
4958  * If we were asked to do hardware stamping and such a time stamp is
4959  * available, then it must have been for this skb here because we only
4960  * allow one such packet into the queue.
4961  */
4962 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
4963 {
4964         struct igb_adapter *adapter = q_vector->adapter;
4965         union skb_shared_tx *shtx = skb_tx(skb);
4966         struct e1000_hw *hw = &adapter->hw;
4967         struct skb_shared_hwtstamps shhwtstamps;
4968         u64 regval;
4969
4970         /* if skb does not support hw timestamp or TX stamp not valid exit */
4971         if (likely(!shtx->hardware) ||
4972             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
4973                 return;
4974
4975         regval = rd32(E1000_TXSTMPL);
4976         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
4977
4978         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
4979         skb_tstamp_tx(skb, &shhwtstamps);
4980 }
4981
4982 /**
4983  * igb_clean_tx_irq - Reclaim resources after transmit completes
4984  * @q_vector: pointer to q_vector containing needed info
4985  * returns true if ring is completely cleaned
4986  **/
4987 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
4988 {
4989         struct igb_adapter *adapter = q_vector->adapter;
4990         struct igb_ring *tx_ring = q_vector->tx_ring;
4991         struct net_device *netdev = tx_ring->netdev;
4992         struct e1000_hw *hw = &adapter->hw;
4993         struct igb_buffer *buffer_info;
4994         struct sk_buff *skb;
4995         union e1000_adv_tx_desc *tx_desc, *eop_desc;
4996         unsigned int total_bytes = 0, total_packets = 0;
4997         unsigned int i, eop, count = 0;
4998         bool cleaned = false;
4999
5000         i = tx_ring->next_to_clean;
5001         eop = tx_ring->buffer_info[i].next_to_watch;
5002         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5003
5004         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5005                (count < tx_ring->count)) {
5006                 for (cleaned = false; !cleaned; count++) {
5007                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5008                         buffer_info = &tx_ring->buffer_info[i];
5009                         cleaned = (i == eop);
5010                         skb = buffer_info->skb;
5011
5012                         if (skb) {
5013                                 unsigned int segs, bytecount;
5014                                 /* gso_segs is currently only valid for tcp */
5015                                 segs = buffer_info->gso_segs;
5016                                 /* multiply data chunks by size of headers */
5017                                 bytecount = ((segs - 1) * skb_headlen(skb)) +
5018                                             skb->len;
5019                                 total_packets += segs;
5020                                 total_bytes += bytecount;
5021
5022                                 igb_tx_hwtstamp(q_vector, skb);
5023                         }
5024
5025                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5026                         tx_desc->wb.status = 0;
5027
5028                         i++;
5029                         if (i == tx_ring->count)
5030                                 i = 0;
5031                 }
5032                 eop = tx_ring->buffer_info[i].next_to_watch;
5033                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5034         }
5035
5036         tx_ring->next_to_clean = i;
5037
5038         if (unlikely(count &&
5039                      netif_carrier_ok(netdev) &&
5040                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5041                 /* Make sure that anybody stopping the queue after this
5042                  * sees the new next_to_clean.
5043                  */
5044                 smp_mb();
5045                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5046                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5047                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5048                         tx_ring->tx_stats.restart_queue++;
5049                 }
5050         }
5051
5052         if (tx_ring->detect_tx_hung) {
5053                 /* Detect a transmit hang in hardware, this serializes the
5054                  * check with the clearing of time_stamp and movement of i */
5055                 tx_ring->detect_tx_hung = false;
5056                 if (tx_ring->buffer_info[i].time_stamp &&
5057                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5058                                (adapter->tx_timeout_factor * HZ)) &&
5059                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5060
5061                         /* detected Tx unit hang */
5062                         dev_err(&tx_ring->pdev->dev,
5063                                 "Detected Tx Unit Hang\n"
5064                                 "  Tx Queue             <%d>\n"
5065                                 "  TDH                  <%x>\n"
5066                                 "  TDT                  <%x>\n"
5067                                 "  next_to_use          <%x>\n"
5068                                 "  next_to_clean        <%x>\n"
5069                                 "buffer_info[next_to_clean]\n"
5070                                 "  time_stamp           <%lx>\n"
5071                                 "  next_to_watch        <%x>\n"
5072                                 "  jiffies              <%lx>\n"
5073                                 "  desc.status          <%x>\n",
5074                                 tx_ring->queue_index,
5075                                 readl(tx_ring->head),
5076                                 readl(tx_ring->tail),
5077                                 tx_ring->next_to_use,
5078                                 tx_ring->next_to_clean,
5079                                 tx_ring->buffer_info[eop].time_stamp,
5080                                 eop,
5081                                 jiffies,
5082                                 eop_desc->wb.status);
5083                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5084                 }
5085         }
5086         tx_ring->total_bytes += total_bytes;
5087         tx_ring->total_packets += total_packets;
5088         tx_ring->tx_stats.bytes += total_bytes;
5089         tx_ring->tx_stats.packets += total_packets;
5090         return (count < tx_ring->count);
5091 }
5092
5093 /**
5094  * igb_receive_skb - helper function to handle rx indications
5095  * @q_vector: structure containing interrupt and ring information
5096  * @skb: packet to send up
5097  * @vlan_tag: vlan tag for packet
5098  **/
5099 static void igb_receive_skb(struct igb_q_vector *q_vector,
5100                             struct sk_buff *skb,
5101                             u16 vlan_tag)
5102 {
5103         struct igb_adapter *adapter = q_vector->adapter;
5104
5105         if (vlan_tag && adapter->vlgrp)
5106                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5107                                  vlan_tag, skb);
5108         else
5109                 napi_gro_receive(&q_vector->napi, skb);
5110 }
5111
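     /**
      * igb_rx_checksum_adv - translate hardware checksum status into skb fields
      * @ring: ring the descriptor was received on
      * @status_err: descriptor status and error bits
      * @skb: packet being processed
      **/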
5112 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5113                                        u32 status_err, struct sk_buff *skb)
5114 {
5115         skb->ip_summed = CHECKSUM_NONE;
5116
5117         /* Ignore Checksum bit is set or checksum is disabled through ethtool */
5118         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5119              (status_err & E1000_RXD_STAT_IXSM))
5120                 return;
5121
5122         /* TCP/UDP checksum error bit is set */
5123         if (status_err &
5124             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5125                 /*
5126                  * work around errata with sctp packets where the TCPE aka
5127                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5128                  * packets, (aka let the stack check the crc32c)
5129                  */
5130                 if ((skb->len == 60) &&
5131                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
5132                         ring->rx_stats.csum_err++;
5133
5134                 /* let the stack verify checksum errors */
5135                 return;
5136         }
5137         /* It must be a TCP or UDP packet with a valid checksum */
5138         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5139                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5140
5141         dev_dbg(&ring->pdev->dev, "cksum success: bits %08X\n", status_err);
5142 }
5143
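     /**
      * igb_rx_hwtstamp - retrieve the Rx hardware timestamp for a packet
      * @q_vector: queue vector the packet arrived on
      * @staterr: descriptor status bits
      * @skb: packet to attach the timestamp to
      **/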
5144 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5145                                    struct sk_buff *skb)
5146 {
5147         struct igb_adapter *adapter = q_vector->adapter;
5148         struct e1000_hw *hw = &adapter->hw;
5149         u64 regval;
5150
5151         /*
5152          * If this bit is set, then the RX registers contain the time stamp. No
5153          * other packet will be time stamped until we read these registers, so
5154          * read the registers to make them available again. Because only one
5155          * packet can be time stamped at a time, we know that the register
5156          * values must belong to this one here and therefore we don't need to
5157          * compare any of the additional attributes stored for it.
5158          *
5159          * If nothing went wrong, then it should have a skb_shared_tx that we
5160          * can turn into a skb_shared_hwtstamps.
5161          */
5162         if (staterr & E1000_RXDADV_STAT_TSIP) {
5163                 u32 *stamp = (u32 *)skb->data;
5164                 regval = le32_to_cpu(*(stamp + 2));
5165                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5166                 skb_pull(skb, IGB_TS_HDR_LEN);
5167         } else {
5168                 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5169                         return;
5170
5171                 regval = rd32(E1000_RXSTMPL);
5172                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5173         }
5174
5175         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5176 }
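
     /* Return the length of the header portion the hardware placed in the
      * header buffer, capped at the ring's buffer length.
      */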
5177 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5178                                union e1000_adv_rx_desc *rx_desc)
5179 {
5180         /* HW will not DMA in data larger than the given buffer, even if it
5181          * parses the (NFS, of course) header to be larger.  In that case, it
5182          * fills the header buffer and spills the rest into the page.
5183          */
5184         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5185                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5186         if (hlen > rx_ring->rx_buffer_len)
5187                 hlen = rx_ring->rx_buffer_len;
5188         return hlen;
5189 }
5190
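     /**
      * igb_clean_rx_irq_adv - clean completed receive descriptors
      * @q_vector: queue vector owning the Rx ring
      * @work_done: incremented for each descriptor processed
      * @budget: maximum number of packets to process in this poll
      **/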
5191 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5192                                  int *work_done, int budget)
5193 {
5194         struct igb_ring *rx_ring = q_vector->rx_ring;
5195         struct net_device *netdev = rx_ring->netdev;
5196         struct pci_dev *pdev = rx_ring->pdev;
5197         union e1000_adv_rx_desc *rx_desc, *next_rxd;
5198         struct igb_buffer *buffer_info, *next_buffer;
5199         struct sk_buff *skb;
5200         bool cleaned = false;
5201         int cleaned_count = 0;
5202         int current_node = numa_node_id();
5203         unsigned int total_bytes = 0, total_packets = 0;
5204         unsigned int i;
5205         u32 staterr;
5206         u16 length;
5207         u16 vlan_tag;
5208
5209         i = rx_ring->next_to_clean;
5210         buffer_info = &rx_ring->buffer_info[i];
5211         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5212         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5213
5214         while (staterr & E1000_RXD_STAT_DD) {
5215                 if (*work_done >= budget)
5216                         break;
5217                 (*work_done)++;
5218
5219                 skb = buffer_info->skb;
5220                 prefetch(skb->data - NET_IP_ALIGN);
5221                 buffer_info->skb = NULL;
5222
5223                 i++;
5224                 if (i == rx_ring->count)
5225                         i = 0;
5226
5227                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5228                 prefetch(next_rxd);
5229                 next_buffer = &rx_ring->buffer_info[i];
5230
5231                 length = le16_to_cpu(rx_desc->wb.upper.length);
5232                 cleaned = true;
5233                 cleaned_count++;
5234
5235                 if (buffer_info->dma) {
5236                         pci_unmap_single(pdev, buffer_info->dma,
5237                                          rx_ring->rx_buffer_len,
5238                                          PCI_DMA_FROMDEVICE);
5239                         buffer_info->dma = 0;
5240                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5241                                 skb_put(skb, length);
5242                                 goto send_up;
5243                         }
5244                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5245                 }
5246
5247                 if (length) {
5248                         pci_unmap_page(pdev, buffer_info->page_dma,
5249                                        PAGE_SIZE / 2, PCI_DMA_FROMDEVICE);
5250                         buffer_info->page_dma = 0;
5251
5252                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5253                                                 buffer_info->page,
5254                                                 buffer_info->page_offset,
5255                                                 length);
5256
5257                         if ((page_count(buffer_info->page) != 1) ||
5258                             (page_to_nid(buffer_info->page) != current_node))
5259                                 buffer_info->page = NULL;
5260                         else
5261                                 get_page(buffer_info->page);
5262
5263                         skb->len += length;
5264                         skb->data_len += length;
5265                         skb->truesize += length;
5266                 }
5267
5268                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5269                         buffer_info->skb = next_buffer->skb;
5270                         buffer_info->dma = next_buffer->dma;
5271                         next_buffer->skb = skb;
5272                         next_buffer->dma = 0;
5273                         goto next_desc;
5274                 }
5275 send_up:
5276                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5277                         dev_kfree_skb_irq(skb);
5278                         goto next_desc;
5279                 }
5280
5281                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5282                         igb_rx_hwtstamp(q_vector, staterr, skb);
5283                 total_bytes += skb->len;
5284                 total_packets++;
5285
5286                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5287
5288                 skb->protocol = eth_type_trans(skb, netdev);
5289                 skb_record_rx_queue(skb, rx_ring->queue_index);
5290
5291                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5292                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5293
5294                 igb_receive_skb(q_vector, skb, vlan_tag);
5295
5296 next_desc:
5297                 rx_desc->wb.upper.status_error = 0;
5298
5299                 /* return some buffers to hardware, one at a time is too slow */
5300                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5301                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5302                         cleaned_count = 0;
5303                 }
5304
5305                 /* use prefetched values */
5306                 rx_desc = next_rxd;
5307                 buffer_info = next_buffer;
5308                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5309         }
5310
5311         rx_ring->next_to_clean = i;
5312         cleaned_count = igb_desc_unused(rx_ring);
5313
5314         if (cleaned_count)
5315                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5316
5317         rx_ring->total_packets += total_packets;
5318         rx_ring->total_bytes += total_bytes;
5319         rx_ring->rx_stats.packets += total_packets;
5320         rx_ring->rx_stats.bytes += total_bytes;
5321         return cleaned;
5322 }
5323
5324 /**
5325  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5326  * @rx_ring: rx descriptor ring to refill
      * @cleaned_count: number of descriptors to refill
5327  **/
5328 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5329 {
5330         struct net_device *netdev = rx_ring->netdev;
5331         union e1000_adv_rx_desc *rx_desc;
5332         struct igb_buffer *buffer_info;
5333         struct sk_buff *skb;
5334         unsigned int i;
5335         int bufsz;
5336
5337         i = rx_ring->next_to_use;
5338         buffer_info = &rx_ring->buffer_info[i];
5339
5340         bufsz = rx_ring->rx_buffer_len;
5341
5342         while (cleaned_count--) {
5343                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5344
5345                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5346                         if (!buffer_info->page) {
5347                                 buffer_info->page = netdev_alloc_page(netdev);
5348                                 if (!buffer_info->page) {
5349                                         rx_ring->rx_stats.alloc_failed++;
5350                                         goto no_buffers;
5351                                 }
5352                                 buffer_info->page_offset = 0;
5353                         } else {
5354                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5355                         }
5356                         buffer_info->page_dma =
5357                                 pci_map_page(rx_ring->pdev, buffer_info->page,
5358                                              buffer_info->page_offset,
5359                                              PAGE_SIZE / 2,
5360                                              PCI_DMA_FROMDEVICE);
5361                         if (pci_dma_mapping_error(rx_ring->pdev,
5362                                                   buffer_info->page_dma)) {
5363                                 buffer_info->page_dma = 0;
5364                                 rx_ring->rx_stats.alloc_failed++;
5365                                 goto no_buffers;
5366                         }
5367                 }
5368
5369                 skb = buffer_info->skb;
5370                 if (!skb) {
5371                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5372                         if (!skb) {
5373                                 rx_ring->rx_stats.alloc_failed++;
5374                                 goto no_buffers;
5375                         }
5376
5377                         buffer_info->skb = skb;
5378                 }
5379                 if (!buffer_info->dma) {
5380                         buffer_info->dma = pci_map_single(rx_ring->pdev,
5381                                                           skb->data,
5382                                                           bufsz,
5383                                                           PCI_DMA_FROMDEVICE);
5384                         if (pci_dma_mapping_error(rx_ring->pdev,
5385                                                   buffer_info->dma)) {
5386                                 buffer_info->dma = 0;
5387                                 rx_ring->rx_stats.alloc_failed++;
5388                                 goto no_buffers;
5389                         }
5390                 }
5391                 /* Refresh the desc even if buffer_addrs didn't change because
5392                  * each write-back erases this info. */
5393                 if (bufsz < IGB_RXBUFFER_1024) {
5394                         rx_desc->read.pkt_addr =
5395                              cpu_to_le64(buffer_info->page_dma);
5396                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5397                 } else {
5398                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5399                         rx_desc->read.hdr_addr = 0;
5400                 }
5401
5402                 i++;
5403                 if (i == rx_ring->count)
5404                         i = 0;
5405                 buffer_info = &rx_ring->buffer_info[i];
5406         }
5407
5408 no_buffers:
5409         if (rx_ring->next_to_use != i) {
5410                 rx_ring->next_to_use = i;
5411                 if (i == 0)
5412                         i = (rx_ring->count - 1);
5413                 else
5414                         i--;
5415
5416                 /* Force memory writes to complete before letting h/w
5417                  * know there are new descriptors to fetch.  (Only
5418                  * applicable for weak-ordered memory model archs,
5419                  * such as IA-64). */
5420                 wmb();
5421                 writel(i, rx_ring->tail);
5422         }
5423 }
5424
5425 /**
5426  * igb_mii_ioctl - handle MII ioctl requests
5427  * @netdev: network interface device structure
5428  * @ifr: interface request structure carrying the MII data
5429  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
5430  **/
5431 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5432 {
5433         struct igb_adapter *adapter = netdev_priv(netdev);
5434         struct mii_ioctl_data *data = if_mii(ifr);
5435
5436         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5437                 return -EOPNOTSUPP;
5438
5439         switch (cmd) {
5440         case SIOCGMIIPHY:
5441                 data->phy_id = adapter->hw.phy.addr;
5442                 break;
5443         case SIOCGMIIREG:
5444                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5445                                      &data->val_out))
5446                         return -EIO;
5447                 break;
5448         case SIOCSMIIREG:
5449         default:
5450                 return -EOPNOTSUPP;
5451         }
5452         return 0;
5453 }
5454
5455 /**
5456  * igb_hwtstamp_ioctl - control hardware time stamping
5457  * @netdev: network interface device structure
5458  * @ifr: interface request structure carrying the hwtstamp_config
5459  * @cmd: ioctl command
5460  *
5461  * Outgoing time stamping can be enabled and disabled. Play nice and
5462  * disable it when requested, although it shouldn't cause any overhead
5463  * when no packet needs it. At most one packet in the queue may be
5464  * marked for time stamping, otherwise it would be impossible to tell
5465  * for sure to which packet the hardware time stamp belongs.
5466  *
5467  * Incoming time stamping has to be configured via the hardware
5468  * filters. Not all combinations are supported, in particular event
5469  * type has to be specified. Matching the kind of event packet is
5470  * not supported, with the exception of "all V2 events regardless of
5471  * level 2 or 4".
5472  *
5473  **/
5474 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5475                               struct ifreq *ifr, int cmd)
5476 {
5477         struct igb_adapter *adapter = netdev_priv(netdev);
5478         struct e1000_hw *hw = &adapter->hw;
5479         struct hwtstamp_config config;
5480         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5481         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5482         u32 tsync_rx_cfg = 0;
5483         bool is_l4 = false;
5484         bool is_l2 = false;
5485         u32 regval;
5486
5487         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5488                 return -EFAULT;
5489
5490         /* reserved for future extensions */
5491         if (config.flags)
5492                 return -EINVAL;
5493
5494         switch (config.tx_type) {
5495         case HWTSTAMP_TX_OFF:
5496                 tsync_tx_ctl = 0;
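                     /* fall through */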
5497         case HWTSTAMP_TX_ON:
5498                 break;
5499         default:
5500                 return -ERANGE;
5501         }
5502
5503         switch (config.rx_filter) {
5504         case HWTSTAMP_FILTER_NONE:
5505                 tsync_rx_ctl = 0;
5506                 break;
5507         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5508         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5509         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5510         case HWTSTAMP_FILTER_ALL:
5511                 /*
5512                  * register TSYNCRXCFG must be set, therefore it is not
5513                  * possible to time stamp both Sync and Delay_Req messages
5514                  * => fall back to time stamping all packets
5515                  */
5516                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5517                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5518                 break;
5519         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5520                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5521                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5522                 is_l4 = true;
5523                 break;
5524         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5525                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5526                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5527                 is_l4 = true;
5528                 break;
5529         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5530         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5531                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5532                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5533                 is_l2 = true;
5534                 is_l4 = true;
5535                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5536                 break;
5537         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5538         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5539                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5540                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5541                 is_l2 = true;
5542                 is_l4 = true;
5543                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5544                 break;
5545         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5546         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5547         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5548                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5549                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5550                 is_l2 = true;
5551                 break;
5552         default:
5553                 return -ERANGE;
5554         }
5555
5556         if (hw->mac.type == e1000_82575) {
5557                 if (tsync_rx_ctl || tsync_tx_ctl)
5558                         return -EINVAL;
5559                 return 0;
5560         }
5561
5562         /*
5563          * Per-packet timestamping only works if all packets are
5564          * timestamped, so enable timestamping of all packets as
5565          * long as an Rx filter was configured.
5566          */
5567         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
5568                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5569                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5570         }
5571
5572         /* enable/disable TX */
5573         regval = rd32(E1000_TSYNCTXCTL);
5574         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5575         regval |= tsync_tx_ctl;
5576         wr32(E1000_TSYNCTXCTL, regval);
5577
5578         /* enable/disable RX */
5579         regval = rd32(E1000_TSYNCRXCTL);
5580         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5581         regval |= tsync_rx_ctl;
5582         wr32(E1000_TSYNCRXCTL, regval);
5583
5584         /* define which PTP packets are time stamped */
5585         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5586
5587         /* define ethertype filter for timestamped packets */
5588         if (is_l2)
5589                 wr32(E1000_ETQF(3),
5590                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5591                                  E1000_ETQF_1588 | /* enable timestamping */
5592                                  ETH_P_1588));     /* 1588 eth protocol type */
5593         else
5594                 wr32(E1000_ETQF(3), 0);
5595
5596 #define PTP_PORT 319	/* IEEE 1588 event messages use UDP port 319 */
5597         /* L4 Queue Filter[3]: filter by destination port and protocol */
5598         if (is_l4) {
5599                 u32 ftqf = (IPPROTO_UDP /* UDP */
5600                         | E1000_FTQF_VF_BP /* VF not compared */
5601                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5602                         | E1000_FTQF_MASK); /* mask all inputs */
5603                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5604
5605                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5606                 wr32(E1000_IMIREXT(3),
5607                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5608                 if (hw->mac.type == e1000_82576) {
5609                         /* enable source port check */
5610                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5611                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5612                 }
5613                 wr32(E1000_FTQF(3), ftqf);
5614         } else {
5615                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5616         }
5617         wrfl();
5618
5619         adapter->hwtstamp_config = config;
5620
5621         /* clear TX/RX time stamp registers, just to be sure */
5622         regval = rd32(E1000_TXSTMPH);
5623         regval = rd32(E1000_RXSTMPH);
5624
5625         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5626                 -EFAULT : 0;
5627 }
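
/*
 * Illustrative sketch, not part of the driver: one way a user-space PTP
 * application might drive the SIOCSHWTSTAMP path above.  The interface
 * name, socket and rx_filter choice are assumptions made only for this
 * example; on return the driver copies back the configuration it actually
 * applied (it may, for instance, widen the filter to HWTSTAMP_FILTER_ALL).
 *
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <net/if.h>
 *	#include <linux/sockios.h>
 *	#include <linux/net_tstamp.h>
 *
 *	int enable_hw_tstamp(int sock, const char *ifname)
 *	{
 *		struct hwtstamp_config cfg;
 *		struct ifreq ifr;
 *
 *		memset(&cfg, 0, sizeof(cfg));
 *		cfg.tx_type = HWTSTAMP_TX_ON;
 *		cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 *
 *		memset(&ifr, 0, sizeof(ifr));
 *		strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
 *		ifr.ifr_data = (void *)&cfg;
 *
 *		if (ioctl(sock, SIOCSHWTSTAMP, &ifr) < 0)
 *			return -1;
 *
 *		return 0;
 *	}
 */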
5628
5629 /**
5630  * igb_ioctl - entry point for device-specific ioctl calls
5631  * @netdev: network interface device structure
5632  * @ifr: interface request structure
5633  * @cmd: ioctl command
5634  **/
5635 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5636 {
5637         switch (cmd) {
5638         case SIOCGMIIPHY:
5639         case SIOCGMIIREG:
5640         case SIOCSMIIREG:
5641                 return igb_mii_ioctl(netdev, ifr, cmd);
5642         case SIOCSHWTSTAMP:
5643                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5644         default:
5645                 return -EOPNOTSUPP;
5646         }
5647 }
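
/*
 * Illustrative sketch, not part of the driver: reading a PHY register
 * through the SIOCGMIIPHY/SIOCGMIIREG cases dispatched above.  The
 * interface name and the choice of MII_BMSR are assumptions for the
 * example only.
 *
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <net/if.h>
 *	#include <linux/sockios.h>
 *	#include <linux/mii.h>
 *
 *	int read_phy_bmsr(int sock, const char *ifname, unsigned int *val)
 *	{
 *		struct ifreq ifr;
 *		struct mii_ioctl_data *mii =
 *			(struct mii_ioctl_data *)&ifr.ifr_data;
 *
 *		memset(&ifr, 0, sizeof(ifr));
 *		strncpy(ifr.ifr_name, ifname, IFNAMSIZ - 1);
 *
 *		if (ioctl(sock, SIOCGMIIPHY, &ifr) < 0)
 *			return -1;
 *		mii->reg_num = MII_BMSR;
 *		if (ioctl(sock, SIOCGMIIREG, &ifr) < 0)
 *			return -1;
 *		*val = mii->val_out;
 *		return 0;
 *	}
 */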
5648
5649 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5650 {
5651         struct igb_adapter *adapter = hw->back;
5652         u16 cap_offset;
5653
5654         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5655         if (!cap_offset)
5656                 return -E1000_ERR_CONFIG;
5657
5658         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5659
5660         return 0;
5661 }
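
/*
 * Illustrative usage sketch (caller and register choice are assumptions,
 * not taken from this file): shared MAC code can use this helper to read
 * a word from the device's PCI Express capability, e.g.
 *
 *	u16 devctl;
 *
 *	if (!igb_read_pcie_cap_reg(hw, PCI_EXP_DEVCTL, &devctl))
 *		pr_debug("igb: PCIe DEVCTL = 0x%04x\n", devctl);
 */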
5662
5663 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5664 {
5665         struct igb_adapter *adapter = hw->back;
5666         u16 cap_offset;
5667
5668         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5669         if (!cap_offset)
5670                 return -E1000_ERR_CONFIG;
5671
5672         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
5673
5674         return 0;
5675 }
5676
5677 static void igb_vlan_rx_register(struct net_device *netdev,
5678                                  struct vlan_group *grp)
5679 {
5680         struct igb_adapter *adapter = netdev_priv(netdev);
5681         struct e1000_hw *hw = &adapter->hw;
5682         u32 ctrl, rctl;
5683
5684         igb_irq_disable(adapter);
5685         adapter->vlgrp = grp;
5686
5687         if (grp) {
5688                 /* enable VLAN tag insert/strip */
5689                 ctrl = rd32(E1000_CTRL);
5690                 ctrl |= E1000_CTRL_VME;
5691                 wr32(E1000_CTRL, ctrl);
5692
5693                 /* Disable CFI check */
5694                 rctl = rd32(E1000_RCTL);
5695                 rctl &= ~E1000_RCTL_CFIEN;
5696                 wr32(E1000_RCTL, rctl);
5697         } else {
5698                 /* disable VLAN tag insert/strip */
5699                 ctrl = rd32(E1000_CTRL);
5700                 ctrl &= ~E1000_CTRL_VME;
5701                 wr32(E1000_CTRL, ctrl);
5702         }
5703
5704         igb_rlpml_set(adapter);
5705
5706         if (!test_bit(__IGB_DOWN, &adapter->state))
5707                 igb_irq_enable(adapter);
5708 }
5709
5710 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
5711 {
5712         struct igb_adapter *adapter = netdev_priv(netdev);
5713         struct e1000_hw *hw = &adapter->hw;
5714         int pf_id = adapter->vfs_allocated_count;
5715
5716         /* attempt to add filter to vlvf array */
5717         igb_vlvf_set(adapter, vid, true, pf_id);
5718
5719         /* add the filter since PF can receive vlans w/o entry in vlvf */
5720         igb_vfta_set(hw, vid, true);
5721 }
5722
5723 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
5724 {
5725         struct igb_adapter *adapter = netdev_priv(netdev);
5726         struct e1000_hw *hw = &adapter->hw;
5727         int pf_id = adapter->vfs_allocated_count;
5728         s32 err;
5729
5730         igb_irq_disable(adapter);
5731         vlan_group_set_device(adapter->vlgrp, vid, NULL);
5732
5733         if (!test_bit(__IGB_DOWN, &adapter->state))
5734                 igb_irq_enable(adapter);
5735
5736         /* remove vlan from VLVF table array */
5737         err = igb_vlvf_set(adapter, vid, false, pf_id);
5738
5739         /* if vid was not present in VLVF just remove it from table */
5740         if (err)
5741                 igb_vfta_set(hw, vid, false);
5742 }
5743
5744 static void igb_restore_vlan(struct igb_adapter *adapter)
5745 {
5746         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
5747
5748         if (adapter->vlgrp) {
5749                 u16 vid;
5750                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
5751                         if (!vlan_group_get_device(adapter->vlgrp, vid))
5752                                 continue;
5753                         igb_vlan_rx_add_vid(adapter->netdev, vid);
5754                 }
5755         }
5756 }
5757
5758 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
5759 {
5760         struct pci_dev *pdev = adapter->pdev;
5761         struct e1000_mac_info *mac = &adapter->hw.mac;
5762
5763         mac->autoneg = 0;
5764
5765         switch (spddplx) {
5766         case SPEED_10 + DUPLEX_HALF:
5767                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
5768                 break;
5769         case SPEED_10 + DUPLEX_FULL:
5770                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
5771                 break;
5772         case SPEED_100 + DUPLEX_HALF:
5773                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
5774                 break;
5775         case SPEED_100 + DUPLEX_FULL:
5776                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
5777                 break;
5778         case SPEED_1000 + DUPLEX_FULL:
5779                 mac->autoneg = 1;
5780                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
5781                 break;
5782         case SPEED_1000 + DUPLEX_HALF: /* not supported */
5783         default:
5784                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
5785                 return -EINVAL;
5786         }
5787         return 0;
5788 }
5789
5790 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
5791 {
5792         struct net_device *netdev = pci_get_drvdata(pdev);
5793         struct igb_adapter *adapter = netdev_priv(netdev);
5794         struct e1000_hw *hw = &adapter->hw;
5795         u32 ctrl, rctl, status;
5796         u32 wufc = adapter->wol;
5797 #ifdef CONFIG_PM
5798         int retval = 0;
5799 #endif
5800
5801         netif_device_detach(netdev);
5802
5803         if (netif_running(netdev))
5804                 igb_close(netdev);
5805
5806         igb_clear_interrupt_scheme(adapter);
5807
5808 #ifdef CONFIG_PM
5809         retval = pci_save_state(pdev);
5810         if (retval)
5811                 return retval;
5812 #endif
5813
5814         status = rd32(E1000_STATUS);
5815         if (status & E1000_STATUS_LU)
5816                 wufc &= ~E1000_WUFC_LNKC;
5817
5818         if (wufc) {
5819                 igb_setup_rctl(adapter);
5820                 igb_set_rx_mode(netdev);
5821
5822                 /* turn on all-multi mode if wake on multicast is enabled */
5823                 if (wufc & E1000_WUFC_MC) {
5824                         rctl = rd32(E1000_RCTL);
5825                         rctl |= E1000_RCTL_MPE;
5826                         wr32(E1000_RCTL, rctl);
5827                 }
5828
5829                 ctrl = rd32(E1000_CTRL);
5830                 /* advertise wake from D3Cold */
5831                 #define E1000_CTRL_ADVD3WUC 0x00100000
5832                 /* phy power management enable */
5833                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
5834                 ctrl |= E1000_CTRL_ADVD3WUC;
5835                 wr32(E1000_CTRL, ctrl);
5836
5837                 /* Allow time for pending master requests to run */
5838                 igb_disable_pcie_master(hw);
5839
5840                 wr32(E1000_WUC, E1000_WUC_PME_EN);
5841                 wr32(E1000_WUFC, wufc);
5842         } else {
5843                 wr32(E1000_WUC, 0);
5844                 wr32(E1000_WUFC, 0);
5845         }
5846
5847         *enable_wake = wufc || adapter->en_mng_pt;
5848         if (!*enable_wake)
5849                 igb_power_down_link(adapter);
5850         else
5851                 igb_power_up_link(adapter);
5852
5853         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
5854          * would have already happened in close and is redundant. */
5855         igb_release_hw_control(adapter);
5856
5857         pci_disable_device(pdev);
5858
5859         return 0;
5860 }
5861
5862 #ifdef CONFIG_PM
5863 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
5864 {
5865         int retval;
5866         bool wake;
5867
5868         retval = __igb_shutdown(pdev, &wake);
5869         if (retval)
5870                 return retval;
5871
5872         if (wake) {
5873                 pci_prepare_to_sleep(pdev);
5874         } else {
5875                 pci_wake_from_d3(pdev, false);
5876                 pci_set_power_state(pdev, PCI_D3hot);
5877         }
5878
5879         return 0;
5880 }
5881
5882 static int igb_resume(struct pci_dev *pdev)
5883 {
5884         struct net_device *netdev = pci_get_drvdata(pdev);
5885         struct igb_adapter *adapter = netdev_priv(netdev);
5886         struct e1000_hw *hw = &adapter->hw;
5887         u32 err;
5888
5889         pci_set_power_state(pdev, PCI_D0);
5890         pci_restore_state(pdev);
5891         pci_save_state(pdev);
5892
5893         err = pci_enable_device_mem(pdev);
5894         if (err) {
5895                 dev_err(&pdev->dev,
5896                         "igb: Cannot enable PCI device from suspend\n");
5897                 return err;
5898         }
5899         pci_set_master(pdev);
5900
5901         pci_enable_wake(pdev, PCI_D3hot, 0);
5902         pci_enable_wake(pdev, PCI_D3cold, 0);
5903
5904         if (igb_init_interrupt_scheme(adapter)) {
5905                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
5906                 return -ENOMEM;
5907         }
5908
5909         igb_reset(adapter);
5910
5911         /* let the f/w know that the h/w is now under the control of the
5912          * driver. */
5913         igb_get_hw_control(adapter);
5914
5915         wr32(E1000_WUS, ~0);
5916
5917         if (netif_running(netdev)) {
5918                 err = igb_open(netdev);
5919                 if (err)
5920                         return err;
5921         }
5922
5923         netif_device_attach(netdev);
5924
5925         return 0;
5926 }
5927 #endif
5928
5929 static void igb_shutdown(struct pci_dev *pdev)
5930 {
5931         bool wake;
5932
5933         __igb_shutdown(pdev, &wake);
5934
5935         if (system_state == SYSTEM_POWER_OFF) {
5936                 pci_wake_from_d3(pdev, wake);
5937                 pci_set_power_state(pdev, PCI_D3hot);
5938         }
5939 }
5940
5941 #ifdef CONFIG_NET_POLL_CONTROLLER
5942 /*
5943  * Polling 'interrupt' - used by things like netconsole to send skbs
5944  * without having to re-enable interrupts. It's not called while
5945  * the interrupt routine is executing.
5946  */
5947 static void igb_netpoll(struct net_device *netdev)
5948 {
5949         struct igb_adapter *adapter = netdev_priv(netdev);
5950         struct e1000_hw *hw = &adapter->hw;
5951         int i;
5952
5953         if (!adapter->msix_entries) {
5954                 struct igb_q_vector *q_vector = adapter->q_vector[0];
5955                 igb_irq_disable(adapter);
5956                 napi_schedule(&q_vector->napi);
5957                 return;
5958         }
5959
5960         for (i = 0; i < adapter->num_q_vectors; i++) {
5961                 struct igb_q_vector *q_vector = adapter->q_vector[i];
5962                 wr32(E1000_EIMC, q_vector->eims_value);
5963                 napi_schedule(&q_vector->napi);
5964         }
5965 }
5966 #endif /* CONFIG_NET_POLL_CONTROLLER */
5967
5968 /**
5969  * igb_io_error_detected - called when PCI error is detected
5970  * @pdev: Pointer to PCI device
5971  * @state: The current pci connection state
5972  *
5973  * This function is called after a PCI bus error affecting
5974  * this device has been detected.
5975  */
5976 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
5977                                               pci_channel_state_t state)
5978 {
5979         struct net_device *netdev = pci_get_drvdata(pdev);
5980         struct igb_adapter *adapter = netdev_priv(netdev);
5981
5982         netif_device_detach(netdev);
5983
5984         if (state == pci_channel_io_perm_failure)
5985                 return PCI_ERS_RESULT_DISCONNECT;
5986
5987         if (netif_running(netdev))
5988                 igb_down(adapter);
5989         pci_disable_device(pdev);
5990
5991         /* Request a slot reset. */
5992         return PCI_ERS_RESULT_NEED_RESET;
5993 }
5994
5995 /**
5996  * igb_io_slot_reset - called after the pci bus has been reset.
5997  * @pdev: Pointer to PCI device
5998  *
5999  * Restart the card from scratch, as if from a cold-boot. Implementation
6000  * resembles the first-half of the igb_resume routine.
6001  */
6002 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6003 {
6004         struct net_device *netdev = pci_get_drvdata(pdev);
6005         struct igb_adapter *adapter = netdev_priv(netdev);
6006         struct e1000_hw *hw = &adapter->hw;
6007         pci_ers_result_t result;
6008         int err;
6009
6010         if (pci_enable_device_mem(pdev)) {
6011                 dev_err(&pdev->dev,
6012                         "Cannot re-enable PCI device after reset.\n");
6013                 result = PCI_ERS_RESULT_DISCONNECT;
6014         } else {
6015                 pci_set_master(pdev);
6016                 pci_restore_state(pdev);
6017                 pci_save_state(pdev);
6018
6019                 pci_enable_wake(pdev, PCI_D3hot, 0);
6020                 pci_enable_wake(pdev, PCI_D3cold, 0);
6021
6022                 igb_reset(adapter);
6023                 wr32(E1000_WUS, ~0);
6024                 result = PCI_ERS_RESULT_RECOVERED;
6025         }
6026
6027         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6028         if (err) {
6029                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6030                         "failed 0x%0x\n", err);
6031                 /* non-fatal, continue */
6032         }
6033
6034         return result;
6035 }
6036
6037 /**
6038  * igb_io_resume - called when traffic can start flowing again.
6039  * @pdev: Pointer to PCI device
6040  *
6041  * This callback is called when the error recovery driver tells us that
6042  * it's OK to resume normal operation. Implementation resembles the
6043  * second-half of the igb_resume routine.
6044  */
6045 static void igb_io_resume(struct pci_dev *pdev)
6046 {
6047         struct net_device *netdev = pci_get_drvdata(pdev);
6048         struct igb_adapter *adapter = netdev_priv(netdev);
6049
6050         if (netif_running(netdev)) {
6051                 if (igb_up(adapter)) {
6052                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6053                         return;
6054                 }
6055         }
6056
6057         netif_device_attach(netdev);
6058
6059         /* let the f/w know that the h/w is now under the control of the
6060          * driver. */
6061         igb_get_hw_control(adapter);
6062 }
6063
6064 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6065                              u8 qsel)
6066 {
6067         u32 rar_low, rar_high;
6068         struct e1000_hw *hw = &adapter->hw;
6069
6070         /* HW expects these in little endian so we reverse the byte order
6071          * from network order (big endian) to little endian
6072          */
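        /*
         * For example (address chosen only for illustration), a MAC of
         * 00:11:22:33:44:55 yields rar_low = 0x33221100 and
         * rar_high = 0x00005544 before the flag bits below are OR'ed in.
         */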
6073         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6074                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6075         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6076
6077         /* Indicate to hardware the Address is Valid. */
6078         rar_high |= E1000_RAH_AV;
6079
6080         if (hw->mac.type == e1000_82575)
6081                 rar_high |= E1000_RAH_POOL_1 * qsel;
6082         else
6083                 rar_high |= E1000_RAH_POOL_1 << qsel;
6084
6085         wr32(E1000_RAL(index), rar_low);
6086         wrfl();
6087         wr32(E1000_RAH(index), rar_high);
6088         wrfl();
6089 }
6090
6091 static int igb_set_vf_mac(struct igb_adapter *adapter,
6092                           int vf, unsigned char *mac_addr)
6093 {
6094         struct e1000_hw *hw = &adapter->hw;
6095         /* VF MAC addresses start at the end of the receive addresses and
6096          * move towards the first; as a result a collision should not be possible */
6097         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6098
6099         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6100
6101         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6102
6103         return 0;
6104 }
6105
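/*
 * ndo_set_vf_mac hook; typically reached via rtnetlink, e.g. with
 * iproute2: "ip link set <pf-ifname> vf <n> mac <address>" (command shown
 * for illustration only).
 */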
6106 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6107 {
6108         struct igb_adapter *adapter = netdev_priv(netdev);
6109         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6110                 return -EINVAL;
6111         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6112         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6113         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6114                                       " change effective.\n");
6115         if (test_bit(__IGB_DOWN, &adapter->state)) {
6116                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6117                          " but the PF device is not up.\n");
6118                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6119                          " attempting to use the VF device.\n");
6120         }
6121         return igb_set_vf_mac(adapter, vf, mac);
6122 }
6123
6124 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6125 {
6126         return -EOPNOTSUPP;
6127 }
6128
6129 static int igb_ndo_get_vf_config(struct net_device *netdev,
6130                                  int vf, struct ifla_vf_info *ivi)
6131 {
6132         struct igb_adapter *adapter = netdev_priv(netdev);
6133         if (vf >= adapter->vfs_allocated_count)
6134                 return -EINVAL;
6135         ivi->vf = vf;
6136         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6137         ivi->tx_rate = 0;
6138         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6139         ivi->qos = adapter->vf_data[vf].pf_qos;
6140         return 0;
6141 }
6142
6143 static void igb_vmm_control(struct igb_adapter *adapter)
6144 {
6145         struct e1000_hw *hw = &adapter->hw;
6146         u32 reg;
6147
6148         switch (hw->mac.type) {
6149         case e1000_82575:
6150         default:
6151                 /* replication is not supported for 82575 */
6152                 return;
6153         case e1000_82576:
6154                 /* notify HW that the MAC is adding vlan tags */
6155                 reg = rd32(E1000_DTXCTL);
6156                 reg |= E1000_DTXCTL_VLAN_ADDED;
6157                 wr32(E1000_DTXCTL, reg);
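                /* fall through */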
6158         case e1000_82580:
6159                 /* enable replication vlan tag stripping */
6160                 reg = rd32(E1000_RPLOLR);
6161                 reg |= E1000_RPLOLR_STRVLAN;
6162                 wr32(E1000_RPLOLR, reg);
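                /* fall through */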
6163         case e1000_i350:
6164                 /* none of the above registers are supported by i350 */
6165                 break;
6166         }
6167
6168         if (adapter->vfs_allocated_count) {
6169                 igb_vmdq_set_loopback_pf(hw, true);
6170                 igb_vmdq_set_replication_pf(hw, true);
6171         } else {
6172                 igb_vmdq_set_loopback_pf(hw, false);
6173                 igb_vmdq_set_replication_pf(hw, false);
6174         }
6175 }
6176
6177 /* igb_main.c */