[net-next-2.6.git] drivers/net/igb/igb_main.c
1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
48 #ifdef CONFIG_IGB_DCA
49 #include <linux/dca.h>
50 #endif
51 #include "igb.h"
52
53 #define DRV_VERSION "2.1.0-k2"
54 char igb_driver_name[] = "igb";
55 char igb_driver_version[] = DRV_VERSION;
56 static const char igb_driver_string[] =
57                                 "Intel(R) Gigabit Ethernet Network Driver";
58 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
59
60 static const struct e1000_info *igb_info_tbl[] = {
61         [board_82575] = &e1000_82575_info,
62 };
63
64 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
85         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
86         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
87         /* required last entry */
88         {0, }
89 };
90
91 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
92
93 void igb_reset(struct igb_adapter *);
94 static int igb_setup_all_tx_resources(struct igb_adapter *);
95 static int igb_setup_all_rx_resources(struct igb_adapter *);
96 static void igb_free_all_tx_resources(struct igb_adapter *);
97 static void igb_free_all_rx_resources(struct igb_adapter *);
98 static void igb_setup_mrqc(struct igb_adapter *);
99 void igb_update_stats(struct igb_adapter *);
100 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
101 static void __devexit igb_remove(struct pci_dev *pdev);
102 static int igb_sw_init(struct igb_adapter *);
103 static int igb_open(struct net_device *);
104 static int igb_close(struct net_device *);
105 static void igb_configure_tx(struct igb_adapter *);
106 static void igb_configure_rx(struct igb_adapter *);
107 static void igb_clean_all_tx_rings(struct igb_adapter *);
108 static void igb_clean_all_rx_rings(struct igb_adapter *);
109 static void igb_clean_tx_ring(struct igb_ring *);
110 static void igb_clean_rx_ring(struct igb_ring *);
111 static void igb_set_rx_mode(struct net_device *);
112 static void igb_update_phy_info(unsigned long);
113 static void igb_watchdog(unsigned long);
114 static void igb_watchdog_task(struct work_struct *);
115 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
116 static struct net_device_stats *igb_get_stats(struct net_device *);
117 static int igb_change_mtu(struct net_device *, int);
118 static int igb_set_mac(struct net_device *, void *);
119 static void igb_set_uta(struct igb_adapter *adapter);
120 static irqreturn_t igb_intr(int irq, void *);
121 static irqreturn_t igb_intr_msi(int irq, void *);
122 static irqreturn_t igb_msix_other(int irq, void *);
123 static irqreturn_t igb_msix_ring(int irq, void *);
124 #ifdef CONFIG_IGB_DCA
125 static void igb_update_dca(struct igb_q_vector *);
126 static void igb_setup_dca(struct igb_adapter *);
127 #endif /* CONFIG_IGB_DCA */
128 static bool igb_clean_tx_irq(struct igb_q_vector *);
129 static int igb_poll(struct napi_struct *, int);
130 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
131 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
132 static void igb_tx_timeout(struct net_device *);
133 static void igb_reset_task(struct work_struct *);
134 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
135 static void igb_vlan_rx_add_vid(struct net_device *, u16);
136 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
137 static void igb_restore_vlan(struct igb_adapter *);
138 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32 , u8);
139 static void igb_ping_all_vfs(struct igb_adapter *);
140 static void igb_msg_task(struct igb_adapter *);
141 static void igb_vmm_control(struct igb_adapter *);
142 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
143 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
144 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
145 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
146                                int vf, u16 vlan, u8 qos);
147 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
148 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
149                                  struct ifla_vf_info *ivi);
150
151 #ifdef CONFIG_PM
152 static int igb_suspend(struct pci_dev *, pm_message_t);
153 static int igb_resume(struct pci_dev *);
154 #endif
155 static void igb_shutdown(struct pci_dev *);
156 #ifdef CONFIG_IGB_DCA
157 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
158 static struct notifier_block dca_notifier = {
159         .notifier_call  = igb_notify_dca,
160         .next           = NULL,
161         .priority       = 0
162 };
163 #endif
164 #ifdef CONFIG_NET_POLL_CONTROLLER
165 /* for netdump / net console */
166 static void igb_netpoll(struct net_device *);
167 #endif
168 #ifdef CONFIG_PCI_IOV
169 static unsigned int max_vfs = 0;
170 module_param(max_vfs, uint, 0);
171 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
172                  "per physical function");
173 #endif /* CONFIG_PCI_IOV */
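
/*
 * Example (illustrative): loading the driver with SR-IOV enabled, e.g.
 *   modprobe igb max_vfs=7
 * requests seven virtual functions per port; the 82576 family supports at
 * most 7 VFs per physical function, so larger values are reduced by the
 * driver at probe time.
 */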
174
175 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
176                      pci_channel_state_t);
177 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
178 static void igb_io_resume(struct pci_dev *);
179
180 static struct pci_error_handlers igb_err_handler = {
181         .error_detected = igb_io_error_detected,
182         .slot_reset = igb_io_slot_reset,
183         .resume = igb_io_resume,
184 };
185
186
187 static struct pci_driver igb_driver = {
188         .name     = igb_driver_name,
189         .id_table = igb_pci_tbl,
190         .probe    = igb_probe,
191         .remove   = __devexit_p(igb_remove),
192 #ifdef CONFIG_PM
193         /* Power Management Hooks */
194         .suspend  = igb_suspend,
195         .resume   = igb_resume,
196 #endif
197         .shutdown = igb_shutdown,
198         .err_handler = &igb_err_handler
199 };
200
201 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
202 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
203 MODULE_LICENSE("GPL");
204 MODULE_VERSION(DRV_VERSION);
205
206 struct igb_reg_info {
207         u32 ofs;
208         char *name;
209 };
210
211 static const struct igb_reg_info igb_reg_info_tbl[] = {
212
213         /* General Registers */
214         {E1000_CTRL, "CTRL"},
215         {E1000_STATUS, "STATUS"},
216         {E1000_CTRL_EXT, "CTRL_EXT"},
217
218         /* Interrupt Registers */
219         {E1000_ICR, "ICR"},
220
221         /* RX Registers */
222         {E1000_RCTL, "RCTL"},
223         {E1000_RDLEN(0), "RDLEN"},
224         {E1000_RDH(0), "RDH"},
225         {E1000_RDT(0), "RDT"},
226         {E1000_RXDCTL(0), "RXDCTL"},
227         {E1000_RDBAL(0), "RDBAL"},
228         {E1000_RDBAH(0), "RDBAH"},
229
230         /* TX Registers */
231         {E1000_TCTL, "TCTL"},
232         {E1000_TDBAL(0), "TDBAL"},
233         {E1000_TDBAH(0), "TDBAH"},
234         {E1000_TDLEN(0), "TDLEN"},
235         {E1000_TDH(0), "TDH"},
236         {E1000_TDT(0), "TDT"},
237         {E1000_TXDCTL(0), "TXDCTL"},
238         {E1000_TDFH, "TDFH"},
239         {E1000_TDFT, "TDFT"},
240         {E1000_TDFHS, "TDFHS"},
241         {E1000_TDFPC, "TDFPC"},
242
243         /* List Terminator */
244         {}
245 };
246
247 /*
248  * igb_regdump - register printout routine
249  */
250 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
251 {
252         int n = 0;
253         char rname[16];
254         u32 regs[8];
255
256         switch (reginfo->ofs) {
257         case E1000_RDLEN(0):
258                 for (n = 0; n < 4; n++)
259                         regs[n] = rd32(E1000_RDLEN(n));
260                 break;
261         case E1000_RDH(0):
262                 for (n = 0; n < 4; n++)
263                         regs[n] = rd32(E1000_RDH(n));
264                 break;
265         case E1000_RDT(0):
266                 for (n = 0; n < 4; n++)
267                         regs[n] = rd32(E1000_RDT(n));
268                 break;
269         case E1000_RXDCTL(0):
270                 for (n = 0; n < 4; n++)
271                         regs[n] = rd32(E1000_RXDCTL(n));
272                 break;
273         case E1000_RDBAL(0):
274                 for (n = 0; n < 4; n++)
275                         regs[n] = rd32(E1000_RDBAL(n));
276                 break;
277         case E1000_RDBAH(0):
278                 for (n = 0; n < 4; n++)
279                         regs[n] = rd32(E1000_RDBAH(n));
280                 break;
281         case E1000_TDBAL(0):
282                 for (n = 0; n < 4; n++)
283                         regs[n] = rd32(E1000_TDBAL(n));
284                 break;
285         case E1000_TDBAH(0):
286                 for (n = 0; n < 4; n++)
287                         regs[n] = rd32(E1000_TDBAH(n));
288                 break;
289         case E1000_TDLEN(0):
290                 for (n = 0; n < 4; n++)
291                         regs[n] = rd32(E1000_TDLEN(n));
292                 break;
293         case E1000_TDH(0):
294                 for (n = 0; n < 4; n++)
295                         regs[n] = rd32(E1000_TDH(n));
296                 break;
297         case E1000_TDT(0):
298                 for (n = 0; n < 4; n++)
299                         regs[n] = rd32(E1000_TDT(n));
300                 break;
301         case E1000_TXDCTL(0):
302                 for (n = 0; n < 4; n++)
303                         regs[n] = rd32(E1000_TXDCTL(n));
304                 break;
305         default:
306                 printk(KERN_INFO "%-15s %08x\n",
307                         reginfo->name, rd32(reginfo->ofs));
308                 return;
309         }
310
311         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
312         printk(KERN_INFO "%-15s ", rname);
313         for (n = 0; n < 4; n++)
314                 printk(KERN_CONT "%08x ", regs[n]);
315         printk(KERN_CONT "\n");
316 }
317
318 /*
319  * igb_dump - Print registers, tx-rings and rx-rings
320  */
321 static void igb_dump(struct igb_adapter *adapter)
322 {
323         struct net_device *netdev = adapter->netdev;
324         struct e1000_hw *hw = &adapter->hw;
325         struct igb_reg_info *reginfo;
326         int n = 0;
327         struct igb_ring *tx_ring;
328         union e1000_adv_tx_desc *tx_desc;
329         struct my_u0 { u64 a; u64 b; } *u0;
330         struct igb_buffer *buffer_info;
331         struct igb_ring *rx_ring;
332         union e1000_adv_rx_desc *rx_desc;
333         u32 staterr;
334         int i = 0;
335
336         if (!netif_msg_hw(adapter))
337                 return;
338
339         /* Print netdevice Info */
340         if (netdev) {
341                 dev_info(&adapter->pdev->dev, "Net device Info\n");
342                 printk(KERN_INFO "Device Name     state            "
343                         "trans_start      last_rx\n");
344                 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
345                 netdev->name,
346                 netdev->state,
347                 netdev->trans_start,
348                 netdev->last_rx);
349         }
350
351         /* Print Registers */
352         dev_info(&adapter->pdev->dev, "Register Dump\n");
353         printk(KERN_INFO " Register Name   Value\n");
354         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
355              reginfo->name; reginfo++) {
356                 igb_regdump(hw, reginfo);
357         }
358
359         /* Print TX Ring Summary */
360         if (!netdev || !netif_running(netdev))
361                 goto exit;
362
363         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
364         printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
365                 " leng ntw timestamp\n");
366         for (n = 0; n < adapter->num_tx_queues; n++) {
367                 tx_ring = adapter->tx_ring[n];
368                 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
369                 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
370                            n, tx_ring->next_to_use, tx_ring->next_to_clean,
371                            (u64)buffer_info->dma,
372                            buffer_info->length,
373                            buffer_info->next_to_watch,
374                            (u64)buffer_info->time_stamp);
375         }
376
377         /* Print TX Rings */
378         if (!netif_msg_tx_done(adapter))
379                 goto rx_ring_summary;
380
381         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
382
383         /* Transmit Descriptor Formats
384          *
385          * Advanced Transmit Descriptor
386          *   +--------------------------------------------------------------+
387          * 0 |         Buffer Address [63:0]                                |
388          *   +--------------------------------------------------------------+
389          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
390          *   +--------------------------------------------------------------+
391          *   63      46 45    40 39 38 36 35 32 31   24             15       0
392          */
393
394         for (n = 0; n < adapter->num_tx_queues; n++) {
395                 tx_ring = adapter->tx_ring[n];
396                 printk(KERN_INFO "------------------------------------\n");
397                 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
398                 printk(KERN_INFO "------------------------------------\n");
399                 printk(KERN_INFO "T [desc]     [address 63:0  ] "
400                         "[PlPOCIStDDM Ln] [bi->dma       ] "
401                         "leng  ntw timestamp        bi->skb\n");
402
403                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
404                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
405                         buffer_info = &tx_ring->buffer_info[i];
406                         u0 = (struct my_u0 *)tx_desc;
407                         printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
408                                 " %04X  %3X %016llX %p", i,
409                                 le64_to_cpu(u0->a),
410                                 le64_to_cpu(u0->b),
411                                 (u64)buffer_info->dma,
412                                 buffer_info->length,
413                                 buffer_info->next_to_watch,
414                                 (u64)buffer_info->time_stamp,
415                                 buffer_info->skb);
416                         if (i == tx_ring->next_to_use &&
417                                 i == tx_ring->next_to_clean)
418                                 printk(KERN_CONT " NTC/U\n");
419                         else if (i == tx_ring->next_to_use)
420                                 printk(KERN_CONT " NTU\n");
421                         else if (i == tx_ring->next_to_clean)
422                                 printk(KERN_CONT " NTC\n");
423                         else
424                                 printk(KERN_CONT "\n");
425
426                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
427                                 print_hex_dump(KERN_INFO, "",
428                                         DUMP_PREFIX_ADDRESS,
429                                         16, 1, phys_to_virt(buffer_info->dma),
430                                         buffer_info->length, true);
431                 }
432         }
433
434         /* Print RX Rings Summary */
435 rx_ring_summary:
436         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
437         printk(KERN_INFO "Queue [NTU] [NTC]\n");
438         for (n = 0; n < adapter->num_rx_queues; n++) {
439                 rx_ring = adapter->rx_ring[n];
440                 printk(KERN_INFO " %5d %5X %5X\n", n,
441                            rx_ring->next_to_use, rx_ring->next_to_clean);
442         }
443
444         /* Print RX Rings */
445         if (!netif_msg_rx_status(adapter))
446                 goto exit;
447
448         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
449
450         /* Advanced Receive Descriptor (Read) Format
451          *    63                                           1        0
452          *    +-----------------------------------------------------+
453          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
454          *    +----------------------------------------------+------+
455          *  8 |       Header Buffer Address [63:1]           |  DD  |
456          *    +-----------------------------------------------------+
457          *
458          *
459          * Advanced Receive Descriptor (Write-Back) Format
460          *
461          *   63       48 47    32 31  30      21 20 17 16   4 3     0
462          *   +------------------------------------------------------+
463          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
464          *   | Checksum   Ident  |   |           |    | Type | Type |
465          *   +------------------------------------------------------+
466          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
467          *   +------------------------------------------------------+
468          *   63       48 47    32 31            20 19               0
469          */
470
471         for (n = 0; n < adapter->num_rx_queues; n++) {
472                 rx_ring = adapter->rx_ring[n];
473                 printk(KERN_INFO "------------------------------------\n");
474                 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
475                 printk(KERN_INFO "------------------------------------\n");
476                 printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
477                         "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
478                         "<-- Adv Rx Read format\n");
479                 printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
480                         "[vl er S cks ln] ---------------- [bi->skb] "
481                         "<-- Adv Rx Write-Back format\n");
482
483                 for (i = 0; i < rx_ring->count; i++) {
484                         buffer_info = &rx_ring->buffer_info[i];
485                         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
486                         u0 = (struct my_u0 *)rx_desc;
487                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
488                         if (staterr & E1000_RXD_STAT_DD) {
489                                 /* Descriptor Done */
490                                 printk(KERN_INFO "RWB[0x%03X]     %016llX "
491                                         "%016llX ---------------- %p", i,
492                                         le64_to_cpu(u0->a),
493                                         le64_to_cpu(u0->b),
494                                         buffer_info->skb);
495                         } else {
496                                 printk(KERN_INFO "R  [0x%03X]     %016llX "
497                                         "%016llX %016llX %p", i,
498                                         le64_to_cpu(u0->a),
499                                         le64_to_cpu(u0->b),
500                                         (u64)buffer_info->dma,
501                                         buffer_info->skb);
502
503                                 if (netif_msg_pktdata(adapter)) {
504                                         print_hex_dump(KERN_INFO, "",
505                                                 DUMP_PREFIX_ADDRESS,
506                                                 16, 1,
507                                                 phys_to_virt(buffer_info->dma),
508                                                 rx_ring->rx_buffer_len, true);
509                                         if (rx_ring->rx_buffer_len
510                                                 < IGB_RXBUFFER_1024)
511                                                 print_hex_dump(KERN_INFO, "",
512                                                   DUMP_PREFIX_ADDRESS,
513                                                   16, 1,
514                                                   phys_to_virt(
515                                                     buffer_info->page_dma +
516                                                     buffer_info->page_offset),
517                                                   PAGE_SIZE/2, true);
518                                 }
519                         }
520
521                         if (i == rx_ring->next_to_use)
522                                 printk(KERN_CONT " NTU\n");
523                         else if (i == rx_ring->next_to_clean)
524                                 printk(KERN_CONT " NTC\n");
525                         else
526                                 printk(KERN_CONT "\n");
527
528                 }
529         }
530
531 exit:
532         return;
533 }
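
/*
 * Illustrative sketch (not part of the original driver): helpers that pull
 * the PAYLEN and DTALEN fields out of an advanced Tx descriptor's second
 * quadword, using only the bit positions shown in the layout diagram in
 * igb_dump() above.  The igb_dbg_* names and the field positions used here
 * are assumptions for illustration; the driver itself just prints the raw
 * quadwords.
 */
static inline u32 igb_dbg_adv_txd_paylen(u64 olinfo)
{
	/* PAYLEN occupies bits 63:46 of the second quadword */
	return (u32)(olinfo >> 46);
}

static inline u16 igb_dbg_adv_txd_dtalen(u64 olinfo)
{
	/* DTALEN occupies bits 15:0 of the second quadword */
	return (u16)(olinfo & 0xffff);
}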
534
535
536 /**
537  * igb_read_clock - read raw cycle counter (to be used by time counter)
538  */
539 static cycle_t igb_read_clock(const struct cyclecounter *tc)
540 {
541         struct igb_adapter *adapter =
542                 container_of(tc, struct igb_adapter, cycles);
543         struct e1000_hw *hw = &adapter->hw;
544         u64 stamp = 0;
545         int shift = 0;
546
547         /*
548          * The timestamp latches on lowest register read. For the 82580
549          * the lowest register is SYSTIMR instead of SYSTIML.  However we never
550          * adjusted TIMINCA, so SYSTIMR will just read as all 0s and can be ignored.
551          */
552         if (hw->mac.type == e1000_82580) {
553                 stamp = rd32(E1000_SYSTIMR) >> 8;
554                 shift = IGB_82580_TSYNC_SHIFT;
555         }
556
557         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
558         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
559         return stamp;
560 }
561
562 /**
563  * igb_get_hw_dev - return device
564  * used by hardware layer to print debugging information
565  **/
566 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
567 {
568         struct igb_adapter *adapter = hw->back;
569         return adapter->netdev;
570 }
571
572 /**
573  * igb_init_module - Driver Registration Routine
574  *
575  * igb_init_module is the first routine called when the driver is
576  * loaded. All it does is register with the PCI subsystem.
577  **/
578 static int __init igb_init_module(void)
579 {
580         int ret;
581         printk(KERN_INFO "%s - version %s\n",
582                igb_driver_string, igb_driver_version);
583
584         printk(KERN_INFO "%s\n", igb_copyright);
585
586 #ifdef CONFIG_IGB_DCA
587         dca_register_notify(&dca_notifier);
588 #endif
589         ret = pci_register_driver(&igb_driver);
590         return ret;
591 }
592
593 module_init(igb_init_module);
594
595 /**
596  * igb_exit_module - Driver Exit Cleanup Routine
597  *
598  * igb_exit_module is called just before the driver is removed
599  * from memory.
600  **/
601 static void __exit igb_exit_module(void)
602 {
603 #ifdef CONFIG_IGB_DCA
604         dca_unregister_notify(&dca_notifier);
605 #endif
606         pci_unregister_driver(&igb_driver);
607 }
608
609 module_exit(igb_exit_module);
610
611 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
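
/*
 * For illustration, Q_IDX_82576() maps a ring index i into the 82576's
 * interleaved queue layout:
 *   i:              0  1  2  3  4  5  6  7
 *   Q_IDX_82576(i): 0  8  1  9  2 10  3 11
 * This matches the "VF 0 gets queues 0 and 8, VF 1 gets queues 1 and 9, ..."
 * scheme described in igb_cache_ring_register() below, where the PF starts
 * at the first queue left free by the VFs and follows the same sequence.
 */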
612 /**
613  * igb_cache_ring_register - Descriptor ring to register mapping
614  * @adapter: board private structure to initialize
615  *
616  * Once we know the feature-set enabled for the device, we'll cache
617  * the register offset the descriptor ring is assigned to.
618  **/
619 static void igb_cache_ring_register(struct igb_adapter *adapter)
620 {
621         int i = 0, j = 0;
622         u32 rbase_offset = adapter->vfs_allocated_count;
623
624         switch (adapter->hw.mac.type) {
625         case e1000_82576:
626                 /* The queues are allocated for virtualization such that VF 0
627                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
628                  * In order to avoid collision we start at the first free queue
629                  * and continue consuming queues in the same sequence
630                  */
631                 if (adapter->vfs_allocated_count) {
632                         for (; i < adapter->rss_queues; i++)
633                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
634                                                                Q_IDX_82576(i);
635                 }
636         case e1000_82575:
637         case e1000_82580:
638         case e1000_i350:
639         default:
640                 for (; i < adapter->num_rx_queues; i++)
641                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
642                 for (; j < adapter->num_tx_queues; j++)
643                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
644                 break;
645         }
646 }
647
648 static void igb_free_queues(struct igb_adapter *adapter)
649 {
650         int i;
651
652         for (i = 0; i < adapter->num_tx_queues; i++) {
653                 kfree(adapter->tx_ring[i]);
654                 adapter->tx_ring[i] = NULL;
655         }
656         for (i = 0; i < adapter->num_rx_queues; i++) {
657                 kfree(adapter->rx_ring[i]);
658                 adapter->rx_ring[i] = NULL;
659         }
660         adapter->num_rx_queues = 0;
661         adapter->num_tx_queues = 0;
662 }
663
664 /**
665  * igb_alloc_queues - Allocate memory for all rings
666  * @adapter: board private structure to initialize
667  *
668  * We allocate one ring per queue at run-time since we don't know the
669  * number of queues at compile-time.
670  **/
671 static int igb_alloc_queues(struct igb_adapter *adapter)
672 {
673         struct igb_ring *ring;
674         int i;
675
676         for (i = 0; i < adapter->num_tx_queues; i++) {
677                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
678                 if (!ring)
679                         goto err;
680                 ring->count = adapter->tx_ring_count;
681                 ring->queue_index = i;
682                 ring->dev = &adapter->pdev->dev;
683                 ring->netdev = adapter->netdev;
684                 /* For 82575, context index must be unique per ring. */
685                 if (adapter->hw.mac.type == e1000_82575)
686                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
687                 adapter->tx_ring[i] = ring;
688         }
689
690         for (i = 0; i < adapter->num_rx_queues; i++) {
691                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
692                 if (!ring)
693                         goto err;
694                 ring->count = adapter->rx_ring_count;
695                 ring->queue_index = i;
696                 ring->dev = &adapter->pdev->dev;
697                 ring->netdev = adapter->netdev;
698                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
699                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
700                 /* set flag indicating ring supports SCTP checksum offload */
701                 if (adapter->hw.mac.type >= e1000_82576)
702                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
703                 adapter->rx_ring[i] = ring;
704         }
705
706         igb_cache_ring_register(adapter);
707
708         return 0;
709
710 err:
711         igb_free_queues(adapter);
712
713         return -ENOMEM;
714 }
715
716 #define IGB_N0_QUEUE -1
717 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
718 {
719         u32 msixbm = 0;
720         struct igb_adapter *adapter = q_vector->adapter;
721         struct e1000_hw *hw = &adapter->hw;
722         u32 ivar, index;
723         int rx_queue = IGB_N0_QUEUE;
724         int tx_queue = IGB_N0_QUEUE;
725
726         if (q_vector->rx_ring)
727                 rx_queue = q_vector->rx_ring->reg_idx;
728         if (q_vector->tx_ring)
729                 tx_queue = q_vector->tx_ring->reg_idx;
730
731         switch (hw->mac.type) {
732         case e1000_82575:
733                 /* The 82575 assigns vectors using a bitmask, which matches the
734                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
735                    or more queues to a vector, we write the appropriate bits
736                    into the MSIXBM register for that vector. */
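                /* Worked example (derived from the code below): a vector
                 * serving Rx queue 2 and Tx queue 2 ends up with
                 * msixbm = (E1000_EICR_RX_QUEUE0 << 2) |
                 *          (E1000_EICR_TX_QUEUE0 << 2),
                 * i.e. one Rx bit and one Tx bit set in MSIXBM[msix_vector].
                 */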
737                 if (rx_queue > IGB_N0_QUEUE)
738                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
739                 if (tx_queue > IGB_N0_QUEUE)
740                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
741                 if (!adapter->msix_entries && msix_vector == 0)
742                         msixbm |= E1000_EIMS_OTHER;
743                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
744                 q_vector->eims_value = msixbm;
745                 break;
746         case e1000_82576:
747                 /* 82576 uses a table-based method for assigning vectors.
748                    Each queue has a single entry in the table to which we write
749                    a vector number along with a "valid" bit.  Sadly, the layout
750                    of the table is somewhat counterintuitive. */
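                /* Worked example (derived from the code below): with
                 * msix_vector = 3 and rx_queue = 10, index = 10 & 0x7 = 2 and,
                 * since rx_queue >= 8, (3 | E1000_IVAR_VALID) is written to
                 * byte 2 (bits 23:16) of IVAR0[2]; Tx queues use bytes 1 and 3
                 * of the same entries.
                 */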
751                 if (rx_queue > IGB_N0_QUEUE) {
752                         index = (rx_queue & 0x7);
753                         ivar = array_rd32(E1000_IVAR0, index);
754                         if (rx_queue < 8) {
755                                 /* vector goes into low byte of register */
756                                 ivar = ivar & 0xFFFFFF00;
757                                 ivar |= msix_vector | E1000_IVAR_VALID;
758                         } else {
759                                 /* vector goes into third byte of register */
760                                 ivar = ivar & 0xFF00FFFF;
761                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
762                         }
763                         array_wr32(E1000_IVAR0, index, ivar);
764                 }
765                 if (tx_queue > IGB_N0_QUEUE) {
766                         index = (tx_queue & 0x7);
767                         ivar = array_rd32(E1000_IVAR0, index);
768                         if (tx_queue < 8) {
769                                 /* vector goes into second byte of register */
770                                 ivar = ivar & 0xFFFF00FF;
771                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
772                         } else {
773                                 /* vector goes into high byte of register */
774                                 ivar = ivar & 0x00FFFFFF;
775                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
776                         }
777                         array_wr32(E1000_IVAR0, index, ivar);
778                 }
779                 q_vector->eims_value = 1 << msix_vector;
780                 break;
781         case e1000_82580:
782         case e1000_i350:
783                 /* 82580 uses the same table-based approach as 82576 but has
784                    fewer entries; as a result we carry over for queues greater than 4. */
785                 if (rx_queue > IGB_N0_QUEUE) {
786                         index = (rx_queue >> 1);
787                         ivar = array_rd32(E1000_IVAR0, index);
788                         if (rx_queue & 0x1) {
789                                 /* vector goes into third byte of register */
790                                 ivar = ivar & 0xFF00FFFF;
791                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
792                         } else {
793                                 /* vector goes into low byte of register */
794                                 ivar = ivar & 0xFFFFFF00;
795                                 ivar |= msix_vector | E1000_IVAR_VALID;
796                         }
797                         array_wr32(E1000_IVAR0, index, ivar);
798                 }
799                 if (tx_queue > IGB_N0_QUEUE) {
800                         index = (tx_queue >> 1);
801                         ivar = array_rd32(E1000_IVAR0, index);
802                         if (tx_queue & 0x1) {
803                                 /* vector goes into high byte of register */
804                                 ivar = ivar & 0x00FFFFFF;
805                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
806                         } else {
807                                 /* vector goes into second byte of register */
808                                 ivar = ivar & 0xFFFF00FF;
809                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
810                         }
811                         array_wr32(E1000_IVAR0, index, ivar);
812                 }
813                 q_vector->eims_value = 1 << msix_vector;
814                 break;
815         default:
816                 BUG();
817                 break;
818         }
819
820         /* add q_vector eims value to global eims_enable_mask */
821         adapter->eims_enable_mask |= q_vector->eims_value;
822
823         /* configure q_vector to set itr on first interrupt */
824         q_vector->set_itr = 1;
825 }
826
827 /**
828  * igb_configure_msix - Configure MSI-X hardware
829  *
830  * igb_configure_msix sets up the hardware to properly
831  * generate MSI-X interrupts.
832  **/
833 static void igb_configure_msix(struct igb_adapter *adapter)
834 {
835         u32 tmp;
836         int i, vector = 0;
837         struct e1000_hw *hw = &adapter->hw;
838
839         adapter->eims_enable_mask = 0;
840
841         /* set vector for other causes, i.e. link changes */
842         switch (hw->mac.type) {
843         case e1000_82575:
844                 tmp = rd32(E1000_CTRL_EXT);
845                 /* enable MSI-X PBA support */
846                 tmp |= E1000_CTRL_EXT_PBA_CLR;
847
848                 /* Auto-Mask interrupts upon ICR read. */
849                 tmp |= E1000_CTRL_EXT_EIAME;
850                 tmp |= E1000_CTRL_EXT_IRCA;
851
852                 wr32(E1000_CTRL_EXT, tmp);
853
854                 /* enable msix_other interrupt */
855                 array_wr32(E1000_MSIXBM(0), vector++,
856                                       E1000_EIMS_OTHER);
857                 adapter->eims_other = E1000_EIMS_OTHER;
858
859                 break;
860
861         case e1000_82576:
862         case e1000_82580:
863         case e1000_i350:
864                 /* Turn on MSI-X capability first, or our settings
865                  * won't stick.  And it will take days to debug. */
866                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
867                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
868                                 E1000_GPIE_NSICR);
869
870                 /* enable msix_other interrupt */
871                 adapter->eims_other = 1 << vector;
872                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
873
874                 wr32(E1000_IVAR_MISC, tmp);
875                 break;
876         default:
877                 /* do nothing, since nothing else supports MSI-X */
878                 break;
879         } /* switch (hw->mac.type) */
880
881         adapter->eims_enable_mask |= adapter->eims_other;
882
883         for (i = 0; i < adapter->num_q_vectors; i++)
884                 igb_assign_vector(adapter->q_vector[i], vector++);
885
886         wrfl();
887 }
888
889 /**
890  * igb_request_msix - Initialize MSI-X interrupts
891  *
892  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
893  * kernel.
894  **/
895 static int igb_request_msix(struct igb_adapter *adapter)
896 {
897         struct net_device *netdev = adapter->netdev;
898         struct e1000_hw *hw = &adapter->hw;
899         int i, err = 0, vector = 0;
900
901         err = request_irq(adapter->msix_entries[vector].vector,
902                           igb_msix_other, 0, netdev->name, adapter);
903         if (err)
904                 goto out;
905         vector++;
906
907         for (i = 0; i < adapter->num_q_vectors; i++) {
908                 struct igb_q_vector *q_vector = adapter->q_vector[i];
909
910                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
911
912                 if (q_vector->rx_ring && q_vector->tx_ring)
913                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
914                                 q_vector->rx_ring->queue_index);
915                 else if (q_vector->tx_ring)
916                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
917                                 q_vector->tx_ring->queue_index);
918                 else if (q_vector->rx_ring)
919                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
920                                 q_vector->rx_ring->queue_index);
921                 else
922                         sprintf(q_vector->name, "%s-unused", netdev->name);
923
924                 err = request_irq(adapter->msix_entries[vector].vector,
925                                   igb_msix_ring, 0, q_vector->name,
926                                   q_vector);
927                 if (err)
928                         goto out;
929                 vector++;
930         }
931
932         igb_configure_msix(adapter);
933         return 0;
934 out:
935         return err;
936 }
937
938 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
939 {
940         if (adapter->msix_entries) {
941                 pci_disable_msix(adapter->pdev);
942                 kfree(adapter->msix_entries);
943                 adapter->msix_entries = NULL;
944         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
945                 pci_disable_msi(adapter->pdev);
946         }
947 }
948
949 /**
950  * igb_free_q_vectors - Free memory allocated for interrupt vectors
951  * @adapter: board private structure to initialize
952  *
953  * This function frees the memory allocated to the q_vectors.  In addition if
954  * NAPI is enabled it will delete any references to the NAPI struct prior
955  * to freeing the q_vector.
956  **/
957 static void igb_free_q_vectors(struct igb_adapter *adapter)
958 {
959         int v_idx;
960
961         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
962                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
963                 adapter->q_vector[v_idx] = NULL;
964                 if (!q_vector)
965                         continue;
966                 netif_napi_del(&q_vector->napi);
967                 kfree(q_vector);
968         }
969         adapter->num_q_vectors = 0;
970 }
971
972 /**
973  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
974  *
975  * This function resets the device so that it has 0 rx queues, tx queues, and
976  * MSI-X interrupts allocated.
977  */
978 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
979 {
980         igb_free_queues(adapter);
981         igb_free_q_vectors(adapter);
982         igb_reset_interrupt_capability(adapter);
983 }
984
985 /**
986  * igb_set_interrupt_capability - set MSI or MSI-X if supported
987  *
988  * Attempt to configure interrupts using the best available
989  * capabilities of the hardware and kernel.
990  **/
991 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
992 {
993         int err;
994         int numvecs, i;
995
996         /* Number of supported queues. */
997         adapter->num_rx_queues = adapter->rss_queues;
998         if (adapter->vfs_allocated_count)
999                 adapter->num_tx_queues = 1;
1000         else
1001                 adapter->num_tx_queues = adapter->rss_queues;
1002
1003         /* start with one vector for every rx queue */
1004         numvecs = adapter->num_rx_queues;
1005
1006         /* if tx handler is separate add 1 for every tx queue */
1007         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1008                 numvecs += adapter->num_tx_queues;
1009
1010         /* store the number of vectors reserved for queues */
1011         adapter->num_q_vectors = numvecs;
1012
1013         /* add 1 vector for link status interrupts */
1014         numvecs++;
1015         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1016                                         GFP_KERNEL);
1017         if (!adapter->msix_entries)
1018                 goto msi_only;
1019
1020         for (i = 0; i < numvecs; i++)
1021                 adapter->msix_entries[i].entry = i;
1022
1023         err = pci_enable_msix(adapter->pdev,
1024                               adapter->msix_entries,
1025                               numvecs);
1026         if (err == 0)
1027                 goto out;
1028
1029         igb_reset_interrupt_capability(adapter);
1030
1031         /* If we can't do MSI-X, try MSI */
1032 msi_only:
1033 #ifdef CONFIG_PCI_IOV
1034         /* disable SR-IOV for non MSI-X configurations */
1035         if (adapter->vf_data) {
1036                 struct e1000_hw *hw = &adapter->hw;
1037                 /* disable iov and allow time for transactions to clear */
1038                 pci_disable_sriov(adapter->pdev);
1039                 msleep(500);
1040
1041                 kfree(adapter->vf_data);
1042                 adapter->vf_data = NULL;
1043                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1044                 msleep(100);
1045                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1046         }
1047 #endif
1048         adapter->vfs_allocated_count = 0;
1049         adapter->rss_queues = 1;
1050         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1051         adapter->num_rx_queues = 1;
1052         adapter->num_tx_queues = 1;
1053         adapter->num_q_vectors = 1;
1054         if (!pci_enable_msi(adapter->pdev))
1055                 adapter->flags |= IGB_FLAG_HAS_MSI;
1056 out:
1057         /* Notify the stack of the (possibly) reduced Tx Queue count. */
1058         adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
1059 }
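
/*
 * Illustrative count for igb_set_interrupt_capability() above (assuming no
 * VFs and queue pairing disabled): with rss_queues = 4 the driver requests
 * num_rx_queues = 4, num_tx_queues = 4 and numvecs = 4 + 4 + 1 (link
 * status) = 9 MSI-X vectors.  If pci_enable_msix() fails, it falls back to
 * a single vector (MSI or legacy) driving one Rx and one Tx queue.
 */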
1060
1061 /**
1062  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1063  * @adapter: board private structure to initialize
1064  *
1065  * We allocate one q_vector per queue interrupt.  If allocation fails we
1066  * return -ENOMEM.
1067  **/
1068 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1069 {
1070         struct igb_q_vector *q_vector;
1071         struct e1000_hw *hw = &adapter->hw;
1072         int v_idx;
1073
1074         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1075                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1076                 if (!q_vector)
1077                         goto err_out;
1078                 q_vector->adapter = adapter;
1079                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1080                 q_vector->itr_val = IGB_START_ITR;
1081                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1082                 adapter->q_vector[v_idx] = q_vector;
1083         }
1084         return 0;
1085
1086 err_out:
1087         igb_free_q_vectors(adapter);
1088         return -ENOMEM;
1089 }
1090
1091 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1092                                       int ring_idx, int v_idx)
1093 {
1094         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1095
1096         q_vector->rx_ring = adapter->rx_ring[ring_idx];
1097         q_vector->rx_ring->q_vector = q_vector;
1098         q_vector->itr_val = adapter->rx_itr_setting;
1099         if (q_vector->itr_val && q_vector->itr_val <= 3)
1100                 q_vector->itr_val = IGB_START_ITR;
1101 }
1102
1103 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1104                                       int ring_idx, int v_idx)
1105 {
1106         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1107
1108         q_vector->tx_ring = adapter->tx_ring[ring_idx];
1109         q_vector->tx_ring->q_vector = q_vector;
1110         q_vector->itr_val = adapter->tx_itr_setting;
1111         if (q_vector->itr_val && q_vector->itr_val <= 3)
1112                 q_vector->itr_val = IGB_START_ITR;
1113 }
1114
1115 /**
1116  * igb_map_ring_to_vector - maps allocated queues to vectors
1117  *
1118  * This function maps the recently allocated queues to vectors.
1119  **/
1120 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1121 {
1122         int i;
1123         int v_idx = 0;
1124
1125         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1126             (adapter->num_q_vectors < adapter->num_tx_queues))
1127                 return -ENOMEM;
1128
1129         if (adapter->num_q_vectors >=
1130             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1131                 for (i = 0; i < adapter->num_rx_queues; i++)
1132                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1133                 for (i = 0; i < adapter->num_tx_queues; i++)
1134                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1135         } else {
1136                 for (i = 0; i < adapter->num_rx_queues; i++) {
1137                         if (i < adapter->num_tx_queues)
1138                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1139                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1140                 }
1141                 for (; i < adapter->num_tx_queues; i++)
1142                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1143         }
1144         return 0;
1145 }
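
/*
 * Illustrative mapping for igb_map_ring_to_vector() above: with 4 Rx and
 * 4 Tx queues and 8 q_vectors, every ring gets its own vector; with only
 * 4 q_vectors the paired branch is taken and Tx ring i shares vector i
 * with Rx ring i.
 */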
1146
1147 /**
1148  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1149  *
1150  * This function initializes the interrupts and allocates all of the queues.
1151  **/
1152 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1153 {
1154         struct pci_dev *pdev = adapter->pdev;
1155         int err;
1156
1157         igb_set_interrupt_capability(adapter);
1158
1159         err = igb_alloc_q_vectors(adapter);
1160         if (err) {
1161                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1162                 goto err_alloc_q_vectors;
1163         }
1164
1165         err = igb_alloc_queues(adapter);
1166         if (err) {
1167                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1168                 goto err_alloc_queues;
1169         }
1170
1171         err = igb_map_ring_to_vector(adapter);
1172         if (err) {
1173                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1174                 goto err_map_queues;
1175         }
1176
1177
1178         return 0;
1179 err_map_queues:
1180         igb_free_queues(adapter);
1181 err_alloc_queues:
1182         igb_free_q_vectors(adapter);
1183 err_alloc_q_vectors:
1184         igb_reset_interrupt_capability(adapter);
1185         return err;
1186 }
1187
1188 /**
1189  * igb_request_irq - initialize interrupts
1190  *
1191  * Attempts to configure interrupts using the best available
1192  * capabilities of the hardware and kernel.
1193  **/
1194 static int igb_request_irq(struct igb_adapter *adapter)
1195 {
1196         struct net_device *netdev = adapter->netdev;
1197         struct pci_dev *pdev = adapter->pdev;
1198         int err = 0;
1199
1200         if (adapter->msix_entries) {
1201                 err = igb_request_msix(adapter);
1202                 if (!err)
1203                         goto request_done;
1204                 /* fall back to MSI */
1205                 igb_clear_interrupt_scheme(adapter);
1206                 if (!pci_enable_msi(adapter->pdev))
1207                         adapter->flags |= IGB_FLAG_HAS_MSI;
1208                 igb_free_all_tx_resources(adapter);
1209                 igb_free_all_rx_resources(adapter);
1210                 adapter->num_tx_queues = 1;
1211                 adapter->num_rx_queues = 1;
1212                 adapter->num_q_vectors = 1;
1213                 err = igb_alloc_q_vectors(adapter);
1214                 if (err) {
1215                         dev_err(&pdev->dev,
1216                                 "Unable to allocate memory for vectors\n");
1217                         goto request_done;
1218                 }
1219                 err = igb_alloc_queues(adapter);
1220                 if (err) {
1221                         dev_err(&pdev->dev,
1222                                 "Unable to allocate memory for queues\n");
1223                         igb_free_q_vectors(adapter);
1224                         goto request_done;
1225                 }
1226                 igb_setup_all_tx_resources(adapter);
1227                 igb_setup_all_rx_resources(adapter);
1228         } else {
1229                 igb_assign_vector(adapter->q_vector[0], 0);
1230         }
1231
1232         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1233                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1234                                   netdev->name, adapter);
1235                 if (!err)
1236                         goto request_done;
1237
1238                 /* fall back to legacy interrupts */
1239                 igb_reset_interrupt_capability(adapter);
1240                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1241         }
1242
1243         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1244                           netdev->name, adapter);
1245
1246         if (err)
1247                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1248                         err);
1249
1250 request_done:
1251         return err;
1252 }
1253
1254 static void igb_free_irq(struct igb_adapter *adapter)
1255 {
1256         if (adapter->msix_entries) {
1257                 int vector = 0, i;
1258
1259                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1260
1261                 for (i = 0; i < adapter->num_q_vectors; i++) {
1262                         struct igb_q_vector *q_vector = adapter->q_vector[i];
1263                         free_irq(adapter->msix_entries[vector++].vector,
1264                                  q_vector);
1265                 }
1266         } else {
1267                 free_irq(adapter->pdev->irq, adapter);
1268         }
1269 }
1270
1271 /**
1272  * igb_irq_disable - Mask off interrupt generation on the NIC
1273  * @adapter: board private structure
1274  **/
1275 static void igb_irq_disable(struct igb_adapter *adapter)
1276 {
1277         struct e1000_hw *hw = &adapter->hw;
1278
1279         /*
1280          * we need to be careful when disabling interrupts.  The VFs are also
1281          * mapped into these registers, so clearing the bits can cause
1282          * issues for the VF drivers; we therefore only clear what we set
1283          */
1284         if (adapter->msix_entries) {
1285                 u32 regval = rd32(E1000_EIAM);
1286                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1287                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1288                 regval = rd32(E1000_EIAC);
1289                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1290         }
1291
1292         wr32(E1000_IAM, 0);
1293         wr32(E1000_IMC, ~0);
1294         wrfl();
1295         if (adapter->msix_entries) {
1296                 int i;
1297                 for (i = 0; i < adapter->num_q_vectors; i++)
1298                         synchronize_irq(adapter->msix_entries[i].vector);
1299         } else {
1300                 synchronize_irq(adapter->pdev->irq);
1301         }
1302 }
1303
1304 /**
1305  * igb_irq_enable - Enable default interrupt generation settings
1306  * @adapter: board private structure
1307  **/
1308 static void igb_irq_enable(struct igb_adapter *adapter)
1309 {
1310         struct e1000_hw *hw = &adapter->hw;
1311
1312         if (adapter->msix_entries) {
1313                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1314                 u32 regval = rd32(E1000_EIAC);
1315                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1316                 regval = rd32(E1000_EIAM);
1317                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1318                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1319                 if (adapter->vfs_allocated_count) {
1320                         wr32(E1000_MBVFIMR, 0xFF);
1321                         ims |= E1000_IMS_VMMB;
1322                 }
1323                 if (adapter->hw.mac.type == e1000_82580)
1324                         ims |= E1000_IMS_DRSTA;
1325
1326                 wr32(E1000_IMS, ims);
1327         } else {
1328                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1329                                 E1000_IMS_DRSTA);
1330                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1331                                 E1000_IMS_DRSTA);
1332         }
1333 }
1334
1335 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1336 {
1337         struct e1000_hw *hw = &adapter->hw;
1338         u16 vid = adapter->hw.mng_cookie.vlan_id;
1339         u16 old_vid = adapter->mng_vlan_id;
1340
1341         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1342                 /* add VID to filter table */
1343                 igb_vfta_set(hw, vid, true);
1344                 adapter->mng_vlan_id = vid;
1345         } else {
1346                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1347         }
1348
1349         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1350             (vid != old_vid) &&
1351             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1352                 /* remove VID from filter table */
1353                 igb_vfta_set(hw, old_vid, false);
1354         }
1355 }
1356
1357 /**
1358  * igb_release_hw_control - release control of the h/w to f/w
1359  * @adapter: address of board private structure
1360  *
1361  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1362  * For ASF and Pass Through versions of f/w this means that the
1363  * driver is no longer loaded.
1364  *
1365  **/
1366 static void igb_release_hw_control(struct igb_adapter *adapter)
1367 {
1368         struct e1000_hw *hw = &adapter->hw;
1369         u32 ctrl_ext;
1370
1371         /* Let firmware take over control of h/w */
1372         ctrl_ext = rd32(E1000_CTRL_EXT);
1373         wr32(E1000_CTRL_EXT,
1374                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1375 }
1376
1377 /**
1378  * igb_get_hw_control - get control of the h/w from f/w
1379  * @adapter: address of board private structure
1380  *
1381  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1382  * For ASF and Pass Through versions of f/w this means that
1383  * the driver is loaded.
1384  *
1385  **/
1386 static void igb_get_hw_control(struct igb_adapter *adapter)
1387 {
1388         struct e1000_hw *hw = &adapter->hw;
1389         u32 ctrl_ext;
1390
1391         /* Let firmware know the driver has taken over */
1392         ctrl_ext = rd32(E1000_CTRL_EXT);
1393         wr32(E1000_CTRL_EXT,
1394                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1395 }
1396
1397 /**
1398  * igb_configure - configure the hardware for RX and TX
1399  * @adapter: private board structure
1400  **/
1401 static void igb_configure(struct igb_adapter *adapter)
1402 {
1403         struct net_device *netdev = adapter->netdev;
1404         int i;
1405
1406         igb_get_hw_control(adapter);
1407         igb_set_rx_mode(netdev);
1408
1409         igb_restore_vlan(adapter);
1410
1411         igb_setup_tctl(adapter);
1412         igb_setup_mrqc(adapter);
1413         igb_setup_rctl(adapter);
1414
1415         igb_configure_tx(adapter);
1416         igb_configure_rx(adapter);
1417
1418         igb_rx_fifo_flush_82575(&adapter->hw);
1419
1420         /* call igb_desc_unused which always leaves
1421          * at least 1 descriptor unused to make sure
1422          * next_to_use != next_to_clean */
1423         for (i = 0; i < adapter->num_rx_queues; i++) {
1424                 struct igb_ring *ring = adapter->rx_ring[i];
1425                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1426         }
1427 }
1428
1429 /**
1430  * igb_power_up_link - Power up the phy/serdes link
1431  * @adapter: address of board private structure
1432  **/
1433 void igb_power_up_link(struct igb_adapter *adapter)
1434 {
1435         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1436                 igb_power_up_phy_copper(&adapter->hw);
1437         else
1438                 igb_power_up_serdes_link_82575(&adapter->hw);
1439 }
1440
1441 /**
1442  * igb_power_down_link - Power down the phy/serdes link
1443  * @adapter: address of board private structure
1444  */
1445 static void igb_power_down_link(struct igb_adapter *adapter)
1446 {
1447         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1448                 igb_power_down_phy_copper_82575(&adapter->hw);
1449         else
1450                 igb_shutdown_serdes_link_82575(&adapter->hw);
1451 }
1452
1453 /**
1454  * igb_up - Open the interface and prepare it to handle traffic
1455  * @adapter: board private structure
1456  **/
1457 int igb_up(struct igb_adapter *adapter)
1458 {
1459         struct e1000_hw *hw = &adapter->hw;
1460         int i;
1461
1462         /* hardware has been reset, we need to reload some things */
1463         igb_configure(adapter);
1464
1465         clear_bit(__IGB_DOWN, &adapter->state);
1466
1467         for (i = 0; i < adapter->num_q_vectors; i++) {
1468                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1469                 napi_enable(&q_vector->napi);
1470         }
1471         if (adapter->msix_entries)
1472                 igb_configure_msix(adapter);
1473         else
1474                 igb_assign_vector(adapter->q_vector[0], 0);
1475
1476         /* Clear any pending interrupts. */
1477         rd32(E1000_ICR);
1478         igb_irq_enable(adapter);
1479
1480         /* notify VFs that reset has been completed */
1481         if (adapter->vfs_allocated_count) {
1482                 u32 reg_data = rd32(E1000_CTRL_EXT);
1483                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1484                 wr32(E1000_CTRL_EXT, reg_data);
1485         }
1486
1487         netif_tx_start_all_queues(adapter->netdev);
1488
1489         /* start the watchdog. */
1490         hw->mac.get_link_status = 1;
1491         schedule_work(&adapter->watchdog_task);
1492
1493         return 0;
1494 }
1495
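/**
 * igb_down - Close the interface and stop all Tx/Rx activity
 * @adapter: board private structure
 **/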
1496 void igb_down(struct igb_adapter *adapter)
1497 {
1498         struct net_device *netdev = adapter->netdev;
1499         struct e1000_hw *hw = &adapter->hw;
1500         u32 tctl, rctl;
1501         int i;
1502
1503         /* signal that we're down so the interrupt handler does not
1504          * reschedule our watchdog timer */
1505         set_bit(__IGB_DOWN, &adapter->state);
1506
1507         /* disable receives in the hardware */
1508         rctl = rd32(E1000_RCTL);
1509         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1510         /* flush and sleep below */
1511
1512         netif_tx_stop_all_queues(netdev);
1513
1514         /* disable transmits in the hardware */
1515         tctl = rd32(E1000_TCTL);
1516         tctl &= ~E1000_TCTL_EN;
1517         wr32(E1000_TCTL, tctl);
1518         /* flush both disables and wait for them to finish */
1519         wrfl();
1520         msleep(10);
1521
1522         for (i = 0; i < adapter->num_q_vectors; i++) {
1523                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1524                 napi_disable(&q_vector->napi);
1525         }
1526
1527         igb_irq_disable(adapter);
1528
1529         del_timer_sync(&adapter->watchdog_timer);
1530         del_timer_sync(&adapter->phy_info_timer);
1531
1532         netif_carrier_off(netdev);
1533
1534         /* record the stats before reset */
1535         igb_update_stats(adapter);
1536
1537         adapter->link_speed = 0;
1538         adapter->link_duplex = 0;
1539
1540         if (!pci_channel_offline(adapter->pdev))
1541                 igb_reset(adapter);
1542         igb_clean_all_tx_rings(adapter);
1543         igb_clean_all_rx_rings(adapter);
1544 #ifdef CONFIG_IGB_DCA
1545
1546         /* since we reset the hardware DCA settings were cleared */
1547         igb_setup_dca(adapter);
1548 #endif
1549 }
1550
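/**
 * igb_reinit_locked - restart the adapter, serialized against other resets
 * @adapter: board private structure
 **/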
1551 void igb_reinit_locked(struct igb_adapter *adapter)
1552 {
1553         WARN_ON(in_interrupt());
1554         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1555                 msleep(1);
1556         igb_down(adapter);
1557         igb_up(adapter);
1558         clear_bit(__IGB_RESETTING, &adapter->state);
1559 }
1560
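/**
 * igb_reset - bring the hardware into a known good state
 * @adapter: board private structure
 *
 * Repartitions the packet buffer, recalculates the flow control watermarks,
 * notifies any VFs, and then performs a full hardware reset followed by
 * re-initialization.
 **/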
1561 void igb_reset(struct igb_adapter *adapter)
1562 {
1563         struct pci_dev *pdev = adapter->pdev;
1564         struct e1000_hw *hw = &adapter->hw;
1565         struct e1000_mac_info *mac = &hw->mac;
1566         struct e1000_fc_info *fc = &hw->fc;
1567         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1568         u16 hwm;
1569
1570         /* Repartition the PBA for MTUs greater than 9k.
1571          * CTRL.RST is required for the change to take effect.
1572          */
1573         switch (mac->type) {
1574         case e1000_i350:
1575         case e1000_82580:
1576                 pba = rd32(E1000_RXPBS);
1577                 pba = igb_rxpbs_adjust_82580(pba);
1578                 break;
1579         case e1000_82576:
1580                 pba = rd32(E1000_RXPBS);
1581                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1582                 break;
1583         case e1000_82575:
1584         default:
1585                 pba = E1000_PBA_34K;
1586                 break;
1587         }
1588
1589         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1590             (mac->type < e1000_82576)) {
1591                 /* adjust PBA for jumbo frames */
1592                 wr32(E1000_PBA, pba);
1593
1594                 /* To maintain wire speed transmits, the Tx FIFO should be
1595                  * large enough to accommodate two full transmit packets,
1596                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1597                  * the Rx FIFO should be large enough to accommodate at least
1598                  * one full receive packet and is similarly rounded up and
1599                  * expressed in KB. */
1600                 pba = rd32(E1000_PBA);
1601                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1602                 tx_space = pba >> 16;
1603                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1604                 pba &= 0xffff;
1605                 /* the Tx FIFO also stores 16 bytes of information per Tx packet,
1606                  * but don't include the Ethernet FCS because hardware appends it */
1607                 min_tx_space = (adapter->max_frame_size +
1608                                 sizeof(union e1000_adv_tx_desc) -
1609                                 ETH_FCS_LEN) * 2;
1610                 min_tx_space = ALIGN(min_tx_space, 1024);
1611                 min_tx_space >>= 10;
1612                 /* software strips receive CRC, so leave room for it */
1613                 min_rx_space = adapter->max_frame_size;
1614                 min_rx_space = ALIGN(min_rx_space, 1024);
1615                 min_rx_space >>= 10;
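                /*
                 * Illustrative example (assuming a 9000-byte MTU, so
                 * max_frame_size is 9018, and a 16-byte advanced Tx
                 * descriptor): min_tx_space = (9018 + 16 - 4) * 2 = 18060,
                 * rounded up to 18432 bytes = 18 KB; min_rx_space =
                 * ALIGN(9018, 1024) = 9216 bytes = 9 KB.
                 */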
1616
1617                 /* If current Tx allocation is less than the min Tx FIFO size,
1618                  * and the min Tx FIFO size is less than the current Rx FIFO
1619                  * allocation, take space away from current Rx allocation */
1620                 if (tx_space < min_tx_space &&
1621                     ((min_tx_space - tx_space) < pba)) {
1622                         pba = pba - (min_tx_space - tx_space);
1623
1624                         /* if short on rx space, rx wins and must trump tx
1625                          * adjustment */
1626                         if (pba < min_rx_space)
1627                                 pba = min_rx_space;
1628                 }
1629                 wr32(E1000_PBA, pba);
1630         }
1631
1632         /* flow control settings */
1633         /* The high water mark must be low enough to fit one full frame
1634          * (or the size used for early receive) above it in the Rx FIFO.
1635          * Set it to the lower of:
1636          * - 90% of the Rx FIFO size, or
1637          * - the full Rx FIFO size minus one full frame */
1638         hwm = min(((pba << 10) * 9 / 10),
1639                         ((pba << 10) - 2 * adapter->max_frame_size));
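        /*
         * Illustrative example: with the default 82575 PBA of 34 KB and a
         * 1518-byte max frame, hwm = min(34816 * 9 / 10, 34816 - 2 * 1518)
         * = min(31334, 31780) = 31334; the masking below rounds this down
         * to 31328 for the high water mark.
         */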
1640
1641         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1642         fc->low_water = fc->high_water - 16;
1643         fc->pause_time = 0xFFFF;
1644         fc->send_xon = 1;
1645         fc->current_mode = fc->requested_mode;
1646
1647         /* reset VF state and disable transmits and receives for all VFs */
1648         if (adapter->vfs_allocated_count) {
1649                 int i;
1650                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1651                         adapter->vf_data[i].flags = 0;
1652
1653                 /* ping all the active vfs to let them know we are going down */
1654                 igb_ping_all_vfs(adapter);
1655
1656                 /* disable transmits and receives */
1657                 wr32(E1000_VFRE, 0);
1658                 wr32(E1000_VFTE, 0);
1659         }
1660
1661         /* Allow time for pending master requests to run */
1662         hw->mac.ops.reset_hw(hw);
1663         wr32(E1000_WUC, 0);
1664
1665         if (hw->mac.ops.init_hw(hw))
1666                 dev_err(&pdev->dev, "Hardware Error\n");
1667
1668         if (hw->mac.type == e1000_82580) {
1669                 u32 reg = rd32(E1000_PCIEMISC);
1670                 wr32(E1000_PCIEMISC,
1671                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1672         }
1673         if (!netif_running(adapter->netdev))
1674                 igb_power_down_link(adapter);
1675
1676         igb_update_mng_vlan(adapter);
1677
1678         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1679         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1680
1681         igb_get_phy_info(hw);
1682 }
1683
1684 static const struct net_device_ops igb_netdev_ops = {
1685         .ndo_open               = igb_open,
1686         .ndo_stop               = igb_close,
1687         .ndo_start_xmit         = igb_xmit_frame_adv,
1688         .ndo_get_stats          = igb_get_stats,
1689         .ndo_set_rx_mode        = igb_set_rx_mode,
1690         .ndo_set_multicast_list = igb_set_rx_mode,
1691         .ndo_set_mac_address    = igb_set_mac,
1692         .ndo_change_mtu         = igb_change_mtu,
1693         .ndo_do_ioctl           = igb_ioctl,
1694         .ndo_tx_timeout         = igb_tx_timeout,
1695         .ndo_validate_addr      = eth_validate_addr,
1696         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1697         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1698         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1699         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1700         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1701         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1702         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1703 #ifdef CONFIG_NET_POLL_CONTROLLER
1704         .ndo_poll_controller    = igb_netpoll,
1705 #endif
1706 };
1707
1708 /**
1709  * igb_probe - Device Initialization Routine
1710  * @pdev: PCI device information struct
1711  * @ent: entry in igb_pci_tbl
1712  *
1713  * Returns 0 on success, negative on failure
1714  *
1715  * igb_probe initializes an adapter identified by a pci_dev structure.
1716  * The OS initialization, configuring of the adapter private structure,
1717  * and a hardware reset occur.
1718  **/
1719 static int __devinit igb_probe(struct pci_dev *pdev,
1720                                const struct pci_device_id *ent)
1721 {
1722         struct net_device *netdev;
1723         struct igb_adapter *adapter;
1724         struct e1000_hw *hw;
1725         u16 eeprom_data = 0;
1726         static int global_quad_port_a; /* global quad port a indication */
1727         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1728         unsigned long mmio_start, mmio_len;
1729         int err, pci_using_dac;
1730         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1731         u32 part_num;
1732
1733         /* Catch broken hardware that put the wrong VF device ID in
1734          * the PCIe SR-IOV capability.
1735          */
1736         if (pdev->is_virtfn) {
1737                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1738                      pci_name(pdev), pdev->vendor, pdev->device);
1739                 return -EINVAL;
1740         }
1741
1742         err = pci_enable_device_mem(pdev);
1743         if (err)
1744                 return err;
1745
1746         pci_using_dac = 0;
1747         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1748         if (!err) {
1749                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1750                 if (!err)
1751                         pci_using_dac = 1;
1752         } else {
1753                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1754                 if (err) {
1755                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1756                         if (err) {
1757                                 dev_err(&pdev->dev, "No usable DMA "
1758                                         "configuration, aborting\n");
1759                                 goto err_dma;
1760                         }
1761                 }
1762         }
1763
1764         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1765                                            IORESOURCE_MEM),
1766                                            igb_driver_name);
1767         if (err)
1768                 goto err_pci_reg;
1769
1770         pci_enable_pcie_error_reporting(pdev);
1771
1772         pci_set_master(pdev);
1773         pci_save_state(pdev);
1774
1775         err = -ENOMEM;
1776         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1777                                    IGB_ABS_MAX_TX_QUEUES);
1778         if (!netdev)
1779                 goto err_alloc_etherdev;
1780
1781         SET_NETDEV_DEV(netdev, &pdev->dev);
1782
1783         pci_set_drvdata(pdev, netdev);
1784         adapter = netdev_priv(netdev);
1785         adapter->netdev = netdev;
1786         adapter->pdev = pdev;
1787         hw = &adapter->hw;
1788         hw->back = adapter;
1789         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1790
1791         mmio_start = pci_resource_start(pdev, 0);
1792         mmio_len = pci_resource_len(pdev, 0);
1793
1794         err = -EIO;
1795         hw->hw_addr = ioremap(mmio_start, mmio_len);
1796         if (!hw->hw_addr)
1797                 goto err_ioremap;
1798
1799         netdev->netdev_ops = &igb_netdev_ops;
1800         igb_set_ethtool_ops(netdev);
1801         netdev->watchdog_timeo = 5 * HZ;
1802
1803         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1804
1805         netdev->mem_start = mmio_start;
1806         netdev->mem_end = mmio_start + mmio_len;
1807
1808         /* PCI config space info */
1809         hw->vendor_id = pdev->vendor;
1810         hw->device_id = pdev->device;
1811         hw->revision_id = pdev->revision;
1812         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1813         hw->subsystem_device_id = pdev->subsystem_device;
1814
1815         /* Copy the default MAC, PHY and NVM function pointers */
1816         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1817         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1818         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1819         /* Initialize skew-specific constants */
1820         err = ei->get_invariants(hw);
1821         if (err)
1822                 goto err_sw_init;
1823
1824         /* setup the private structure */
1825         err = igb_sw_init(adapter);
1826         if (err)
1827                 goto err_sw_init;
1828
1829         igb_get_bus_info_pcie(hw);
1830
1831         hw->phy.autoneg_wait_to_complete = false;
1832
1833         /* Copper options */
1834         if (hw->phy.media_type == e1000_media_type_copper) {
1835                 hw->phy.mdix = AUTO_ALL_MODES;
1836                 hw->phy.disable_polarity_correction = false;
1837                 hw->phy.ms_type = e1000_ms_hw_default;
1838         }
1839
1840         if (igb_check_reset_block(hw))
1841                 dev_info(&pdev->dev,
1842                         "PHY reset is blocked due to SOL/IDER session.\n");
1843
1844         netdev->features = NETIF_F_SG |
1845                            NETIF_F_IP_CSUM |
1846                            NETIF_F_HW_VLAN_TX |
1847                            NETIF_F_HW_VLAN_RX |
1848                            NETIF_F_HW_VLAN_FILTER;
1849
1850         netdev->features |= NETIF_F_IPV6_CSUM;
1851         netdev->features |= NETIF_F_TSO;
1852         netdev->features |= NETIF_F_TSO6;
1853         netdev->features |= NETIF_F_GRO;
1854
1855         netdev->vlan_features |= NETIF_F_TSO;
1856         netdev->vlan_features |= NETIF_F_TSO6;
1857         netdev->vlan_features |= NETIF_F_IP_CSUM;
1858         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1859         netdev->vlan_features |= NETIF_F_SG;
1860
1861         if (pci_using_dac) {
1862                 netdev->features |= NETIF_F_HIGHDMA;
1863                 netdev->vlan_features |= NETIF_F_HIGHDMA;
1864         }
1865
1866         if (hw->mac.type >= e1000_82576)
1867                 netdev->features |= NETIF_F_SCTP_CSUM;
1868
1869         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1870
1871         /* before reading the NVM, reset the controller to put the device in a
1872          * known good starting state */
1873         hw->mac.ops.reset_hw(hw);
1874
1875         /* make sure the NVM is good */
1876         if (igb_validate_nvm_checksum(hw) < 0) {
1877                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1878                 err = -EIO;
1879                 goto err_eeprom;
1880         }
1881
1882         /* copy the MAC address out of the NVM */
1883         if (hw->mac.ops.read_mac_addr(hw))
1884                 dev_err(&pdev->dev, "NVM Read Error\n");
1885
1886         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1887         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1888
1889         if (!is_valid_ether_addr(netdev->perm_addr)) {
1890                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1891                 err = -EIO;
1892                 goto err_eeprom;
1893         }
1894
1895         setup_timer(&adapter->watchdog_timer, igb_watchdog,
1896                     (unsigned long) adapter);
1897         setup_timer(&adapter->phy_info_timer, igb_update_phy_info,
1898                     (unsigned long) adapter);
1899
1900         INIT_WORK(&adapter->reset_task, igb_reset_task);
1901         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1902
1903         /* Initialize link properties that are user-changeable */
1904         adapter->fc_autoneg = true;
1905         hw->mac.autoneg = true;
1906         hw->phy.autoneg_advertised = 0x2f;
1907
1908         hw->fc.requested_mode = e1000_fc_default;
1909         hw->fc.current_mode = e1000_fc_default;
1910
1911         igb_validate_mdi_setting(hw);
1912
1913         /* Initial Wake on LAN setting. If APM wake is enabled in the EEPROM,
1914          * enable the ACPI Magic Packet filter
1915          */
1916
1917         if (hw->bus.func == 0)
1918                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1919         else if (hw->mac.type == e1000_82580)
1920                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1921                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1922                                  &eeprom_data);
1923         else if (hw->bus.func == 1)
1924                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1925
1926         if (eeprom_data & eeprom_apme_mask)
1927                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1928
1929         /* now that we have the eeprom settings, apply the special cases where
1930          * the eeprom may be wrong or the board simply won't support wake on
1931          * lan on a particular port */
1932         switch (pdev->device) {
1933         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1934                 adapter->eeprom_wol = 0;
1935                 break;
1936         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1937         case E1000_DEV_ID_82576_FIBER:
1938         case E1000_DEV_ID_82576_SERDES:
1939                 /* Wake events only supported on port A for dual fiber
1940                  * regardless of eeprom setting */
1941                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1942                         adapter->eeprom_wol = 0;
1943                 break;
1944         case E1000_DEV_ID_82576_QUAD_COPPER:
1945         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
1946                 /* if quad port adapter, disable WoL on all but port A */
1947                 if (global_quad_port_a != 0)
1948                         adapter->eeprom_wol = 0;
1949                 else
1950                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1951                 /* Reset for multiple quad port adapters */
1952                 if (++global_quad_port_a == 4)
1953                         global_quad_port_a = 0;
1954                 break;
1955         }
1956
1957         /* initialize the wol settings based on the eeprom settings */
1958         adapter->wol = adapter->eeprom_wol;
1959         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1960
1961         /* reset the hardware with the new settings */
1962         igb_reset(adapter);
1963
1964         /* let the f/w know that the h/w is now under the control of the
1965          * driver. */
1966         igb_get_hw_control(adapter);
1967
1968         strcpy(netdev->name, "eth%d");
1969         err = register_netdev(netdev);
1970         if (err)
1971                 goto err_register;
1972
1973         /* carrier off reporting is important to ethtool even BEFORE open */
1974         netif_carrier_off(netdev);
1975
1976 #ifdef CONFIG_IGB_DCA
1977         if (dca_add_requester(&pdev->dev) == 0) {
1978                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1979                 dev_info(&pdev->dev, "DCA enabled\n");
1980                 igb_setup_dca(adapter);
1981         }
1982
1983 #endif
1984         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1985         /* print bus type/speed/width info */
1986         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1987                  netdev->name,
1988                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1989                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
1990                                                             "unknown"),
1991                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1992                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1993                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1994                    "unknown"),
1995                  netdev->dev_addr);
1996
1997         igb_read_part_num(hw, &part_num);
1998         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1999                 (part_num >> 8), (part_num & 0xff));
2000
2001         dev_info(&pdev->dev,
2002                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
2003                 adapter->msix_entries ? "MSI-X" :
2004                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
2005                 adapter->num_rx_queues, adapter->num_tx_queues);
2006
2007         return 0;
2008
2009 err_register:
2010         igb_release_hw_control(adapter);
2011 err_eeprom:
2012         if (!igb_check_reset_block(hw))
2013                 igb_reset_phy(hw);
2014
2015         if (hw->flash_address)
2016                 iounmap(hw->flash_address);
2017 err_sw_init:
2018         igb_clear_interrupt_scheme(adapter);
2019         iounmap(hw->hw_addr);
2020 err_ioremap:
2021         free_netdev(netdev);
2022 err_alloc_etherdev:
2023         pci_release_selected_regions(pdev,
2024                                      pci_select_bars(pdev, IORESOURCE_MEM));
2025 err_pci_reg:
2026 err_dma:
2027         pci_disable_device(pdev);
2028         return err;
2029 }
2030
2031 /**
2032  * igb_remove - Device Removal Routine
2033  * @pdev: PCI device information struct
2034  *
2035  * igb_remove is called by the PCI subsystem to alert the driver
2036  * that it should release a PCI device.  This could be caused by a
2037  * Hot-Plug event, or because the driver is going to be removed from
2038  * memory.
2039  **/
2040 static void __devexit igb_remove(struct pci_dev *pdev)
2041 {
2042         struct net_device *netdev = pci_get_drvdata(pdev);
2043         struct igb_adapter *adapter = netdev_priv(netdev);
2044         struct e1000_hw *hw = &adapter->hw;
2045
2046         /* flush_scheduled_work() may reschedule our watchdog task, so
2047          * explicitly prevent the watchdog task from being rescheduled  */
2048         set_bit(__IGB_DOWN, &adapter->state);
2049         del_timer_sync(&adapter->watchdog_timer);
2050         del_timer_sync(&adapter->phy_info_timer);
2051
2052         flush_scheduled_work();
2053
2054 #ifdef CONFIG_IGB_DCA
2055         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2056                 dev_info(&pdev->dev, "DCA disabled\n");
2057                 dca_remove_requester(&pdev->dev);
2058                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2059                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2060         }
2061 #endif
2062
2063         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2064          * would have already happened in close and is redundant. */
2065         igb_release_hw_control(adapter);
2066
2067         unregister_netdev(netdev);
2068
2069         igb_clear_interrupt_scheme(adapter);
2070
2071 #ifdef CONFIG_PCI_IOV
2072         /* reclaim resources allocated to VFs */
2073         if (adapter->vf_data) {
2074                 /* disable iov and allow time for transactions to clear */
2075                 pci_disable_sriov(pdev);
2076                 msleep(500);
2077
2078                 kfree(adapter->vf_data);
2079                 adapter->vf_data = NULL;
2080                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2081                 msleep(100);
2082                 dev_info(&pdev->dev, "IOV Disabled\n");
2083         }
2084 #endif
2085
2086         iounmap(hw->hw_addr);
2087         if (hw->flash_address)
2088                 iounmap(hw->flash_address);
2089         pci_release_selected_regions(pdev,
2090                                      pci_select_bars(pdev, IORESOURCE_MEM));
2091
2092         free_netdev(netdev);
2093
2094         pci_disable_pcie_error_reporting(pdev);
2095
2096         pci_disable_device(pdev);
2097 }
2098
2099 /**
2100  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2101  * @adapter: board private structure to initialize
2102  *
2103  * This function initializes the vf specific data storage and then attempts to
2104  * allocate the VFs.  The reason for ordering it this way is because it is much
2105  * allocate the VFs.  The reason for ordering it this way is that it is much
2106  * more expensive time-wise to disable SR-IOV than it is to allocate and free
2107  **/
2108 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2109 {
2110 #ifdef CONFIG_PCI_IOV
2111         struct pci_dev *pdev = adapter->pdev;
2112
2113         if (adapter->vfs_allocated_count) {
2114                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2115                                            sizeof(struct vf_data_storage),
2116                                            GFP_KERNEL);
2117                 /* if allocation failed then we do not support SR-IOV */
2118                 if (!adapter->vf_data) {
2119                         adapter->vfs_allocated_count = 0;
2120                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
2121                                 "Data Storage\n");
2122                 }
2123         }
2124
2125         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2126                 kfree(adapter->vf_data);
2127                 adapter->vf_data = NULL;
2128 #endif /* CONFIG_PCI_IOV */
2129                 adapter->vfs_allocated_count = 0;
2130 #ifdef CONFIG_PCI_IOV
2131         } else {
2132                 unsigned char mac_addr[ETH_ALEN];
2133                 int i;
2134                 dev_info(&pdev->dev, "%d vfs allocated\n",
2135                          adapter->vfs_allocated_count);
2136                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2137                         random_ether_addr(mac_addr);
2138                         igb_set_vf_mac(adapter, i, mac_addr);
2139                 }
2140         }
2141 #endif /* CONFIG_PCI_IOV */
2142 }
2143
2144
2145 /**
2146  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2147  * @adapter: board private structure to initialize
2148  *
2149  * igb_init_hw_timer initializes the function pointers and values for the
2150  * hardware timer.
2151  **/
2152 static void igb_init_hw_timer(struct igb_adapter *adapter)
2153 {
2154         struct e1000_hw *hw = &adapter->hw;
2155
2156         switch (hw->mac.type) {
2157         case e1000_i350:
2158         case e1000_82580:
2159                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2160                 adapter->cycles.read = igb_read_clock;
2161                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2162                 adapter->cycles.mult = 1;
2163                 /*
2164                  * The 82580 timesync updates the system timer in 8 ns increments
2165                  * and the value cannot be shifted.  Instead we need to shift
2166                  * the registers to generate a 64bit timer value.  As a result
2167                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2168                  * 24 in order to generate a larger value for synchronization.
2169                  */
2170                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2171                 /* disable system timer temporarily by setting bit 31 */
2172                 wr32(E1000_TSAUXC, 0x80000000);
2173                 wrfl();
2174
2175                 /* Set registers so that rollover occurs soon to test this. */
2176                 wr32(E1000_SYSTIMR, 0x00000000);
2177                 wr32(E1000_SYSTIML, 0x80000000);
2178                 wr32(E1000_SYSTIMH, 0x000000FF);
2179                 wrfl();
2180
2181                 /* enable system timer by clearing bit 31 */
2182                 wr32(E1000_TSAUXC, 0x0);
2183                 wrfl();
2184
2185                 timecounter_init(&adapter->clock,
2186                                  &adapter->cycles,
2187                                  ktime_to_ns(ktime_get_real()));
2188                 /*
2189                  * Synchronize our NIC clock against system wall clock. NIC
2190                  * time stamp reading requires ~3us per sample, each sample
2191                  * was pretty stable even under load => only require 10
2192                  * samples for each offset comparison.
2193                  */
2194                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2195                 adapter->compare.source = &adapter->clock;
2196                 adapter->compare.target = ktime_get_real;
2197                 adapter->compare.num_samples = 10;
2198                 timecompare_update(&adapter->compare, 0);
2199                 break;
2200         case e1000_82576:
2201                 /*
2202                  * Initialize hardware timer: we keep it running just in case
2203                  * that some program needs it later on.
2204                  */
2205                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2206                 adapter->cycles.read = igb_read_clock;
2207                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2208                 adapter->cycles.mult = 1;
2209                 /*
2210                  * Scale the NIC clock cycle by a large factor so that
2211                  * relatively small clock corrections can be added or
2212                  * subtracted at each clock tick. The drawbacks of a large
2213                  * factor are a) that the clock register overflows more quickly
2214                  * (not such a big deal) and b) that the increment per tick has
2215                  * to fit into 24 bits.  As a result we need to use a shift of
2216                  * 19 so we can fit a value of 16 into the TIMINCA register.
2217                  */
2218                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2219                 wr32(E1000_TIMINCA,
2220                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2221                                 (16 << IGB_82576_TSYNC_SHIFT));
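                /*
                 * For reference: 16 << 19 = 0x800000, which comfortably fits
                 * in the 24-bit increment field described above.
                 */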
2222
2223                 /* Set registers so that rollover occurs soon to test this. */
2224                 wr32(E1000_SYSTIML, 0x00000000);
2225                 wr32(E1000_SYSTIMH, 0xFF800000);
2226                 wrfl();
2227
2228                 timecounter_init(&adapter->clock,
2229                                  &adapter->cycles,
2230                                  ktime_to_ns(ktime_get_real()));
2231                 /*
2232                  * Synchronize our NIC clock against system wall clock. NIC
2233                  * time stamp reading requires ~3us per sample, each sample
2234                  * was pretty stable even under load => only require 10
2235                  * samples for each offset comparison.
2236                  */
2237                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2238                 adapter->compare.source = &adapter->clock;
2239                 adapter->compare.target = ktime_get_real;
2240                 adapter->compare.num_samples = 10;
2241                 timecompare_update(&adapter->compare, 0);
2242                 break;
2243         case e1000_82575:
2244                 /* 82575 does not support timesync */
2245         default:
2246                 break;
2247         }
2248
2249 }
2250
2251 /**
2252  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2253  * @adapter: board private structure to initialize
2254  *
2255  * igb_sw_init initializes the Adapter private data structure.
2256  * Fields are initialized based on PCI device information and
2257  * OS network device settings (MTU size).
2258  **/
2259 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2260 {
2261         struct e1000_hw *hw = &adapter->hw;
2262         struct net_device *netdev = adapter->netdev;
2263         struct pci_dev *pdev = adapter->pdev;
2264
2265         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2266
2267         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2268         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2269         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2270         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2271
2272         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2273         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2274
2275 #ifdef CONFIG_PCI_IOV
2276         if (hw->mac.type == e1000_82576)
2277                 adapter->vfs_allocated_count = (max_vfs > 7) ? 7 : max_vfs;
2278
2279 #endif /* CONFIG_PCI_IOV */
2280         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2281
2282         /*
2283          * if rss_queues > 4, or if VFs are going to be allocated while more
2284          * than one RSS queue is in use, combine the queues into queue pairs
2285          * in order to conserve interrupts due to the limited supply
2286          */
2287         if ((adapter->rss_queues > 4) ||
2288             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2289                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2290
2291         /* This call may decrease the number of queues */
2292         if (igb_init_interrupt_scheme(adapter)) {
2293                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2294                 return -ENOMEM;
2295         }
2296
2297         igb_init_hw_timer(adapter);
2298         igb_probe_vfs(adapter);
2299
2300         /* Explicitly disable IRQ since the NIC can be in any state. */
2301         igb_irq_disable(adapter);
2302
2303         set_bit(__IGB_DOWN, &adapter->state);
2304         return 0;
2305 }
2306
2307 /**
2308  * igb_open - Called when a network interface is made active
2309  * @netdev: network interface device structure
2310  *
2311  * Returns 0 on success, negative value on failure
2312  *
2313  * The open entry point is called when a network interface is made
2314  * active by the system (IFF_UP).  At this point all resources needed
2315  * for transmit and receive operations are allocated, the interrupt
2316  * handler is registered with the OS, the watchdog timer is started,
2317  * and the stack is notified that the interface is ready.
2318  **/
2319 static int igb_open(struct net_device *netdev)
2320 {
2321         struct igb_adapter *adapter = netdev_priv(netdev);
2322         struct e1000_hw *hw = &adapter->hw;
2323         int err;
2324         int i;
2325
2326         /* disallow open during test */
2327         if (test_bit(__IGB_TESTING, &adapter->state))
2328                 return -EBUSY;
2329
2330         netif_carrier_off(netdev);
2331
2332         /* allocate transmit descriptors */
2333         err = igb_setup_all_tx_resources(adapter);
2334         if (err)
2335                 goto err_setup_tx;
2336
2337         /* allocate receive descriptors */
2338         err = igb_setup_all_rx_resources(adapter);
2339         if (err)
2340                 goto err_setup_rx;
2341
2342         igb_power_up_link(adapter);
2343
2344         /* before we allocate an interrupt, we must be ready to handle it.
2345          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2346          * as soon as we call pci_request_irq, so we have to setup our
2347          * clean_rx handler before we do so.  */
2348         igb_configure(adapter);
2349
2350         err = igb_request_irq(adapter);
2351         if (err)
2352                 goto err_req_irq;
2353
2354         /* From here on the code is the same as igb_up() */
2355         clear_bit(__IGB_DOWN, &adapter->state);
2356
2357         for (i = 0; i < adapter->num_q_vectors; i++) {
2358                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2359                 napi_enable(&q_vector->napi);
2360         }
2361
2362         /* Clear any pending interrupts. */
2363         rd32(E1000_ICR);
2364
2365         igb_irq_enable(adapter);
2366
2367         /* notify VFs that reset has been completed */
2368         if (adapter->vfs_allocated_count) {
2369                 u32 reg_data = rd32(E1000_CTRL_EXT);
2370                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2371                 wr32(E1000_CTRL_EXT, reg_data);
2372         }
2373
2374         netif_tx_start_all_queues(netdev);
2375
2376         /* start the watchdog. */
2377         hw->mac.get_link_status = 1;
2378         schedule_work(&adapter->watchdog_task);
2379
2380         return 0;
2381
2382 err_req_irq:
2383         igb_release_hw_control(adapter);
2384         igb_power_down_link(adapter);
2385         igb_free_all_rx_resources(adapter);
2386 err_setup_rx:
2387         igb_free_all_tx_resources(adapter);
2388 err_setup_tx:
2389         igb_reset(adapter);
2390
2391         return err;
2392 }
2393
2394 /**
2395  * igb_close - Disables a network interface
2396  * @netdev: network interface device structure
2397  *
2398  * Returns 0, this is not allowed to fail
2399  *
2400  * The close entry point is called when an interface is de-activated
2401  * by the OS.  The hardware is still under the driver's control, but
2402  * needs to be disabled.  A global MAC reset is issued to stop the
2403  * hardware, and all transmit and receive resources are freed.
2404  **/
2405 static int igb_close(struct net_device *netdev)
2406 {
2407         struct igb_adapter *adapter = netdev_priv(netdev);
2408
2409         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2410         igb_down(adapter);
2411
2412         igb_free_irq(adapter);
2413
2414         igb_free_all_tx_resources(adapter);
2415         igb_free_all_rx_resources(adapter);
2416
2417         return 0;
2418 }
2419
2420 /**
2421  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2422  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2423  *
2424  * Return 0 on success, negative on failure
2425  **/
2426 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2427 {
2428         struct device *dev = tx_ring->dev;
2429         int size;
2430
2431         size = sizeof(struct igb_buffer) * tx_ring->count;
2432         tx_ring->buffer_info = vmalloc(size);
2433         if (!tx_ring->buffer_info)
2434                 goto err;
2435         memset(tx_ring->buffer_info, 0, size);
2436
2437         /* round up to nearest 4K */
2438         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2439         tx_ring->size = ALIGN(tx_ring->size, 4096);
2440
2441         tx_ring->desc = dma_alloc_coherent(dev,
2442                                            tx_ring->size,
2443                                            &tx_ring->dma,
2444                                            GFP_KERNEL);
2445
2446         if (!tx_ring->desc)
2447                 goto err;
2448
2449         tx_ring->next_to_use = 0;
2450         tx_ring->next_to_clean = 0;
2451         return 0;
2452
2453 err:
2454         vfree(tx_ring->buffer_info);
2455         dev_err(dev,
2456                 "Unable to allocate memory for the transmit descriptor ring\n");
2457         return -ENOMEM;
2458 }
2459
2460 /**
2461  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2462  *                                (Descriptors) for all queues
2463  * @adapter: board private structure
2464  *
2465  * Return 0 on success, negative on failure
2466  **/
2467 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2468 {
2469         struct pci_dev *pdev = adapter->pdev;
2470         int i, err = 0;
2471
2472         for (i = 0; i < adapter->num_tx_queues; i++) {
2473                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2474                 if (err) {
2475                         dev_err(&pdev->dev,
2476                                 "Allocation for Tx Queue %u failed\n", i);
2477                         for (i--; i >= 0; i--)
2478                                 igb_free_tx_resources(adapter->tx_ring[i]);
2479                         break;
2480                 }
2481         }
2482
2483         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2484                 int r_idx = i % adapter->num_tx_queues;
2485                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2486         }
2487         return err;
2488 }
2489
2490 /**
2491  * igb_setup_tctl - configure the transmit control registers
2492  * @adapter: Board private structure
2493  **/
2494 void igb_setup_tctl(struct igb_adapter *adapter)
2495 {
2496         struct e1000_hw *hw = &adapter->hw;
2497         u32 tctl;
2498
2499         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2500         wr32(E1000_TXDCTL(0), 0);
2501
2502         /* Program the Transmit Control Register */
2503         tctl = rd32(E1000_TCTL);
2504         tctl &= ~E1000_TCTL_CT;
2505         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2506                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2507
2508         igb_config_collision_dist(hw);
2509
2510         /* Enable transmits */
2511         tctl |= E1000_TCTL_EN;
2512
2513         wr32(E1000_TCTL, tctl);
2514 }
2515
2516 /**
2517  * igb_configure_tx_ring - Configure transmit ring after Reset
2518  * @adapter: board private structure
2519  * @ring: tx ring to configure
2520  *
2521  * Configure a transmit ring after a reset.
2522  **/
2523 void igb_configure_tx_ring(struct igb_adapter *adapter,
2524                            struct igb_ring *ring)
2525 {
2526         struct e1000_hw *hw = &adapter->hw;
2527         u32 txdctl;
2528         u64 tdba = ring->dma;
2529         int reg_idx = ring->reg_idx;
2530
2531         /* disable the queue */
2532         txdctl = rd32(E1000_TXDCTL(reg_idx));
2533         wr32(E1000_TXDCTL(reg_idx),
2534                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2535         wrfl();
2536         mdelay(10);
2537
2538         wr32(E1000_TDLEN(reg_idx),
2539                         ring->count * sizeof(union e1000_adv_tx_desc));
2540         wr32(E1000_TDBAL(reg_idx),
2541                         tdba & 0x00000000ffffffffULL);
2542         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2543
2544         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2545         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2546         writel(0, ring->head);
2547         writel(0, ring->tail);
2548
2549         txdctl |= IGB_TX_PTHRESH;
2550         txdctl |= IGB_TX_HTHRESH << 8;
2551         txdctl |= IGB_TX_WTHRESH << 16;
2552
2553         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2554         wr32(E1000_TXDCTL(reg_idx), txdctl);
2555 }
2556
2557 /**
2558  * igb_configure_tx - Configure transmit Unit after Reset
2559  * @adapter: board private structure
2560  *
2561  * Configure the Tx unit of the MAC after a reset.
2562  **/
2563 static void igb_configure_tx(struct igb_adapter *adapter)
2564 {
2565         int i;
2566
2567         for (i = 0; i < adapter->num_tx_queues; i++)
2568                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2569 }
2570
2571 /**
2572  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2573  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2574  *
2575  * Returns 0 on success, negative on failure
2576  **/
2577 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2578 {
2579         struct device *dev = rx_ring->dev;
2580         int size, desc_len;
2581
2582         size = sizeof(struct igb_buffer) * rx_ring->count;
2583         rx_ring->buffer_info = vmalloc(size);
2584         if (!rx_ring->buffer_info)
2585                 goto err;
2586         memset(rx_ring->buffer_info, 0, size);
2587
2588         desc_len = sizeof(union e1000_adv_rx_desc);
2589
2590         /* Round up to nearest 4K */
2591         rx_ring->size = rx_ring->count * desc_len;
2592         rx_ring->size = ALIGN(rx_ring->size, 4096);
2593
2594         rx_ring->desc = dma_alloc_coherent(dev,
2595                                            rx_ring->size,
2596                                            &rx_ring->dma,
2597                                            GFP_KERNEL);
2598
2599         if (!rx_ring->desc)
2600                 goto err;
2601
2602         rx_ring->next_to_clean = 0;
2603         rx_ring->next_to_use = 0;
2604
2605         return 0;
2606
2607 err:
2608         vfree(rx_ring->buffer_info);
2609         rx_ring->buffer_info = NULL;
2610         dev_err(dev, "Unable to allocate memory for the receive descriptor"
2611                 " ring\n");
2612         return -ENOMEM;
2613 }
2614
2615 /**
2616  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2617  *                                (Descriptors) for all queues
2618  * @adapter: board private structure
2619  *
2620  * Return 0 on success, negative on failure
2621  **/
2622 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2623 {
2624         struct pci_dev *pdev = adapter->pdev;
2625         int i, err = 0;
2626
2627         for (i = 0; i < adapter->num_rx_queues; i++) {
2628                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2629                 if (err) {
2630                         dev_err(&pdev->dev,
2631                                 "Allocation for Rx Queue %u failed\n", i);
2632                         for (i--; i >= 0; i--)
2633                                 igb_free_rx_resources(adapter->rx_ring[i]);
2634                         break;
2635                 }
2636         }
2637
2638         return err;
2639 }
2640
2641 /**
2642  * igb_setup_mrqc - configure the multiple receive queue control registers
2643  * @adapter: Board private structure
2644  **/
2645 static void igb_setup_mrqc(struct igb_adapter *adapter)
2646 {
2647         struct e1000_hw *hw = &adapter->hw;
2648         u32 mrqc, rxcsum;
2649         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2650         union e1000_reta {
2651                 u32 dword;
2652                 u8  bytes[4];
2653         } reta;
2654         static const u8 rsshash[40] = {
2655                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2656                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2657                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2658                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2659
2660         /* Fill out hash function seeds */
2661         for (j = 0; j < 10; j++) {
2662                 u32 rsskey = rsshash[(j * 4)];
2663                 rsskey |= rsshash[(j * 4) + 1] << 8;
2664                 rsskey |= rsshash[(j * 4) + 2] << 16;
2665                 rsskey |= rsshash[(j * 4) + 3] << 24;
2666                 array_wr32(E1000_RSSRK(0), j, rsskey);
2667         }
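        /*
         * Each RSSRK register above holds four consecutive bytes of the
         * 40-byte RSS key, assembled least-significant byte first.
         */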
2668
2669         num_rx_queues = adapter->rss_queues;
2670
2671         if (adapter->vfs_allocated_count) {
2672                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2673                 switch (hw->mac.type) {
2674                 case e1000_i350:
2675                 case e1000_82580:
2676                         num_rx_queues = 1;
2677                         shift = 0;
2678                         break;
2679                 case e1000_82576:
2680                         shift = 3;
2681                         num_rx_queues = 2;
2682                         break;
2683                 case e1000_82575:
2684                         shift = 2;
2685                         shift2 = 6;
2686                 default:
2687                         break;
2688                 }
2689         } else {
2690                 if (hw->mac.type == e1000_82575)
2691                         shift = 6;
2692         }
2693
2694         for (j = 0; j < (32 * 4); j++) {
2695                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2696                 if (shift2)
2697                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2698                 if ((j & 3) == 3)
2699                         wr32(E1000_RETA(j >> 2), reta.dword);
2700         }
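        /*
         * Illustrative example: on an 82576 with VFs enabled (num_rx_queues
         * = 2, shift = 3) the loop above alternates the 128 redirection
         * table entries between 0x00 and 0x08, spreading flows across the
         * PF's two RSS queues within its pool.
         */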
2701
2702         /*
2703          * Disable raw packet checksumming so that RSS hash is placed in
2704          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2705          * offloads as they are enabled by default
2706          */
2707         rxcsum = rd32(E1000_RXCSUM);
2708         rxcsum |= E1000_RXCSUM_PCSD;
2709
2710         if (adapter->hw.mac.type >= e1000_82576)
2711                 /* Enable Receive Checksum Offload for SCTP */
2712                 rxcsum |= E1000_RXCSUM_CRCOFL;
2713
2714         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2715         wr32(E1000_RXCSUM, rxcsum);
2716
2717         /* If VMDq is enabled then we set the appropriate mode for that, else
2718          * we default to RSS so that an RSS hash is calculated per packet even
2719          * if we are only using one queue */
2720         if (adapter->vfs_allocated_count) {
2721                 if (hw->mac.type > e1000_82575) {
2722                         /* Set the default pool for the PF's first queue */
2723                         u32 vtctl = rd32(E1000_VT_CTL);
2724                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2725                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2726                         vtctl |= adapter->vfs_allocated_count <<
2727                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2728                         wr32(E1000_VT_CTL, vtctl);
2729                 }
2730                 if (adapter->rss_queues > 1)
2731                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2732                 else
2733                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2734         } else {
2735                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2736         }
2737         igb_vmm_control(adapter);
2738
2739         /*
2740          * Generate RSS hash based on TCP port numbers and/or
2741          * IPv4/v6 src and dst addresses since UDP cannot be
2742          * hashed reliably due to IP fragmentation
2743          */
2744         mrqc |= E1000_MRQC_RSS_FIELD_IPV4 |
2745                 E1000_MRQC_RSS_FIELD_IPV4_TCP |
2746                 E1000_MRQC_RSS_FIELD_IPV6 |
2747                 E1000_MRQC_RSS_FIELD_IPV6_TCP |
2748                 E1000_MRQC_RSS_FIELD_IPV6_TCP_EX;
2749
2750         wr32(E1000_MRQC, mrqc);
2751 }
2752
2753 /**
2754  * igb_setup_rctl - configure the receive control registers
2755  * @adapter: Board private structure
2756  **/
2757 void igb_setup_rctl(struct igb_adapter *adapter)
2758 {
2759         struct e1000_hw *hw = &adapter->hw;
2760         u32 rctl;
2761
2762         rctl = rd32(E1000_RCTL);
2763
2764         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2765         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2766
2767         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2768                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2769
2770         /*
2771          * enable stripping of CRC. It's unlikely this will break BMC
2772          * redirection as it did with e1000. Newer features require
2773          * that the HW strips the CRC.
2774          */
2775         rctl |= E1000_RCTL_SECRC;
2776
2777         /* disable store bad packets and clear size bits. */
2778         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2779
2780         /* enable LPE to allow long packets; RLPML enforces max_frame_size */
2781         rctl |= E1000_RCTL_LPE;
2782
2783         /* disable queue 0 to prevent tail write w/o re-config */
2784         wr32(E1000_RXDCTL(0), 0);
2785
2786         /* Attention!  For SR-IOV PF driver operations you must enable
2787          * queue drop for all VF and PF queues to prevent head-of-line blocking
2788          * if an untrusted VF does not provide descriptors to hardware.
2789          */
2790         if (adapter->vfs_allocated_count) {
2791                 /* set all queue drop enable bits */
2792                 wr32(E1000_QDE, ALL_QUEUES);
2793         }
2794
2795         wr32(E1000_RCTL, rctl);
2796 }
2797
2798 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2799                                    int vfn)
2800 {
2801         struct e1000_hw *hw = &adapter->hw;
2802         u32 vmolr;
2803
2804         /* if this is a VF, check whether it has VLANs enabled and, if so,
2805          * increase the size to make room for a VLAN tag */
2806         if (vfn < adapter->vfs_allocated_count &&
2807             adapter->vf_data[vfn].vlans_enabled)
2808                 size += VLAN_TAG_SIZE;
2809
2810         vmolr = rd32(E1000_VMOLR(vfn));
2811         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2812         vmolr |= size | E1000_VMOLR_LPE;
2813         wr32(E1000_VMOLR(vfn), vmolr);
2814
2815         return 0;
2816 }
2817
2818 /**
2819  * igb_rlpml_set - set maximum receive packet size
2820  * @adapter: board private structure
2821  *
2822  * Configure maximum receivable packet size.
2823  **/
2824 static void igb_rlpml_set(struct igb_adapter *adapter)
2825 {
2826         u32 max_frame_size = adapter->max_frame_size;
2827         struct e1000_hw *hw = &adapter->hw;
2828         u16 pf_id = adapter->vfs_allocated_count;
2829
2830         if (adapter->vlgrp)
2831                 max_frame_size += VLAN_TAG_SIZE;
2832
2833         /* if VFs are enabled we set the global RLPML to the largest possible
2834          * size and let the per-pool VMOLR RLPML enforce the size we need */
2835         if (pf_id) {
2836                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2837                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2838         }
2839
2840         wr32(E1000_RLPML, max_frame_size);
2841 }
2842
2843 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2844                                  int vfn, bool aupe)
2845 {
2846         struct e1000_hw *hw = &adapter->hw;
2847         u32 vmolr;
2848
2849         /*
2850          * This register exists only on 82576 and newer, so if the hardware is
2851          * older we should exit and do nothing
2852          */
2853         if (hw->mac.type < e1000_82576)
2854                 return;
2855
2856         vmolr = rd32(E1000_VMOLR(vfn));
2857         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2858         if (aupe)
2859                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2860         else
2861                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2862
2863         /* clear all bits that might not be set */
2864         /* clear bits that are conditionally set below */
2865
2866         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2867                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2868         /*
2869          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2870          * multicast packets
2871          */
2872         if (vfn <= adapter->vfs_allocated_count)
2873                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2874
2875         wr32(E1000_VMOLR(vfn), vmolr);
2876 }
2877
2878 /**
2879  * igb_configure_rx_ring - Configure a receive ring after Reset
2880  * @adapter: board private structure
2881  * @ring: receive ring to be configured
2882  *
2883  * Configure the Rx unit of the MAC after a reset.
2884  **/
2885 void igb_configure_rx_ring(struct igb_adapter *adapter,
2886                            struct igb_ring *ring)
2887 {
2888         struct e1000_hw *hw = &adapter->hw;
2889         u64 rdba = ring->dma;
2890         int reg_idx = ring->reg_idx;
2891         u32 srrctl, rxdctl;
2892
2893         /* disable the queue */
2894         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2895         wr32(E1000_RXDCTL(reg_idx),
2896                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2897
2898         /* Set DMA base address registers */
2899         wr32(E1000_RDBAL(reg_idx),
2900              rdba & 0x00000000ffffffffULL);
2901         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2902         wr32(E1000_RDLEN(reg_idx),
2903                        ring->count * sizeof(union e1000_adv_rx_desc));
2904
2905         /* initialize head and tail */
2906         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2907         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2908         writel(0, ring->head);
2909         writel(0, ring->tail);
2910
2911         /* set descriptor configuration */
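             /* Buffer lengths under 1KB use header-split descriptors: packet
              * headers land in the small buffer and the payload in half a
              * page (capped at 16KB).  Larger lengths use a single advanced
              * one-buffer descriptor sized in 1KB units. */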
2912         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2913                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2914                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2915 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2916                 srrctl |= IGB_RXBUFFER_16384 >>
2917                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2918 #else
2919                 srrctl |= (PAGE_SIZE / 2) >>
2920                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2921 #endif
2922                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2923         } else {
2924                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2925                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2926                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2927         }
2928         if (hw->mac.type == e1000_82580)
2929                 srrctl |= E1000_SRRCTL_TIMESTAMP;
2930         /* Only set Drop Enable if we are supporting multiple queues */
2931         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2932                 srrctl |= E1000_SRRCTL_DROP_EN;
2933
2934         wr32(E1000_SRRCTL(reg_idx), srrctl);
2935
2936         /* set filtering for VMDQ pools */
2937         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2938
2939         /* enable receive descriptor fetching */
2940         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2941         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2942         rxdctl &= 0xFFF00000;
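             /* the low bits of RXDCTL hold the prefetch (PTHRESH), host
              * (HTHRESH) and write-back (WTHRESH) thresholds that pace
              * descriptor fetch and write-back */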
2943         rxdctl |= IGB_RX_PTHRESH;
2944         rxdctl |= IGB_RX_HTHRESH << 8;
2945         rxdctl |= IGB_RX_WTHRESH << 16;
2946         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2947 }
2948
2949 /**
2950  * igb_configure_rx - Configure receive Unit after Reset
2951  * @adapter: board private structure
2952  *
2953  * Configure the Rx unit of the MAC after a reset.
2954  **/
2955 static void igb_configure_rx(struct igb_adapter *adapter)
2956 {
2957         int i;
2958
2959         /* set UTA to appropriate mode */
2960         igb_set_uta(adapter);
2961
2962         /* set the correct pool for the PF default MAC address in entry 0 */
2963         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2964                          adapter->vfs_allocated_count);
2965
2966         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2967          * the Base and Length of the Rx Descriptor Ring */
2968         for (i = 0; i < adapter->num_rx_queues; i++)
2969                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2970 }
2971
2972 /**
2973  * igb_free_tx_resources - Free Tx Resources per Queue
2974  * @tx_ring: Tx descriptor ring for a specific queue
2975  *
2976  * Free all transmit software resources
2977  **/
2978 void igb_free_tx_resources(struct igb_ring *tx_ring)
2979 {
2980         igb_clean_tx_ring(tx_ring);
2981
2982         vfree(tx_ring->buffer_info);
2983         tx_ring->buffer_info = NULL;
2984
2985         /* if not set, then don't free */
2986         if (!tx_ring->desc)
2987                 return;
2988
2989         dma_free_coherent(tx_ring->dev, tx_ring->size,
2990                           tx_ring->desc, tx_ring->dma);
2991
2992         tx_ring->desc = NULL;
2993 }
2994
2995 /**
2996  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2997  * @adapter: board private structure
2998  *
2999  * Free all transmit software resources
3000  **/
3001 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
3002 {
3003         int i;
3004
3005         for (i = 0; i < adapter->num_tx_queues; i++)
3006                 igb_free_tx_resources(adapter->tx_ring[i]);
3007 }
3008
3009 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3010                                     struct igb_buffer *buffer_info)
3011 {
3012         if (buffer_info->dma) {
3013                 if (buffer_info->mapped_as_page)
3014                         dma_unmap_page(tx_ring->dev,
3015                                         buffer_info->dma,
3016                                         buffer_info->length,
3017                                         DMA_TO_DEVICE);
3018                 else
3019                         dma_unmap_single(tx_ring->dev,
3020                                         buffer_info->dma,
3021                                         buffer_info->length,
3022                                         DMA_TO_DEVICE);
3023                 buffer_info->dma = 0;
3024         }
3025         if (buffer_info->skb) {
3026                 dev_kfree_skb_any(buffer_info->skb);
3027                 buffer_info->skb = NULL;
3028         }
3029         buffer_info->time_stamp = 0;
3030         buffer_info->length = 0;
3031         buffer_info->next_to_watch = 0;
3032         buffer_info->mapped_as_page = false;
3033 }
3034
3035 /**
3036  * igb_clean_tx_ring - Free Tx Buffers
3037  * @tx_ring: ring to be cleaned
3038  **/
3039 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3040 {
3041         struct igb_buffer *buffer_info;
3042         unsigned long size;
3043         unsigned int i;
3044
3045         if (!tx_ring->buffer_info)
3046                 return;
3047         /* Free all the Tx ring sk_buffs */
3048
3049         for (i = 0; i < tx_ring->count; i++) {
3050                 buffer_info = &tx_ring->buffer_info[i];
3051                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3052         }
3053
3054         size = sizeof(struct igb_buffer) * tx_ring->count;
3055         memset(tx_ring->buffer_info, 0, size);
3056
3057         /* Zero out the descriptor ring */
3058         memset(tx_ring->desc, 0, tx_ring->size);
3059
3060         tx_ring->next_to_use = 0;
3061         tx_ring->next_to_clean = 0;
3062 }
3063
3064 /**
3065  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3066  * @adapter: board private structure
3067  **/
3068 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3069 {
3070         int i;
3071
3072         for (i = 0; i < adapter->num_tx_queues; i++)
3073                 igb_clean_tx_ring(adapter->tx_ring[i]);
3074 }
3075
3076 /**
3077  * igb_free_rx_resources - Free Rx Resources
3078  * @rx_ring: ring to clean the resources from
3079  *
3080  * Free all receive software resources
3081  **/
3082 void igb_free_rx_resources(struct igb_ring *rx_ring)
3083 {
3084         igb_clean_rx_ring(rx_ring);
3085
3086         vfree(rx_ring->buffer_info);
3087         rx_ring->buffer_info = NULL;
3088
3089         /* if not set, then don't free */
3090         if (!rx_ring->desc)
3091                 return;
3092
3093         dma_free_coherent(rx_ring->dev, rx_ring->size,
3094                           rx_ring->desc, rx_ring->dma);
3095
3096         rx_ring->desc = NULL;
3097 }
3098
3099 /**
3100  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3101  * @adapter: board private structure
3102  *
3103  * Free all receive software resources
3104  **/
3105 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3106 {
3107         int i;
3108
3109         for (i = 0; i < adapter->num_rx_queues; i++)
3110                 igb_free_rx_resources(adapter->rx_ring[i]);
3111 }
3112
3113 /**
3114  * igb_clean_rx_ring - Free Rx Buffers per Queue
3115  * @rx_ring: ring to free buffers from
3116  **/
3117 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3118 {
3119         struct igb_buffer *buffer_info;
3120         unsigned long size;
3121         unsigned int i;
3122
3123         if (!rx_ring->buffer_info)
3124                 return;
3125
3126         /* Free all the Rx ring sk_buffs */
3127         for (i = 0; i < rx_ring->count; i++) {
3128                 buffer_info = &rx_ring->buffer_info[i];
3129                 if (buffer_info->dma) {
3130                         dma_unmap_single(rx_ring->dev,
3131                                          buffer_info->dma,
3132                                          rx_ring->rx_buffer_len,
3133                                          DMA_FROM_DEVICE);
3134                         buffer_info->dma = 0;
3135                 }
3136
3137                 if (buffer_info->skb) {
3138                         dev_kfree_skb(buffer_info->skb);
3139                         buffer_info->skb = NULL;
3140                 }
3141                 if (buffer_info->page_dma) {
3142                         dma_unmap_page(rx_ring->dev,
3143                                        buffer_info->page_dma,
3144                                        PAGE_SIZE / 2,
3145                                        DMA_FROM_DEVICE);
3146                         buffer_info->page_dma = 0;
3147                 }
3148                 if (buffer_info->page) {
3149                         put_page(buffer_info->page);
3150                         buffer_info->page = NULL;
3151                         buffer_info->page_offset = 0;
3152                 }
3153         }
3154
3155         size = sizeof(struct igb_buffer) * rx_ring->count;
3156         memset(rx_ring->buffer_info, 0, size);
3157
3158         /* Zero out the descriptor ring */
3159         memset(rx_ring->desc, 0, rx_ring->size);
3160
3161         rx_ring->next_to_clean = 0;
3162         rx_ring->next_to_use = 0;
3163 }
3164
3165 /**
3166  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3167  * @adapter: board private structure
3168  **/
3169 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3170 {
3171         int i;
3172
3173         for (i = 0; i < adapter->num_rx_queues; i++)
3174                 igb_clean_rx_ring(adapter->rx_ring[i]);
3175 }
3176
3177 /**
3178  * igb_set_mac - Change the Ethernet Address of the NIC
3179  * @netdev: network interface device structure
3180  * @p: pointer to an address structure
3181  *
3182  * Returns 0 on success, negative on failure
3183  **/
3184 static int igb_set_mac(struct net_device *netdev, void *p)
3185 {
3186         struct igb_adapter *adapter = netdev_priv(netdev);
3187         struct e1000_hw *hw = &adapter->hw;
3188         struct sockaddr *addr = p;
3189
3190         if (!is_valid_ether_addr(addr->sa_data))
3191                 return -EADDRNOTAVAIL;
3192
3193         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3194         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3195
3196         /* set the correct pool for the new PF MAC address in entry 0 */
3197         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3198                          adapter->vfs_allocated_count);
3199
3200         return 0;
3201 }
3202
3203 /**
3204  * igb_write_mc_addr_list - write multicast addresses to MTA
3205  * @netdev: network interface device structure
3206  *
3207  * Writes multicast address list to the MTA hash table.
3208  * Returns: -ENOMEM on failure
3209  *                0 on no addresses written
3210  *                X on writing X addresses to MTA
3211  **/
3212 static int igb_write_mc_addr_list(struct net_device *netdev)
3213 {
3214         struct igb_adapter *adapter = netdev_priv(netdev);
3215         struct e1000_hw *hw = &adapter->hw;
3216         struct netdev_hw_addr *ha;
3217         u8  *mta_list;
3218         int i;
3219
3220         if (netdev_mc_empty(netdev)) {
3221                 /* nothing to program, so clear mc list */
3222                 igb_update_mc_addr_list(hw, NULL, 0);
3223                 igb_restore_vf_multicasts(adapter);
3224                 return 0;
3225         }
3226
3227         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3228         if (!mta_list)
3229                 return -ENOMEM;
3230
3231         /* The shared function expects a packed array of only addresses. */
3232         i = 0;
3233         netdev_for_each_mc_addr(ha, netdev)
3234                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3235
3236         igb_update_mc_addr_list(hw, mta_list, i);
3237         kfree(mta_list);
3238
3239         return netdev_mc_count(netdev);
3240 }
3241
3242 /**
3243  * igb_write_uc_addr_list - write unicast addresses to RAR table
3244  * @netdev: network interface device structure
3245  *
3246  * Writes unicast address list to the RAR table.
3247  * Returns: -ENOMEM on failure/insufficient address space
3248  *                0 on no addresses written
3249  *                X on writing X addresses to the RAR table
3250  **/
3251 static int igb_write_uc_addr_list(struct net_device *netdev)
3252 {
3253         struct igb_adapter *adapter = netdev_priv(netdev);
3254         struct e1000_hw *hw = &adapter->hw;
3255         unsigned int vfn = adapter->vfs_allocated_count;
3256         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
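             /* RAR entry 0 holds the PF MAC and the top of the table is
              * reserved for VF MACs, so only rar_entries slots remain for
              * additional unicast filters */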
3257         int count = 0;
3258
3259         /* return ENOMEM indicating insufficient memory for addresses */
3260         if (netdev_uc_count(netdev) > rar_entries)
3261                 return -ENOMEM;
3262
3263         if (!netdev_uc_empty(netdev) && rar_entries) {
3264                 struct netdev_hw_addr *ha;
3265
3266                 netdev_for_each_uc_addr(ha, netdev) {
3267                         if (!rar_entries)
3268                                 break;
3269                         igb_rar_set_qsel(adapter, ha->addr,
3270                                          rar_entries--,
3271                                          vfn);
3272                         count++;
3273                 }
3274         }
3275         /* clear the remaining unused RAR entries; write in reverse order to avoid write combining */
3276         for (; rar_entries > 0 ; rar_entries--) {
3277                 wr32(E1000_RAH(rar_entries), 0);
3278                 wr32(E1000_RAL(rar_entries), 0);
3279         }
3280         wrfl();
3281
3282         return count;
3283 }
3284
3285 /**
3286  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3287  * @netdev: network interface device structure
3288  *
3289  * The set_rx_mode entry point is called whenever the unicast or multicast
3290  * address lists or the network interface flags are updated.  This routine is
3291  * responsible for configuring the hardware for proper unicast, multicast,
3292  * promiscuous mode, and all-multi behavior.
3293  **/
3294 static void igb_set_rx_mode(struct net_device *netdev)
3295 {
3296         struct igb_adapter *adapter = netdev_priv(netdev);
3297         struct e1000_hw *hw = &adapter->hw;
3298         unsigned int vfn = adapter->vfs_allocated_count;
3299         u32 rctl, vmolr = 0;
3300         int count;
3301
3302         /* Check for Promiscuous and All Multicast modes */
3303         rctl = rd32(E1000_RCTL);
3304
3305         /* clear the affected bits */
3306         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3307
3308         if (netdev->flags & IFF_PROMISC) {
3309                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3310                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3311         } else {
3312                 if (netdev->flags & IFF_ALLMULTI) {
3313                         rctl |= E1000_RCTL_MPE;
3314                         vmolr |= E1000_VMOLR_MPME;
3315                 } else {
3316                         /*
3317                          * Write addresses to the MTA, if the attempt fails
3318                          * then we should just turn on promiscuous mode so
3319                          * that we can at least receive multicast traffic
3320                          */
3321                         count = igb_write_mc_addr_list(netdev);
3322                         if (count < 0) {
3323                                 rctl |= E1000_RCTL_MPE;
3324                                 vmolr |= E1000_VMOLR_MPME;
3325                         } else if (count) {
3326                                 vmolr |= E1000_VMOLR_ROMPE;
3327                         }
3328                 }
3329                 /*
3330                  * Write addresses to available RAR registers, if there is not
3331                  * sufficient space to store all the addresses then enable
3332                  * unicast promiscuous mode
3333                  */
3334                 count = igb_write_uc_addr_list(netdev);
3335                 if (count < 0) {
3336                         rctl |= E1000_RCTL_UPE;
3337                         vmolr |= E1000_VMOLR_ROPE;
3338                 }
3339                 rctl |= E1000_RCTL_VFE;
3340         }
3341         wr32(E1000_RCTL, rctl);
3342
3343         /*
3344          * In order to support SR-IOV and eventually VMDq it is necessary to set
3345          * the VMOLR to enable the appropriate modes.  Without this workaround
3346          * we will have issues with VLAN tag stripping not being done for frames
3347          * that are only arriving because we are the default pool
3348          */
3349         if (hw->mac.type < e1000_82576)
3350                 return;
3351
3352         vmolr |= rd32(E1000_VMOLR(vfn)) &
3353                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3354         wr32(E1000_VMOLR(vfn), vmolr);
3355         igb_restore_vf_multicasts(adapter);
3356 }
3357
3358 /* Need to wait a few seconds after link up to get diagnostic information from
3359  * the phy */
3360 static void igb_update_phy_info(unsigned long data)
3361 {
3362         struct igb_adapter *adapter = (struct igb_adapter *) data;
3363         igb_get_phy_info(&adapter->hw);
3364 }
3365
3366 /**
3367  * igb_has_link - check shared code for link and determine up/down
3368  * @adapter: pointer to driver private info
3369  **/
3370 bool igb_has_link(struct igb_adapter *adapter)
3371 {
3372         struct e1000_hw *hw = &adapter->hw;
3373         bool link_active = false;
3374         s32 ret_val = 0;
3375
3376         /* get_link_status is set on LSC (link status) interrupt or
3377          * rx sequence error interrupt.  get_link_status will stay
3378          * set until e1000_check_for_link establishes link
3379          * for copper adapters ONLY
3380          */
3381         switch (hw->phy.media_type) {
3382         case e1000_media_type_copper:
3383                 if (hw->mac.get_link_status) {
3384                         ret_val = hw->mac.ops.check_for_link(hw);
3385                         link_active = !hw->mac.get_link_status;
3386                 } else {
3387                         link_active = true;
3388                 }
3389                 break;
3390         case e1000_media_type_internal_serdes:
3391                 ret_val = hw->mac.ops.check_for_link(hw);
3392                 link_active = hw->mac.serdes_has_link;
3393                 break;
3394         default:
3395         case e1000_media_type_unknown:
3396                 break;
3397         }
3398
3399         return link_active;
3400 }
3401
3402 /**
3403  * igb_watchdog - Timer Call-back
3404  * @data: pointer to adapter cast into an unsigned long
3405  **/
3406 static void igb_watchdog(unsigned long data)
3407 {
3408         struct igb_adapter *adapter = (struct igb_adapter *)data;
3409         /* Do the rest outside of interrupt context */
3410         schedule_work(&adapter->watchdog_task);
3411 }
3412
3413 static void igb_watchdog_task(struct work_struct *work)
3414 {
3415         struct igb_adapter *adapter = container_of(work,
3416                                                    struct igb_adapter,
3417                                                    watchdog_task);
3418         struct e1000_hw *hw = &adapter->hw;
3419         struct net_device *netdev = adapter->netdev;
3420         u32 link;
3421         int i;
3422
3423         link = igb_has_link(adapter);
3424         if (link) {
3425                 if (!netif_carrier_ok(netdev)) {
3426                         u32 ctrl;
3427                         hw->mac.ops.get_speed_and_duplex(hw,
3428                                                          &adapter->link_speed,
3429                                                          &adapter->link_duplex);
3430
3431                         ctrl = rd32(E1000_CTRL);
3432                         /* Link status message must follow this format */
3433                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3434                                  "Flow Control: %s\n",
3435                                netdev->name,
3436                                adapter->link_speed,
3437                                adapter->link_duplex == FULL_DUPLEX ?
3438                                  "Full Duplex" : "Half Duplex",
3439                                ((ctrl & E1000_CTRL_TFCE) &&
3440                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3441                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3442                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3443
3444                         /* adjust timeout factor according to speed/duplex */
3445                         adapter->tx_timeout_factor = 1;
3446                         switch (adapter->link_speed) {
3447                         case SPEED_10:
3448                                 adapter->tx_timeout_factor = 14;
3449                                 break;
3450                         case SPEED_100:
3451                                 /* maybe add some timeout factor ? */
3452                                 break;
3453                         }
3454
3455                         netif_carrier_on(netdev);
3456
3457                         igb_ping_all_vfs(adapter);
3458
3459                         /* link state has changed, schedule phy info update */
3460                         if (!test_bit(__IGB_DOWN, &adapter->state))
3461                                 mod_timer(&adapter->phy_info_timer,
3462                                           round_jiffies(jiffies + 2 * HZ));
3463                 }
3464         } else {
3465                 if (netif_carrier_ok(netdev)) {
3466                         adapter->link_speed = 0;
3467                         adapter->link_duplex = 0;
3468                         /* Link status message must follow this format */
3469                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3470                                netdev->name);
3471                         netif_carrier_off(netdev);
3472
3473                         igb_ping_all_vfs(adapter);
3474
3475                         /* link state has changed, schedule phy info update */
3476                         if (!test_bit(__IGB_DOWN, &adapter->state))
3477                                 mod_timer(&adapter->phy_info_timer,
3478                                           round_jiffies(jiffies + 2 * HZ));
3479                 }
3480         }
3481
3482         igb_update_stats(adapter);
3483
3484         for (i = 0; i < adapter->num_tx_queues; i++) {
3485                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3486                 if (!netif_carrier_ok(netdev)) {
3487                         /* We've lost link, so the controller stops DMA,
3488                          * but we've got queued Tx work that's never going
3489                          * to get done, so reset controller to flush Tx.
3490                          * (Do the reset outside of interrupt context). */
3491                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3492                                 adapter->tx_timeout_count++;
3493                                 schedule_work(&adapter->reset_task);
3494                                 /* return immediately since reset is imminent */
3495                                 return;
3496                         }
3497                 }
3498
3499                 /* Force detection of hung controller every watchdog period */
3500                 tx_ring->detect_tx_hung = true;
3501         }
3502
3503         /* Cause software interrupt to ensure rx ring is cleaned */
3504         if (adapter->msix_entries) {
3505                 u32 eics = 0;
3506                 for (i = 0; i < adapter->num_q_vectors; i++) {
3507                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3508                         eics |= q_vector->eims_value;
3509                 }
3510                 wr32(E1000_EICS, eics);
3511         } else {
3512                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3513         }
3514
3515         /* Reset the timer */
3516         if (!test_bit(__IGB_DOWN, &adapter->state))
3517                 mod_timer(&adapter->watchdog_timer,
3518                           round_jiffies(jiffies + 2 * HZ));
3519 }
3520
3521 enum latency_range {
3522         lowest_latency = 0,
3523         low_latency = 1,
3524         bulk_latency = 2,
3525         latency_invalid = 255
3526 };
3527
3528 /**
3529  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3530  *
3531  *      Stores a new ITR value based strictly on packet size.  This
3532  *      algorithm is less sophisticated than that used in igb_update_itr,
3533  *      due to the difficulty of synchronizing statistics across multiple
3534  *      receive rings.  The divisors and thresholds used by this function
3535  *      were determined based on theoretical maximum wire speed and testing
3536  *      data, in order to minimize response time while increasing bulk
3537  *      throughput.
3538  *      This functionality is controlled by the InterruptThrottleRate module
3539  *      parameter (see igb_param.c)
3540  *      NOTE:  This function is called only when operating in a multiqueue
3541  *             receive environment.
3542  * @q_vector: pointer to q_vector
3543  **/
3544 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3545 {
3546         int new_val = q_vector->itr_val;
3547         int avg_wire_size = 0;
3548         struct igb_adapter *adapter = q_vector->adapter;
3549
3550         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3551          * ints/sec - an ITR value of 976.
3552          */
3553         if (adapter->link_speed != SPEED_1000) {
3554                 new_val = 976;
3555                 goto set_itr_val;
3556         }
3557
3558         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3559                 struct igb_ring *ring = q_vector->rx_ring;
3560                 avg_wire_size = ring->total_bytes / ring->total_packets;
3561         }
3562
3563         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3564                 struct igb_ring *ring = q_vector->tx_ring;
3565                 avg_wire_size = max_t(u32, avg_wire_size,
3566                                       (ring->total_bytes /
3567                                        ring->total_packets));
3568         }
3569
3570         /* if avg_wire_size isn't set no work was done */
3571         if (!avg_wire_size)
3572                 goto clear_counts;
3573
3574         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3575         avg_wire_size += 24;
3576
3577         /* Don't starve jumbo frames */
3578         avg_wire_size = min(avg_wire_size, 3000);
3579
3580         /* Give a little boost to mid-size frames */
3581         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3582                 new_val = avg_wire_size / 3;
3583         else
3584                 new_val = avg_wire_size / 2;
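             /* e.g. 600-byte frames average 624 on the wire and yield
              * 624/3 = 208 (roughly 19K ints/sec); 1500-byte frames yield
              * 1524/2 = 762 (roughly 5K ints/sec) */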
3585
3586         /* when in itr mode 3 do not exceed 20K ints/sec */
3587         if (adapter->rx_itr_setting == 3 && new_val < 196)
3588                 new_val = 196;
3589
3590 set_itr_val:
3591         if (new_val != q_vector->itr_val) {
3592                 q_vector->itr_val = new_val;
3593                 q_vector->set_itr = 1;
3594         }
3595 clear_counts:
3596         if (q_vector->rx_ring) {
3597                 q_vector->rx_ring->total_bytes = 0;
3598                 q_vector->rx_ring->total_packets = 0;
3599         }
3600         if (q_vector->tx_ring) {
3601                 q_vector->tx_ring->total_bytes = 0;
3602                 q_vector->tx_ring->total_packets = 0;
3603         }
3604 }
3605
3606 /**
3607  * igb_update_itr - update the dynamic ITR value based on statistics
3608  *      Stores a new ITR value based on packets and byte
3609  *      counts during the last interrupt.  The advantage of per interrupt
3610  *      computation is faster updates and more accurate ITR for the current
3611  *      traffic pattern.  Constants in this function were computed
3612  *      based on theoretical maximum wire speed and thresholds were set based
3613  *      on testing data as well as attempting to minimize response time
3614  *      while increasing bulk throughput.
3615  *      this functionality is controlled by the InterruptThrottleRate module
3616  *      parameter (see igb_param.c)
3617  *      NOTE:  These calculations are only valid when operating in a single-
3618  *             queue environment.
3619  * @adapter: pointer to adapter
3620  * @itr_setting: current q_vector->itr_val
3621  * @packets: the number of packets during this measurement interval
3622  * @bytes: the number of bytes during this measurement interval
3623  **/
3624 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3625                                    int packets, int bytes)
3626 {
3627         unsigned int retval = itr_setting;
3628
3629         if (packets == 0)
3630                 goto update_itr_done;
3631
3632         switch (itr_setting) {
3633         case lowest_latency:
3634                 /* handle TSO and jumbo frames */
3635                 if (bytes/packets > 8000)
3636                         retval = bulk_latency;
3637                 else if ((packets < 5) && (bytes > 512))
3638                         retval = low_latency;
3639                 break;
3640         case low_latency:  /* 50 usec aka 20000 ints/s */
3641                 if (bytes > 10000) {
3642                         /* this if handles the TSO accounting */
3643                         if (bytes/packets > 8000) {
3644                                 retval = bulk_latency;
3645                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3646                                 retval = bulk_latency;
3647                         } else if ((packets > 35)) {
3648                                 retval = lowest_latency;
3649                         }
3650                 } else if (bytes/packets > 2000) {
3651                         retval = bulk_latency;
3652                 } else if (packets <= 2 && bytes < 512) {
3653                         retval = lowest_latency;
3654                 }
3655                 break;
3656         case bulk_latency: /* 250 usec aka 4000 ints/s */
3657                 if (bytes > 25000) {
3658                         if (packets > 35)
3659                                 retval = low_latency;
3660                 } else if (bytes < 1500) {
3661                         retval = low_latency;
3662                 }
3663                 break;
3664         }
3665
3666 update_itr_done:
3667         return retval;
3668 }
3669
3670 static void igb_set_itr(struct igb_adapter *adapter)
3671 {
3672         struct igb_q_vector *q_vector = adapter->q_vector[0];
3673         u16 current_itr;
3674         u32 new_itr = q_vector->itr_val;
3675
3676         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3677         if (adapter->link_speed != SPEED_1000) {
3678                 current_itr = 0;
3679                 new_itr = 4000;
3680                 goto set_itr_now;
3681         }
3682
3683         adapter->rx_itr = igb_update_itr(adapter,
3684                                     adapter->rx_itr,
3685                                     q_vector->rx_ring->total_packets,
3686                                     q_vector->rx_ring->total_bytes);
3687
3688         adapter->tx_itr = igb_update_itr(adapter,
3689                                     adapter->tx_itr,
3690                                     q_vector->tx_ring->total_packets,
3691                                     q_vector->tx_ring->total_bytes);
3692         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3693
3694         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3695         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3696                 current_itr = low_latency;
3697
3698         switch (current_itr) {
3699         /* counts and packets in update_itr are dependent on these numbers */
3700         case lowest_latency:
3701                 new_itr = 56;  /* aka 70,000 ints/sec */
3702                 break;
3703         case low_latency:
3704                 new_itr = 196; /* aka 20,000 ints/sec */
3705                 break;
3706         case bulk_latency:
3707                 new_itr = 980; /* aka 4,000 ints/sec */
3708                 break;
3709         default:
3710                 break;
3711         }
3712
3713 set_itr_now:
3714         q_vector->rx_ring->total_bytes = 0;
3715         q_vector->rx_ring->total_packets = 0;
3716         q_vector->tx_ring->total_bytes = 0;
3717         q_vector->tx_ring->total_packets = 0;
3718
3719         if (new_itr != q_vector->itr_val) {
3720                 /* this attempts to bias the interrupt rate towards Bulk
3721                  * by adding intermediate steps when interrupt rate is
3722                  * increasing */
3723                 new_itr = new_itr > q_vector->itr_val ?
3724                              max((new_itr * q_vector->itr_val) /
3725                                  (new_itr + (q_vector->itr_val >> 2)),
3726                                  new_itr) :
3727                              new_itr;
3728                 /* Don't write the value here; it resets the adapter's
3729                  * internal timer, and causes us to delay far longer than
3730                  * we should between interrupts.  Instead, we write the ITR
3731                  * value at the beginning of the next interrupt so the timing
3732                  * ends up being correct.
3733                  */
3734                 q_vector->itr_val = new_itr;
3735                 q_vector->set_itr = 1;
3736         }
3737 }
3738
3739 #define IGB_TX_FLAGS_CSUM               0x00000001
3740 #define IGB_TX_FLAGS_VLAN               0x00000002
3741 #define IGB_TX_FLAGS_TSO                0x00000004
3742 #define IGB_TX_FLAGS_IPV4               0x00000008
3743 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3744 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3745 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3746
3747 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3748                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3749 {
3750         struct e1000_adv_tx_context_desc *context_desc;
3751         unsigned int i;
3752         int err;
3753         struct igb_buffer *buffer_info;
3754         u32 info = 0, tu_cmd = 0;
3755         u32 mss_l4len_idx;
3756         u8 l4len;
3757
3758         if (skb_header_cloned(skb)) {
3759                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3760                 if (err)
3761                         return err;
3762         }
3763
3764         l4len = tcp_hdrlen(skb);
3765         *hdr_len += l4len;
3766
3767         if (skb->protocol == htons(ETH_P_IP)) {
3768                 struct iphdr *iph = ip_hdr(skb);
3769                 iph->tot_len = 0;
3770                 iph->check = 0;
3771                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3772                                                          iph->daddr, 0,
3773                                                          IPPROTO_TCP,
3774                                                          0);
3775         } else if (skb_is_gso_v6(skb)) {
3776                 ipv6_hdr(skb)->payload_len = 0;
3777                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3778                                                        &ipv6_hdr(skb)->daddr,
3779                                                        0, IPPROTO_TCP, 0);
3780         }
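             /* Zeroing the length/checksum fields and seeding the TCP
              * checksum with the pseudo-header sum lets the hardware fill
              * in correct per-segment values as it splits the payload. */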
3781
3782         i = tx_ring->next_to_use;
3783
3784         buffer_info = &tx_ring->buffer_info[i];
3785         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3786         /* VLAN MACLEN IPLEN */
3787         if (tx_flags & IGB_TX_FLAGS_VLAN)
3788                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3789         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3790         *hdr_len += skb_network_offset(skb);
3791         info |= skb_network_header_len(skb);
3792         *hdr_len += skb_network_header_len(skb);
3793         context_desc->vlan_macip_lens = cpu_to_le32(info);
3794
3795         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3796         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3797
3798         if (skb->protocol == htons(ETH_P_IP))
3799                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3800         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3801
3802         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3803
3804         /* MSS L4LEN IDX */
3805         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3806         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3807
3808         /* For 82575, context index must be unique per ring. */
3809         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3810                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3811
3812         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3813         context_desc->seqnum_seed = 0;
3814
3815         buffer_info->time_stamp = jiffies;
3816         buffer_info->next_to_watch = i;
3817         buffer_info->dma = 0;
3818         i++;
3819         if (i == tx_ring->count)
3820                 i = 0;
3821
3822         tx_ring->next_to_use = i;
3823
3824         return true;
3825 }
3826
3827 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3828                                    struct sk_buff *skb, u32 tx_flags)
3829 {
3830         struct e1000_adv_tx_context_desc *context_desc;
3831         struct device *dev = tx_ring->dev;
3832         struct igb_buffer *buffer_info;
3833         u32 info = 0, tu_cmd = 0;
3834         unsigned int i;
3835
3836         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3837             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3838                 i = tx_ring->next_to_use;
3839                 buffer_info = &tx_ring->buffer_info[i];
3840                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3841
3842                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3843                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3844
3845                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3846                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3847                         info |= skb_network_header_len(skb);
3848
3849                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3850
3851                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3852
3853                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3854                         __be16 protocol;
3855
3856                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3857                                 const struct vlan_ethhdr *vhdr =
3858                                           (const struct vlan_ethhdr*)skb->data;
3859
3860                                 protocol = vhdr->h_vlan_encapsulated_proto;
3861                         } else {
3862                                 protocol = skb->protocol;
3863                         }
3864
3865                         switch (protocol) {
3866                         case cpu_to_be16(ETH_P_IP):
3867                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3868                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3869                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3870                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3871                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3872                                 break;
3873                         case cpu_to_be16(ETH_P_IPV6):
3874                                 /* XXX what about other V6 headers?? */
3875                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3876                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3877                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3878                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3879                                 break;
3880                         default:
3881                                 if (unlikely(net_ratelimit()))
3882                                         dev_warn(dev,
3883                                             "partial checksum but proto=%x!\n",
3884                                             skb->protocol);
3885                                 break;
3886                         }
3887                 }
3888
3889                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3890                 context_desc->seqnum_seed = 0;
3891                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3892                         context_desc->mss_l4len_idx =
3893                                 cpu_to_le32(tx_ring->reg_idx << 4);
3894
3895                 buffer_info->time_stamp = jiffies;
3896                 buffer_info->next_to_watch = i;
3897                 buffer_info->dma = 0;
3898
3899                 i++;
3900                 if (i == tx_ring->count)
3901                         i = 0;
3902                 tx_ring->next_to_use = i;
3903
3904                 return true;
3905         }
3906         return false;
3907 }
3908
3909 #define IGB_MAX_TXD_PWR 16
3910 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3911
3912 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3913                                  unsigned int first)
3914 {
3915         struct igb_buffer *buffer_info;
3916         struct device *dev = tx_ring->dev;
3917         unsigned int hlen = skb_headlen(skb);
3918         unsigned int count = 0, i;
3919         unsigned int f;
3920         u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3921
3922         i = tx_ring->next_to_use;
3923
3924         buffer_info = &tx_ring->buffer_info[i];
3925         BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
3926         buffer_info->length = hlen;
3927         /* set time_stamp *before* dma to help avoid a possible race */
3928         buffer_info->time_stamp = jiffies;
3929         buffer_info->next_to_watch = i;
3930         buffer_info->dma = dma_map_single(dev, skb->data, hlen,
3931                                           DMA_TO_DEVICE);
3932         if (dma_mapping_error(dev, buffer_info->dma))
3933                 goto dma_error;
3934
3935         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3936                 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
3937                 unsigned int len = frag->size;
3938
3939                 count++;
3940                 i++;
3941                 if (i == tx_ring->count)
3942                         i = 0;
3943
3944                 buffer_info = &tx_ring->buffer_info[i];
3945                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3946                 buffer_info->length = len;
3947                 buffer_info->time_stamp = jiffies;
3948                 buffer_info->next_to_watch = i;
3949                 buffer_info->mapped_as_page = true;
3950                 buffer_info->dma = dma_map_page(dev,
3951                                                 frag->page,
3952                                                 frag->page_offset,
3953                                                 len,
3954                                                 DMA_TO_DEVICE);
3955                 if (dma_mapping_error(dev, buffer_info->dma))
3956                         goto dma_error;
3957
3958         }
3959
3960         tx_ring->buffer_info[i].skb = skb;
3961         tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags;
3962         /* total bytes on the wire: headers are replicated in every segment */
3963         tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
3964         tx_ring->buffer_info[i].gso_segs = gso_segs;
3965         tx_ring->buffer_info[first].next_to_watch = i;
3966
3967         return ++count;
3968
3969 dma_error:
3970         dev_err(dev, "TX DMA map failed\n");
3971
3972         /* clear timestamp and dma mappings for failed buffer_info mapping */
3973         buffer_info->dma = 0;
3974         buffer_info->time_stamp = 0;
3975         buffer_info->length = 0;
3976         buffer_info->next_to_watch = 0;
3977         buffer_info->mapped_as_page = false;
3978
3979         /* clear timestamp and dma mappings for remaining portion of packet */
3980         while (count--) {
3981                 if (i == 0)
3982                         i = tx_ring->count;
3983                 i--;
3984                 buffer_info = &tx_ring->buffer_info[i];
3985                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3986         }
3987
3988         return 0;
3989 }
3990
3991 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3992                                     u32 tx_flags, int count, u32 paylen,
3993                                     u8 hdr_len)
3994 {
3995         union e1000_adv_tx_desc *tx_desc;
3996         struct igb_buffer *buffer_info;
3997         u32 olinfo_status = 0, cmd_type_len;
3998         unsigned int i = tx_ring->next_to_use;
3999
4000         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
4001                         E1000_ADVTXD_DCMD_DEXT);
4002
4003         if (tx_flags & IGB_TX_FLAGS_VLAN)
4004                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
4005
4006         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
4007                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
4008
4009         if (tx_flags & IGB_TX_FLAGS_TSO) {
4010                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4011
4012                 /* insert tcp checksum */
4013                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4014
4015                 /* insert ip checksum */
4016                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4017                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4018
4019         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4020                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4021         }
4022
4023         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4024             (tx_flags & (IGB_TX_FLAGS_CSUM |
4025                          IGB_TX_FLAGS_TSO |
4026                          IGB_TX_FLAGS_VLAN)))
4027                 olinfo_status |= tx_ring->reg_idx << 4;
4028
4029         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
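             /* PAYLEN is the payload length excluding protocol headers; for
              * TSO the hardware uses it to know how much data remains to be
              * segmented */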
4030
4031         do {
4032                 buffer_info = &tx_ring->buffer_info[i];
4033                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4034                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4035                 tx_desc->read.cmd_type_len =
4036                         cpu_to_le32(cmd_type_len | buffer_info->length);
4037                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4038                 count--;
4039                 i++;
4040                 if (i == tx_ring->count)
4041                         i = 0;
4042         } while (count > 0);
4043
4044         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4045         /* Force memory writes to complete before letting h/w
4046          * know there are new descriptors to fetch.  (Only
4047          * applicable for weak-ordered memory model archs,
4048          * such as IA-64). */
4049         wmb();
4050
4051         tx_ring->next_to_use = i;
4052         writel(i, tx_ring->tail);
4053         /* we need this if more than one processor can write to our tail
4054          * at a time, it synchronizes IO on IA64/Altix systems */
4055         mmiowb();
4056 }
4057
4058 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4059 {
4060         struct net_device *netdev = tx_ring->netdev;
4061
4062         netif_stop_subqueue(netdev, tx_ring->queue_index);
4063
4064         /* Herbert's original patch had:
4065          *  smp_mb__after_netif_stop_queue();
4066          * but since that doesn't exist yet, just open code it. */
4067         smp_mb();
4068
4069         /* We need to check again in case another CPU has just
4070          * made room available. */
4071         if (igb_desc_unused(tx_ring) < size)
4072                 return -EBUSY;
4073
4074         /* A reprieve! */
4075         netif_wake_subqueue(netdev, tx_ring->queue_index);
4076         tx_ring->tx_stats.restart_queue++;
4077         return 0;
4078 }
4079
4080 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4081 {
4082         if (igb_desc_unused(tx_ring) >= size)
4083                 return 0;
4084         return __igb_maybe_stop_tx(tx_ring, size);
4085 }
4086
4087 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4088                                     struct igb_ring *tx_ring)
4089 {
4090         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
4091         int tso = 0, count;
4092         u32 tx_flags = 0;
4093         u16 first;
4094         u8 hdr_len = 0;
4095
4096         /* need: 1 descriptor per page,
4097          *       + 2 desc gap to keep tail from touching head,
4098          *       + 1 desc for skb->data,
4099          *       + 1 desc for context descriptor,
4100          * otherwise try next time */
4101         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4102                 /* this is a hard error */
4103                 return NETDEV_TX_BUSY;
4104         }
4105
4106         if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) {
4107                 skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
4108                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4109         }
4110
4111         if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
4112                 tx_flags |= IGB_TX_FLAGS_VLAN;
4113                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4114         }
4115
4116         if (skb->protocol == htons(ETH_P_IP))
4117                 tx_flags |= IGB_TX_FLAGS_IPV4;
4118
4119         first = tx_ring->next_to_use;
4120         if (skb_is_gso(skb)) {
4121                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4122
4123                 if (tso < 0) {
4124                         dev_kfree_skb_any(skb);
4125                         return NETDEV_TX_OK;
4126                 }
4127         }
4128
4129         if (tso)
4130                 tx_flags |= IGB_TX_FLAGS_TSO;
4131         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4132                  (skb->ip_summed == CHECKSUM_PARTIAL))
4133                 tx_flags |= IGB_TX_FLAGS_CSUM;
4134
4135         /*
4136          * count reflects descriptors mapped; if 0 or less, a mapping error
4137          * has occurred and we need to rewind the descriptor queue
4138          */
4139         count = igb_tx_map_adv(tx_ring, skb, first);
4140         if (!count) {
4141                 dev_kfree_skb_any(skb);
4142                 tx_ring->buffer_info[first].time_stamp = 0;
4143                 tx_ring->next_to_use = first;
4144                 return NETDEV_TX_OK;
4145         }
4146
4147         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4148
4149         /* Make sure there is space in the ring for the next send. */
4150         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4151
4152         return NETDEV_TX_OK;
4153 }
4154
4155 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4156                                       struct net_device *netdev)
4157 {
4158         struct igb_adapter *adapter = netdev_priv(netdev);
4159         struct igb_ring *tx_ring;
4160         int r_idx = 0;
4161
4162         if (test_bit(__IGB_DOWN, &adapter->state)) {
4163                 dev_kfree_skb_any(skb);
4164                 return NETDEV_TX_OK;
4165         }
4166
4167         if (skb->len <= 0) {
4168                 dev_kfree_skb_any(skb);
4169                 return NETDEV_TX_OK;
4170         }
4171
4172         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4173         tx_ring = adapter->multi_tx_table[r_idx];
4174
4175         /* This goes back to the question of how to logically map a tx queue
4176          * to a flow.  Right now, performance is slightly degraded when using
4177          * multiple tx queues.  If the stack breaks away from a single qdisc
4178          * implementation, we can look at this again. */
4179         return igb_xmit_frame_ring_adv(skb, tx_ring);
4180 }
4181
4182 /**
4183  * igb_tx_timeout - Respond to a Tx Hang
4184  * @netdev: network interface device structure
4185  **/
4186 static void igb_tx_timeout(struct net_device *netdev)
4187 {
4188         struct igb_adapter *adapter = netdev_priv(netdev);
4189         struct e1000_hw *hw = &adapter->hw;
4190
4191         /* Do the reset outside of interrupt context */
4192         adapter->tx_timeout_count++;
4193
4194         if (hw->mac.type == e1000_82580)
4195                 hw->dev_spec._82575.global_device_reset = true;
4196
4197         schedule_work(&adapter->reset_task);
4198         wr32(E1000_EICS,
4199              (adapter->eims_enable_mask & ~adapter->eims_other));
4200 }
4201
4202 static void igb_reset_task(struct work_struct *work)
4203 {
4204         struct igb_adapter *adapter;
4205         adapter = container_of(work, struct igb_adapter, reset_task);
4206
4207         igb_dump(adapter);
4208         netdev_err(adapter->netdev, "Reset adapter\n");
4209         igb_reinit_locked(adapter);
4210 }
4211
4212 /**
4213  * igb_get_stats - Get System Network Statistics
4214  * @netdev: network interface device structure
4215  *
4216  * Returns the address of the device statistics structure.
4217  * The statistics are actually updated from the timer callback.
4218  **/
4219 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
4220 {
4221         /* only return the current stats */
4222         return &netdev->stats;
4223 }
4224
4225 /**
4226  * igb_change_mtu - Change the Maximum Transfer Unit
4227  * @netdev: network interface device structure
4228  * @new_mtu: new value for maximum frame size
4229  *
4230  * Returns 0 on success, negative on failure
4231  **/
4232 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4233 {
4234         struct igb_adapter *adapter = netdev_priv(netdev);
4235         struct pci_dev *pdev = adapter->pdev;
4236         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4237         u32 rx_buffer_len, i;
4238
4239         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4240                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4241                 return -EINVAL;
4242         }
4243
4244         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4245                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4246                 return -EINVAL;
4247         }
4248
4249         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4250                 msleep(1);
4251
4252         /* igb_down has a dependency on max_frame_size */
4253         adapter->max_frame_size = max_frame;
4254
4255         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4256          * means we reserve 2 more; this pushes us to allocate from the next
4257          * larger slab size,
4258          * e.g. RXBUFFER_2048 --> size-4096 slab
4259          */
4260
4261         if (adapter->hw.mac.type == e1000_82580)
4262                 max_frame += IGB_TS_HDR_LEN;
4263
4264         if (max_frame <= IGB_RXBUFFER_1024)
4265                 rx_buffer_len = IGB_RXBUFFER_1024;
4266         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4267                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4268         else
4269                 rx_buffer_len = IGB_RXBUFFER_128;
4270
4271         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4272              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4273                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4274
4275         if ((adapter->hw.mac.type == e1000_82580) &&
4276             (rx_buffer_len == IGB_RXBUFFER_128))
4277                 rx_buffer_len += IGB_RXBUFFER_64;
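        /*
         * Example: with the default 1500 byte MTU, max_frame is
         * 1500 + 14 + 4 = 1518, which is larger than IGB_RXBUFFER_1024 but
         * fits MAXIMUM_ETHERNET_VLAN_SIZE (1522), so whole frames land in
         * the 1522 byte buffer.  Larger frames get only the 128 byte header
         * buffer, with the remainder of each frame placed in pages by the
         * receive path.
         */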
4278
4279         if (netif_running(netdev))
4280                 igb_down(adapter);
4281
4282         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4283                  netdev->mtu, new_mtu);
4284         netdev->mtu = new_mtu;
4285
4286         for (i = 0; i < adapter->num_rx_queues; i++)
4287                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4288
4289         if (netif_running(netdev))
4290                 igb_up(adapter);
4291         else
4292                 igb_reset(adapter);
4293
4294         clear_bit(__IGB_RESETTING, &adapter->state);
4295
4296         return 0;
4297 }
4298
4299 /**
4300  * igb_update_stats - Update the board statistics counters
4301  * @adapter: board private structure
4302  **/
4303
4304 void igb_update_stats(struct igb_adapter *adapter)
4305 {
4306         struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
4307         struct e1000_hw *hw = &adapter->hw;
4308         struct pci_dev *pdev = adapter->pdev;
4309         u32 reg, mpc;
4310         u16 phy_tmp;
4311         int i;
4312         u64 bytes, packets;
4313
4314 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4315
4316         /*
4317          * Prevent stats update while adapter is being reset, or if the pci
4318          * connection is down.
4319          */
4320         if (adapter->link_speed == 0)
4321                 return;
4322         if (pci_channel_offline(pdev))
4323                 return;
4324
4325         bytes = 0;
4326         packets = 0;
4327         for (i = 0; i < adapter->num_rx_queues; i++) {
4328                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4329                 struct igb_ring *ring = adapter->rx_ring[i];
4330                 ring->rx_stats.drops += rqdpc_tmp;
4331                 net_stats->rx_fifo_errors += rqdpc_tmp;
4332                 bytes += ring->rx_stats.bytes;
4333                 packets += ring->rx_stats.packets;
4334         }
4335
4336         net_stats->rx_bytes = bytes;
4337         net_stats->rx_packets = packets;
4338
4339         bytes = 0;
4340         packets = 0;
4341         for (i = 0; i < adapter->num_tx_queues; i++) {
4342                 struct igb_ring *ring = adapter->tx_ring[i];
4343                 bytes += ring->tx_stats.bytes;
4344                 packets += ring->tx_stats.packets;
4345         }
4346         net_stats->tx_bytes = bytes;
4347         net_stats->tx_packets = packets;
4348
4349         /* read stats registers */
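        /*
         * These statistics registers clear on read, so each sample is
         * accumulated into adapter->stats; the GORCH/GOTCH reads below are
         * discarded and only serve to clear the 64-bit octet counter pairs.
         */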
4350         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4351         adapter->stats.gprc += rd32(E1000_GPRC);
4352         adapter->stats.gorc += rd32(E1000_GORCL);
4353         rd32(E1000_GORCH); /* clear GORCL */
4354         adapter->stats.bprc += rd32(E1000_BPRC);
4355         adapter->stats.mprc += rd32(E1000_MPRC);
4356         adapter->stats.roc += rd32(E1000_ROC);
4357
4358         adapter->stats.prc64 += rd32(E1000_PRC64);
4359         adapter->stats.prc127 += rd32(E1000_PRC127);
4360         adapter->stats.prc255 += rd32(E1000_PRC255);
4361         adapter->stats.prc511 += rd32(E1000_PRC511);
4362         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4363         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4364         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4365         adapter->stats.sec += rd32(E1000_SEC);
4366
4367         mpc = rd32(E1000_MPC);
4368         adapter->stats.mpc += mpc;
4369         net_stats->rx_fifo_errors += mpc;
4370         adapter->stats.scc += rd32(E1000_SCC);
4371         adapter->stats.ecol += rd32(E1000_ECOL);
4372         adapter->stats.mcc += rd32(E1000_MCC);
4373         adapter->stats.latecol += rd32(E1000_LATECOL);
4374         adapter->stats.dc += rd32(E1000_DC);
4375         adapter->stats.rlec += rd32(E1000_RLEC);
4376         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4377         adapter->stats.xontxc += rd32(E1000_XONTXC);
4378         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4379         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4380         adapter->stats.fcruc += rd32(E1000_FCRUC);
4381         adapter->stats.gptc += rd32(E1000_GPTC);
4382         adapter->stats.gotc += rd32(E1000_GOTCL);
4383         rd32(E1000_GOTCH); /* clear GOTCL */
4384         adapter->stats.rnbc += rd32(E1000_RNBC);
4385         adapter->stats.ruc += rd32(E1000_RUC);
4386         adapter->stats.rfc += rd32(E1000_RFC);
4387         adapter->stats.rjc += rd32(E1000_RJC);
4388         adapter->stats.tor += rd32(E1000_TORH);
4389         adapter->stats.tot += rd32(E1000_TOTH);
4390         adapter->stats.tpr += rd32(E1000_TPR);
4391
4392         adapter->stats.ptc64 += rd32(E1000_PTC64);
4393         adapter->stats.ptc127 += rd32(E1000_PTC127);
4394         adapter->stats.ptc255 += rd32(E1000_PTC255);
4395         adapter->stats.ptc511 += rd32(E1000_PTC511);
4396         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4397         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4398
4399         adapter->stats.mptc += rd32(E1000_MPTC);
4400         adapter->stats.bptc += rd32(E1000_BPTC);
4401
4402         adapter->stats.tpt += rd32(E1000_TPT);
4403         adapter->stats.colc += rd32(E1000_COLC);
4404
4405         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4406         /* read internal phy specific stats */
4407         reg = rd32(E1000_CTRL_EXT);
4408         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4409                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4410                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4411         }
4412
4413         adapter->stats.tsctc += rd32(E1000_TSCTC);
4414         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4415
4416         adapter->stats.iac += rd32(E1000_IAC);
4417         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4418         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4419         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4420         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4421         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4422         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4423         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4424         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4425
4426         /* Fill out the OS statistics structure */
4427         net_stats->multicast = adapter->stats.mprc;
4428         net_stats->collisions = adapter->stats.colc;
4429
4430         /* Rx Errors */
4431
4432         /* RLEC on some newer hardware can be incorrect, so build
4433          * our own version based on RUC and ROC */
4434         net_stats->rx_errors = adapter->stats.rxerrc +
4435                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4436                 adapter->stats.ruc + adapter->stats.roc +
4437                 adapter->stats.cexterr;
4438         net_stats->rx_length_errors = adapter->stats.ruc +
4439                                       adapter->stats.roc;
4440         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4441         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4442         net_stats->rx_missed_errors = adapter->stats.mpc;
4443
4444         /* Tx Errors */
4445         net_stats->tx_errors = adapter->stats.ecol +
4446                                adapter->stats.latecol;
4447         net_stats->tx_aborted_errors = adapter->stats.ecol;
4448         net_stats->tx_window_errors = adapter->stats.latecol;
4449         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4450
4451         /* Tx Dropped needs to be maintained elsewhere */
4452
4453         /* Phy Stats */
4454         if (hw->phy.media_type == e1000_media_type_copper) {
4455                 if ((adapter->link_speed == SPEED_1000) &&
4456                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4457                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4458                         adapter->phy_stats.idle_errors += phy_tmp;
4459                 }
4460         }
4461
4462         /* Management Stats */
4463         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4464         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4465         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4466 }
4467
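/*
 * MSI-X "other" (non-queue) interrupt: handles device reset assertion,
 * DMA out-of-sync events, VF mailbox traffic and link status changes,
 * then re-arms the corresponding cause bits and its own EIMS entry.
 */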
4468 static irqreturn_t igb_msix_other(int irq, void *data)
4469 {
4470         struct igb_adapter *adapter = data;
4471         struct e1000_hw *hw = &adapter->hw;
4472         u32 icr = rd32(E1000_ICR);
4473         /* reading ICR causes bit 31 of EICR to be cleared */
4474
4475         if (icr & E1000_ICR_DRSTA)
4476                 schedule_work(&adapter->reset_task);
4477
4478         if (icr & E1000_ICR_DOUTSYNC) {
4479                 /* HW is reporting DMA is out of sync */
4480                 adapter->stats.doosync++;
4481         }
4482
4483         /* Check for a mailbox event */
4484         if (icr & E1000_ICR_VMMB)
4485                 igb_msg_task(adapter);
4486
4487         if (icr & E1000_ICR_LSC) {
4488                 hw->mac.get_link_status = 1;
4489                 /* guard against interrupt when we're going down */
4490                 if (!test_bit(__IGB_DOWN, &adapter->state))
4491                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4492         }
4493
4494         if (adapter->vfs_allocated_count)
4495                 wr32(E1000_IMS, E1000_IMS_LSC |
4496                                 E1000_IMS_VMMB |
4497                                 E1000_IMS_DOUTSYNC);
4498         else
4499                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4500         wr32(E1000_EIMS, adapter->eims_other);
4501
4502         return IRQ_HANDLED;
4503 }
4504
4505 static void igb_write_itr(struct igb_q_vector *q_vector)
4506 {
4507         struct igb_adapter *adapter = q_vector->adapter;
4508         u32 itr_val = q_vector->itr_val & 0x7FFC;
4509
4510         if (!q_vector->set_itr)
4511                 return;
4512
4513         if (!itr_val)
4514                 itr_val = 0x4;
4515
4516         if (adapter->hw.mac.type == e1000_82575)
4517                 itr_val |= itr_val << 16;
4518         else
4519                 itr_val |= 0x8000000;
4520
4521         writel(itr_val, q_vector->itr_register);
4522         q_vector->set_itr = 0;
4523 }
4524
4525 static irqreturn_t igb_msix_ring(int irq, void *data)
4526 {
4527         struct igb_q_vector *q_vector = data;
4528
4529         /* Write the ITR value calculated from the previous interrupt. */
4530         igb_write_itr(q_vector);
4531
4532         napi_schedule(&q_vector->napi);
4533
4534         return IRQ_HANDLED;
4535 }
4536
4537 #ifdef CONFIG_IGB_DCA
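/*
 * Direct Cache Access: tag this q_vector's TX/RX descriptor (and, for RX,
 * header/payload) writes with the DCA tag of the CPU the vector last ran
 * on, so those writes can be steered toward that CPU's cache.  The
 * registers are only rewritten when the vector migrates to another CPU.
 */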
4538 static void igb_update_dca(struct igb_q_vector *q_vector)
4539 {
4540         struct igb_adapter *adapter = q_vector->adapter;
4541         struct e1000_hw *hw = &adapter->hw;
4542         int cpu = get_cpu();
4543
4544         if (q_vector->cpu == cpu)
4545                 goto out_no_update;
4546
4547         if (q_vector->tx_ring) {
4548                 int q = q_vector->tx_ring->reg_idx;
4549                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4550                 if (hw->mac.type == e1000_82575) {
4551                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4552                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4553                 } else {
4554                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4555                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4556                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4557                 }
4558                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4559                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4560         }
4561         if (q_vector->rx_ring) {
4562                 int q = q_vector->rx_ring->reg_idx;
4563                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4564                 if (hw->mac.type == e1000_82575) {
4565                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4566                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4567                 } else {
4568                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4569                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4570                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4571                 }
4572                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4573                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4574                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4575                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4576         }
4577         q_vector->cpu = cpu;
4578 out_no_update:
4579         put_cpu();
4580 }
4581
4582 static void igb_setup_dca(struct igb_adapter *adapter)
4583 {
4584         struct e1000_hw *hw = &adapter->hw;
4585         int i;
4586
4587         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4588                 return;
4589
4590         /* Always use CB2 mode, difference is masked in the CB driver. */
4591         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4592
4593         for (i = 0; i < adapter->num_q_vectors; i++) {
4594                 adapter->q_vector[i]->cpu = -1;
4595                 igb_update_dca(adapter->q_vector[i]);
4596         }
4597 }
4598
4599 static int __igb_notify_dca(struct device *dev, void *data)
4600 {
4601         struct net_device *netdev = dev_get_drvdata(dev);
4602         struct igb_adapter *adapter = netdev_priv(netdev);
4603         struct pci_dev *pdev = adapter->pdev;
4604         struct e1000_hw *hw = &adapter->hw;
4605         unsigned long event = *(unsigned long *)data;
4606
4607         switch (event) {
4608         case DCA_PROVIDER_ADD:
4609                 /* if already enabled, don't do it again */
4610                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4611                         break;
4612                 if (dca_add_requester(dev) == 0) {
4613                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4614                         dev_info(&pdev->dev, "DCA enabled\n");
4615                         igb_setup_dca(adapter);
4616                         break;
4617                 }
4618                 /* Fall Through since DCA is disabled. */
4619         case DCA_PROVIDER_REMOVE:
4620                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4621                         /* without this a class_device is left
4622                          * hanging around in the sysfs model */
4623                         dca_remove_requester(dev);
4624                         dev_info(&pdev->dev, "DCA disabled\n");
4625                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4626                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4627                 }
4628                 break;
4629         }
4630
4631         return 0;
4632 }
4633
4634 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4635                           void *p)
4636 {
4637         int ret_val;
4638
4639         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4640                                          __igb_notify_dca);
4641
4642         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4643 }
4644 #endif /* CONFIG_IGB_DCA */
4645
4646 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4647 {
4648         struct e1000_hw *hw = &adapter->hw;
4649         u32 ping;
4650         int i;
4651
4652         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4653                 ping = E1000_PF_CONTROL_MSG;
4654                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4655                         ping |= E1000_VT_MSGTYPE_CTS;
4656                 igb_write_mbx(hw, &ping, 1, i);
4657         }
4658 }
4659
4660 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4661 {
4662         struct e1000_hw *hw = &adapter->hw;
4663         u32 vmolr = rd32(E1000_VMOLR(vf));
4664         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4665
4666         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4667                             IGB_VF_FLAG_MULTI_PROMISC);
4668         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4669
4670         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4671                 vmolr |= E1000_VMOLR_MPME;
4672                 vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC;
4673                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4674         } else {
4675                 /*
4676                  * if we have hashes and we are clearing a multicast promisc
4677                  * flag we need to write the hashes to the MTA as this step
4678                  * was previously skipped
4679                  */
4680                 if (vf_data->num_vf_mc_hashes > 30) {
4681                         vmolr |= E1000_VMOLR_MPME;
4682                 } else if (vf_data->num_vf_mc_hashes) {
4683                         int j;
4684                         vmolr |= E1000_VMOLR_ROMPE;
4685                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4686                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4687                 }
4688         }
4689
4690         wr32(E1000_VMOLR(vf), vmolr);
4691
4692         /* there are flags left unprocessed, likely not supported */
4693         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4694                 return -EINVAL;
4695
4696         return 0;
4697
4698 }
4699
4700 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4701                                   u32 *msgbuf, u32 vf)
4702 {
4703         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4704         u16 *hash_list = (u16 *)&msgbuf[1];
4705         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4706         int i;
4707
4708         /* salt away the number of multicast addresses assigned
4709          * to this VF so it can be restored later when the PF multicast
4710          * list changes
4711          */
4712         vf_data->num_vf_mc_hashes = n;
4713
4714         /* only up to 30 hash values supported */
4715         if (n > 30)
4716                 n = 30;
4717
4718         /* store the hashes for later use */
4719         for (i = 0; i < n; i++)
4720                 vf_data->vf_mc_hashes[i] = hash_list[i];
4721
4722         /* Flush and reset the mta with the new values */
4723         igb_set_rx_mode(adapter->netdev);
4724
4725         return 0;
4726 }
4727
4728 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4729 {
4730         struct e1000_hw *hw = &adapter->hw;
4731         struct vf_data_storage *vf_data;
4732         int i, j;
4733
4734         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4735                 u32 vmolr = rd32(E1000_VMOLR(i));
4736                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4737
4738                 vf_data = &adapter->vf_data[i];
4739
4740                 if ((vf_data->num_vf_mc_hashes > 30) ||
4741                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4742                         vmolr |= E1000_VMOLR_MPME;
4743                 } else if (vf_data->num_vf_mc_hashes) {
4744                         vmolr |= E1000_VMOLR_ROMPE;
4745                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4746                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4747                 }
4748                 wr32(E1000_VMOLR(i), vmolr);
4749         }
4750 }
4751
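/*
 * Drop a VF from every VLVF entry it is a member of; any filter left
 * with an empty pool bitmap also has its VLAN ID cleared from the VFTA.
 */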
4752 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4753 {
4754         struct e1000_hw *hw = &adapter->hw;
4755         u32 pool_mask, reg, vid;
4756         int i;
4757
4758         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4759
4760         /* Find the vlan filter for this id */
4761         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4762                 reg = rd32(E1000_VLVF(i));
4763
4764                 /* remove the vf from the pool */
4765                 reg &= ~pool_mask;
4766
4767                 /* if pool is empty then remove entry from vfta */
4768                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4769                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4770                         vid = reg & E1000_VLVF_VLANID_MASK;
4771                         igb_vfta_set(hw, vid, false);
4772                         reg = 0;
4773                 }
4774
4775                 wr32(E1000_VLVF(i), reg);
4776         }
4777
4778         adapter->vf_data[vf].vlans_enabled = 0;
4779 }
4780
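/*
 * Add or remove a pool (VF or PF) from the shared VLVF VLAN filter table.
 * Each VLVF entry holds a VLAN ID plus a bitmap of pools using it; the
 * first pool to add a VID also programs the VFTA, and the last one to
 * leave frees the entry again.  A VF's VMOLR.RLPML is grown or shrunk by
 * 4 bytes so its frames keep room for the VLAN tag.
 */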
4781 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4782 {
4783         struct e1000_hw *hw = &adapter->hw;
4784         u32 reg, i;
4785
4786         /* The vlvf table only exists on 82576 hardware and newer */
4787         if (hw->mac.type < e1000_82576)
4788                 return -1;
4789
4790         /* we only need to do this if VMDq is enabled */
4791         if (!adapter->vfs_allocated_count)
4792                 return -1;
4793
4794         /* Find the vlan filter for this id */
4795         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4796                 reg = rd32(E1000_VLVF(i));
4797                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4798                     vid == (reg & E1000_VLVF_VLANID_MASK))
4799                         break;
4800         }
4801
4802         if (add) {
4803                 if (i == E1000_VLVF_ARRAY_SIZE) {
4804                         /* Did not find a matching VLAN ID entry that was
4805                          * enabled.  Search for a free filter entry, i.e.
4806                          * one without the enable bit set
4807                          */
4808                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4809                                 reg = rd32(E1000_VLVF(i));
4810                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4811                                         break;
4812                         }
4813                 }
4814                 if (i < E1000_VLVF_ARRAY_SIZE) {
4815                         /* Found an enabled/available entry */
4816                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4817
4818                         /* if !enabled we need to set this up in vfta */
4819                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4820                                 /* add VID to filter table */
4821                                 igb_vfta_set(hw, vid, true);
4822                                 reg |= E1000_VLVF_VLANID_ENABLE;
4823                         }
4824                         reg &= ~E1000_VLVF_VLANID_MASK;
4825                         reg |= vid;
4826                         wr32(E1000_VLVF(i), reg);
4827
4828                         /* do not modify RLPML for PF devices */
4829                         if (vf >= adapter->vfs_allocated_count)
4830                                 return 0;
4831
4832                         if (!adapter->vf_data[vf].vlans_enabled) {
4833                                 u32 size;
4834                                 reg = rd32(E1000_VMOLR(vf));
4835                                 size = reg & E1000_VMOLR_RLPML_MASK;
4836                                 size += 4;
4837                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4838                                 reg |= size;
4839                                 wr32(E1000_VMOLR(vf), reg);
4840                         }
4841
4842                         adapter->vf_data[vf].vlans_enabled++;
4843                         return 0;
4844                 }
4845         } else {
4846                 if (i < E1000_VLVF_ARRAY_SIZE) {
4847                         /* remove vf from the pool */
4848                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4849                         /* if pool is empty then remove entry from vfta */
4850                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4851                                 reg = 0;
4852                                 igb_vfta_set(hw, vid, false);
4853                         }
4854                         wr32(E1000_VLVF(i), reg);
4855
4856                         /* do not modify RLPML for PF devices */
4857                         if (vf >= adapter->vfs_allocated_count)
4858                                 return 0;
4859
4860                         adapter->vf_data[vf].vlans_enabled--;
4861                         if (!adapter->vf_data[vf].vlans_enabled) {
4862                                 u32 size;
4863                                 reg = rd32(E1000_VMOLR(vf));
4864                                 size = reg & E1000_VMOLR_RLPML_MASK;
4865                                 size -= 4;
4866                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4867                                 reg |= size;
4868                                 wr32(E1000_VMOLR(vf), reg);
4869                         }
4870                 }
4871         }
4872         return 0;
4873 }
4874
4875 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4876 {
4877         struct e1000_hw *hw = &adapter->hw;
4878
4879         if (vid)
4880                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4881         else
4882                 wr32(E1000_VMVIR(vf), 0);
4883 }
4884
4885 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4886                                int vf, u16 vlan, u8 qos)
4887 {
4888         int err = 0;
4889         struct igb_adapter *adapter = netdev_priv(netdev);
4890
4891         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4892                 return -EINVAL;
4893         if (vlan || qos) {
4894                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4895                 if (err)
4896                         goto out;
4897                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4898                 igb_set_vmolr(adapter, vf, !vlan);
4899                 adapter->vf_data[vf].pf_vlan = vlan;
4900                 adapter->vf_data[vf].pf_qos = qos;
4901                 dev_info(&adapter->pdev->dev,
4902                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4903                 if (test_bit(__IGB_DOWN, &adapter->state)) {
4904                         dev_warn(&adapter->pdev->dev,
4905                                  "The VF VLAN has been set,"
4906                                  " but the PF device is not up.\n");
4907                         dev_warn(&adapter->pdev->dev,
4908                                  "Bring the PF device up before"
4909                                  " attempting to use the VF device.\n");
4910                 }
4911         } else {
4912                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4913                                    false, vf);
4914                 igb_set_vmvir(adapter, vlan, vf);
4915                 igb_set_vmolr(adapter, vf, true);
4916                 adapter->vf_data[vf].pf_vlan = 0;
4917                 adapter->vf_data[vf].pf_qos = 0;
4918         }
4919 out:
4920         return err;
4921 }
4922
4923 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4924 {
4925         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4926         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4927
4928         return igb_vlvf_set(adapter, vid, add, vf);
4929 }
4930
4931 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4932 {
4933         /* clear flags */
4934         adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4935         adapter->vf_data[vf].last_nack = jiffies;
4936
4937         /* reset offloads to defaults */
4938         igb_set_vmolr(adapter, vf, true);
4939
4940         /* reset vlans for device */
4941         igb_clear_vf_vfta(adapter, vf);
4942         if (adapter->vf_data[vf].pf_vlan)
4943                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4944                                     adapter->vf_data[vf].pf_vlan,
4945                                     adapter->vf_data[vf].pf_qos);
4946         else
4947                 igb_clear_vf_vfta(adapter, vf);
4948
4949         /* reset multicast table array for vf */
4950         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4951
4952         /* Flush and reset the mta with the new values */
4953         igb_set_rx_mode(adapter->netdev);
4954 }
4955
4956 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4957 {
4958         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4959
4960         /* generate a new mac address as we were hotplug removed/added */
4961         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4962                 random_ether_addr(vf_mac);
4963
4964         /* process remaining reset events */
4965         igb_vf_reset(adapter, vf);
4966 }
4967
4968 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4969 {
4970         struct e1000_hw *hw = &adapter->hw;
4971         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4972         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4973         u32 reg, msgbuf[3];
4974         u8 *addr = (u8 *)(&msgbuf[1]);
4975
4976         /* process all the same items cleared in a function level reset */
4977         igb_vf_reset(adapter, vf);
4978
4979         /* set vf mac address */
4980         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4981
4982         /* enable transmit and receive for vf */
4983         reg = rd32(E1000_VFTE);
4984         wr32(E1000_VFTE, reg | (1 << vf));
4985         reg = rd32(E1000_VFRE);
4986         wr32(E1000_VFRE, reg | (1 << vf));
4987
4988         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4989
4990         /* reply to reset with ack and vf mac address */
4991         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4992         memcpy(addr, vf_mac, 6);
4993         igb_write_mbx(hw, msgbuf, 3, vf);
4994 }
4995
4996 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4997 {
4998         /*
4999          * The VF MAC Address is stored in a packed array of bytes
5000          * starting at the second 32 bit word of the msg array
5001          */
5002         unsigned char *addr = (unsigned char *)&msg[1];
5003         int err = -1;
5004
5005         if (is_valid_ether_addr(addr))
5006                 err = igb_set_vf_mac(adapter, vf, addr);
5007
5008         return err;
5009 }
5010
5011 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
5012 {
5013         struct e1000_hw *hw = &adapter->hw;
5014         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5015         u32 msg = E1000_VT_MSGTYPE_NACK;
5016
5017         /* if device isn't clear to send it shouldn't be reading either */
5018         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5019             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5020                 igb_write_mbx(hw, &msg, 1, vf);
5021                 vf_data->last_nack = jiffies;
5022         }
5023 }
5024
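/*
 * Read one mailbox message from a VF and dispatch it.  A VF that has not
 * completed the reset handshake (IGB_VF_FLAG_CTS) only gets a NACK, and
 * not more often than once every two seconds; otherwise the request is
 * handled and the original message is echoed back with ACK or NACK set.
 */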
5025 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5026 {
5027         struct pci_dev *pdev = adapter->pdev;
5028         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5029         struct e1000_hw *hw = &adapter->hw;
5030         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5031         s32 retval;
5032
5033         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5034
5035         if (retval) {
5036                 /* if receive failed, revoke the VF's CTS flag and restart init */
5037                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5038                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5039                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5040                         return;
5041                 goto out;
5042         }
5043
5044         /* this is a message we already processed, do nothing */
5045         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5046                 return;
5047
5048         /*
5049          * until the vf completes a reset it should not be
5050          * allowed to start any configuration.
5051          */
5052
5053         if (msgbuf[0] == E1000_VF_RESET) {
5054                 igb_vf_reset_msg(adapter, vf);
5055                 return;
5056         }
5057
5058         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5059                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5060                         return;
5061                 retval = -1;
5062                 goto out;
5063         }
5064
5065         switch ((msgbuf[0] & 0xFFFF)) {
5066         case E1000_VF_SET_MAC_ADDR:
5067                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5068                 break;
5069         case E1000_VF_SET_PROMISC:
5070                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5071                 break;
5072         case E1000_VF_SET_MULTICAST:
5073                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5074                 break;
5075         case E1000_VF_SET_LPE:
5076                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5077                 break;
5078         case E1000_VF_SET_VLAN:
5079                 if (adapter->vf_data[vf].pf_vlan)
5080                         retval = -1;
5081                 else
5082                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5083                 break;
5084         default:
5085                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5086                 retval = -1;
5087                 break;
5088         }
5089
5090         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5091 out:
5092         /* notify the VF of the results of what it sent us */
5093         if (retval)
5094                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5095         else
5096                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5097
5098         igb_write_mbx(hw, msgbuf, 1, vf);
5099 }
5100
5101 static void igb_msg_task(struct igb_adapter *adapter)
5102 {
5103         struct e1000_hw *hw = &adapter->hw;
5104         u32 vf;
5105
5106         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5107                 /* process any reset requests */
5108                 if (!igb_check_for_rst(hw, vf))
5109                         igb_vf_reset_event(adapter, vf);
5110
5111                 /* process any messages pending */
5112                 if (!igb_check_for_msg(hw, vf))
5113                         igb_rcv_msg_from_vf(adapter, vf);
5114
5115                 /* process any acks */
5116                 if (!igb_check_for_ack(hw, vf))
5117                         igb_rcv_ack_from_vf(adapter, vf);
5118         }
5119 }
5120
5121 /**
5122  *  igb_set_uta - Set unicast filter table address
5123  *  @adapter: board private structure
5124  *
5125  *  The unicast table address is a register array of 32-bit registers.
5126  *  The table is meant to be used in a way similar to how the MTA is used;
5127  *  however, due to certain limitations in the hardware it is necessary to
5128  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5129  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5130  **/
5131 static void igb_set_uta(struct igb_adapter *adapter)
5132 {
5133         struct e1000_hw *hw = &adapter->hw;
5134         int i;
5135
5136         /* The UTA table only exists on 82576 hardware and newer */
5137         if (hw->mac.type < e1000_82576)
5138                 return;
5139
5140         /* we only need to do this if VMDq is enabled */
5141         if (!adapter->vfs_allocated_count)
5142                 return;
5143
5144         for (i = 0; i < hw->mac.uta_reg_count; i++)
5145                 array_wr32(E1000_UTA, i, ~0);
5146 }
5147
5148 /**
5149  * igb_intr_msi - Interrupt Handler
5150  * @irq: interrupt number
5151  * @data: pointer to a network interface device structure
5152  **/
5153 static irqreturn_t igb_intr_msi(int irq, void *data)
5154 {
5155         struct igb_adapter *adapter = data;
5156         struct igb_q_vector *q_vector = adapter->q_vector[0];
5157         struct e1000_hw *hw = &adapter->hw;
5158         /* read ICR disables interrupts using IAM */
5159         u32 icr = rd32(E1000_ICR);
5160
5161         igb_write_itr(q_vector);
5162
5163         if (icr & E1000_ICR_DRSTA)
5164                 schedule_work(&adapter->reset_task);
5165
5166         if (icr & E1000_ICR_DOUTSYNC) {
5167                 /* HW is reporting DMA is out of sync */
5168                 adapter->stats.doosync++;
5169         }
5170
5171         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5172                 hw->mac.get_link_status = 1;
5173                 if (!test_bit(__IGB_DOWN, &adapter->state))
5174                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5175         }
5176
5177         napi_schedule(&q_vector->napi);
5178
5179         return IRQ_HANDLED;
5180 }
5181
5182 /**
5183  * igb_intr - Legacy Interrupt Handler
5184  * @irq: interrupt number
5185  * @data: pointer to a network interface device structure
5186  **/
5187 static irqreturn_t igb_intr(int irq, void *data)
5188 {
5189         struct igb_adapter *adapter = data;
5190         struct igb_q_vector *q_vector = adapter->q_vector[0];
5191         struct e1000_hw *hw = &adapter->hw;
5192         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5193          * need for the IMC write */
5194         u32 icr = rd32(E1000_ICR);
5195         if (!icr)
5196                 return IRQ_NONE;  /* Not our interrupt */
5197
5198         igb_write_itr(q_vector);
5199
5200         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5201          * not set, then the adapter didn't send an interrupt */
5202         if (!(icr & E1000_ICR_INT_ASSERTED))
5203                 return IRQ_NONE;
5204
5205         if (icr & E1000_ICR_DRSTA)
5206                 schedule_work(&adapter->reset_task);
5207
5208         if (icr & E1000_ICR_DOUTSYNC) {
5209                 /* HW is reporting DMA is out of sync */
5210                 adapter->stats.doosync++;
5211         }
5212
5213         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5214                 hw->mac.get_link_status = 1;
5215                 /* guard against interrupt when we're going down */
5216                 if (!test_bit(__IGB_DOWN, &adapter->state))
5217                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5218         }
5219
5220         napi_schedule(&q_vector->napi);
5221
5222         return IRQ_HANDLED;
5223 }
5224
5225 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5226 {
5227         struct igb_adapter *adapter = q_vector->adapter;
5228         struct e1000_hw *hw = &adapter->hw;
5229
5230         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5231             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5232                 if (!adapter->msix_entries)
5233                         igb_set_itr(adapter);
5234                 else
5235                         igb_update_ring_itr(q_vector);
5236         }
5237
5238         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5239                 if (adapter->msix_entries)
5240                         wr32(E1000_EIMS, q_vector->eims_value);
5241                 else
5242                         igb_irq_enable(adapter);
5243         }
5244 }
5245
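/*
 * NAPI poll: clean the TX ring (if any) and receive up to "budget"
 * packets.  Only when both are fully drained does the routine report
 * less than budget, complete NAPI and re-enable the queue interrupt;
 * otherwise it stays in polling mode.
 */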
5246 /**
5247  * igb_poll - NAPI Rx polling callback
5248  * @napi: napi polling structure
5249  * @budget: count of how many packets we should handle
5250  **/
5251 static int igb_poll(struct napi_struct *napi, int budget)
5252 {
5253         struct igb_q_vector *q_vector = container_of(napi,
5254                                                      struct igb_q_vector,
5255                                                      napi);
5256         int tx_clean_complete = 1, work_done = 0;
5257
5258 #ifdef CONFIG_IGB_DCA
5259         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5260                 igb_update_dca(q_vector);
5261 #endif
5262         if (q_vector->tx_ring)
5263                 tx_clean_complete = igb_clean_tx_irq(q_vector);
5264
5265         if (q_vector->rx_ring)
5266                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5267
5268         if (!tx_clean_complete)
5269                 work_done = budget;
5270
5271         /* If not enough Rx work done, exit the polling mode */
5272         if (work_done < budget) {
5273                 napi_complete(napi);
5274                 igb_ring_irq_enable(q_vector);
5275         }
5276
5277         return work_done;
5278 }
5279
5280 /**
5281  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5282  * @adapter: board private structure
5283  * @shhwtstamps: timestamp structure to update
5284  * @regval: unsigned 64bit system time value.
5285  *
5286  * We need to convert the system time value stored in the RX/TXSTMP registers
5287  * into a hwtstamp which can be used by the upper level timestamping functions
5288  */
5289 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5290                                    struct skb_shared_hwtstamps *shhwtstamps,
5291                                    u64 regval)
5292 {
5293         u64 ns;
5294
5295         /*
5296          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL; shift this up by
5297          * 24 to match the clock shift we set up earlier.
5298          */
5299         if (adapter->hw.mac.type == e1000_82580)
5300                 regval <<= IGB_82580_TSYNC_SHIFT;
5301
5302         ns = timecounter_cyc2time(&adapter->clock, regval);
5303         timecompare_update(&adapter->compare, ns);
5304         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5305         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5306         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5307 }
5308
5309 /**
5310  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5311  * @q_vector: pointer to q_vector containing needed info
5312  * @buffer_info: pointer to igb_buffer structure
5313  *
5314  * If we were asked to do hardware stamping and such a time stamp is
5315  * available, then it must have been for this skb here because we
5316  * allow only one such packet into the queue.
5317  */
5318 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5319 {
5320         struct igb_adapter *adapter = q_vector->adapter;
5321         struct e1000_hw *hw = &adapter->hw;
5322         struct skb_shared_hwtstamps shhwtstamps;
5323         u64 regval;
5324
5325         /* if skb does not support hw timestamp or TX stamp not valid exit */
5326         if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) ||
5327             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5328                 return;
5329
5330         regval = rd32(E1000_TXSTMPL);
5331         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5332
5333         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5334         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5335 }
5336
5337 /**
5338  * igb_clean_tx_irq - Reclaim resources after transmit completes
5339  * @q_vector: pointer to q_vector containing needed info
5340  * returns true if ring is completely cleaned
5341  **/
5342 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5343 {
5344         struct igb_adapter *adapter = q_vector->adapter;
5345         struct igb_ring *tx_ring = q_vector->tx_ring;
5346         struct net_device *netdev = tx_ring->netdev;
5347         struct e1000_hw *hw = &adapter->hw;
5348         struct igb_buffer *buffer_info;
5349         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5350         unsigned int total_bytes = 0, total_packets = 0;
5351         unsigned int i, eop, count = 0;
5352         bool cleaned = false;
5353
5354         i = tx_ring->next_to_clean;
5355         eop = tx_ring->buffer_info[i].next_to_watch;
5356         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5357
5358         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5359                (count < tx_ring->count)) {
5360                 rmb();  /* read buffer_info after eop_desc status */
5361                 for (cleaned = false; !cleaned; count++) {
5362                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5363                         buffer_info = &tx_ring->buffer_info[i];
5364                         cleaned = (i == eop);
5365
5366                         if (buffer_info->skb) {
5367                                 total_bytes += buffer_info->bytecount;
5368                                 /* gso_segs is currently only valid for tcp */
5369                                 total_packets += buffer_info->gso_segs;
5370                                 igb_tx_hwtstamp(q_vector, buffer_info);
5371                         }
5372
5373                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5374                         tx_desc->wb.status = 0;
5375
5376                         i++;
5377                         if (i == tx_ring->count)
5378                                 i = 0;
5379                 }
5380                 eop = tx_ring->buffer_info[i].next_to_watch;
5381                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5382         }
5383
5384         tx_ring->next_to_clean = i;
5385
5386         if (unlikely(count &&
5387                      netif_carrier_ok(netdev) &&
5388                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5389                 /* Make sure that anybody stopping the queue after this
5390                  * sees the new next_to_clean.
5391                  */
5392                 smp_mb();
5393                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5394                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5395                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5396                         tx_ring->tx_stats.restart_queue++;
5397                 }
5398         }
5399
5400         if (tx_ring->detect_tx_hung) {
5401                 /* Detect a transmit hang in hardware; this serializes the
5402                  * check with the clearing of time_stamp and movement of i */
5403                 tx_ring->detect_tx_hung = false;
5404                 if (tx_ring->buffer_info[i].time_stamp &&
5405                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5406                                (adapter->tx_timeout_factor * HZ)) &&
5407                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5408
5409                         /* detected Tx unit hang */
5410                         dev_err(tx_ring->dev,
5411                                 "Detected Tx Unit Hang\n"
5412                                 "  Tx Queue             <%d>\n"
5413                                 "  TDH                  <%x>\n"
5414                                 "  TDT                  <%x>\n"
5415                                 "  next_to_use          <%x>\n"
5416                                 "  next_to_clean        <%x>\n"
5417                                 "buffer_info[next_to_clean]\n"
5418                                 "  time_stamp           <%lx>\n"
5419                                 "  next_to_watch        <%x>\n"
5420                                 "  jiffies              <%lx>\n"
5421                                 "  desc.status          <%x>\n",
5422                                 tx_ring->queue_index,
5423                                 readl(tx_ring->head),
5424                                 readl(tx_ring->tail),
5425                                 tx_ring->next_to_use,
5426                                 tx_ring->next_to_clean,
5427                                 tx_ring->buffer_info[eop].time_stamp,
5428                                 eop,
5429                                 jiffies,
5430                                 eop_desc->wb.status);
5431                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5432                 }
5433         }
5434         tx_ring->total_bytes += total_bytes;
5435         tx_ring->total_packets += total_packets;
5436         tx_ring->tx_stats.bytes += total_bytes;
5437         tx_ring->tx_stats.packets += total_packets;
5438         return (count < tx_ring->count);
5439 }
5440
5441 /**
5442  * igb_receive_skb - helper function to handle rx indications
5443  * @q_vector: structure containing interrupt and ring information
5444  * @skb: packet to send up
5445  * @vlan_tag: vlan tag for packet
5446  **/
5447 static void igb_receive_skb(struct igb_q_vector *q_vector,
5448                             struct sk_buff *skb,
5449                             u16 vlan_tag)
5450 {
5451         struct igb_adapter *adapter = q_vector->adapter;
5452
5453         if (vlan_tag && adapter->vlgrp)
5454                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5455                                  vlan_tag, skb);
5456         else
5457                 napi_gro_receive(&q_vector->napi, skb);
5458 }
5459
5460 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5461                                        u32 status_err, struct sk_buff *skb)
5462 {
5463         skb_checksum_none_assert(skb);
5464
5465         /* the Ignore Checksum bit is set or checksum is disabled through ethtool */
5466         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5467              (status_err & E1000_RXD_STAT_IXSM))
5468                 return;
5469
5470         /* TCP/UDP checksum error bit is set */
5471         if (status_err &
5472             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5473                 /*
5474                  * work around errata with sctp packets where the TCPE aka
5475                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5476                  * packets, (aka let the stack check the crc32c)
5477                  */
5478                 if ((skb->len == 60) &&
5479                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
5480                         ring->rx_stats.csum_err++;
5481
5482                 /* let the stack verify checksum errors */
5483                 return;
5484         }
5485         /* It must be a TCP or UDP packet with a valid checksum */
5486         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5487                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5488
5489         dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5490 }
5491
5492 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5493                                    struct sk_buff *skb)
5494 {
5495         struct igb_adapter *adapter = q_vector->adapter;
5496         struct e1000_hw *hw = &adapter->hw;
5497         u64 regval;
5498
5499         /*
5500          * If this bit is set, then the RX registers contain the time stamp. No
5501          * other packet will be time stamped until we read these registers, so
5502          * read the registers to make them available again. Because only one
5503          * packet can be time stamped at a time, we know that the register
5504          * values must belong to this one here and therefore we don't need to
5505          * compare any of the additional attributes stored for it.
5506          *
5507          * If nothing went wrong, then it should have a shared tx_flags that we
5508          * can turn into a skb_shared_hwtstamps.
5509          */
5510         if (staterr & E1000_RXDADV_STAT_TSIP) {
5511                 u32 *stamp = (u32 *)skb->data;
5512                 regval = le32_to_cpu(*(stamp + 2));
5513                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5514                 skb_pull(skb, IGB_TS_HDR_LEN);
5515         } else {
5516                 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5517                         return;
5518
5519                 regval = rd32(E1000_RXSTMPL);
5520                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5521         }
5522
5523         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5524 }
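
/*
 * The timestamp recorded here reaches user space through the standard
 * SO_TIMESTAMPING socket interface.  A minimal user-space sketch of a
 * consumer (illustrative only; error handling omitted):
 *
 *	#include <sys/socket.h>
 *	#include <linux/net_tstamp.h>
 *
 *	int flags = SOF_TIMESTAMPING_RX_HARDWARE |
 *		    SOF_TIMESTAMPING_RAW_HARDWARE;
 *	setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &flags, sizeof(flags));
 *
 * The raw hardware timestamp then arrives with each packet in the
 * SCM_TIMESTAMPING control message.
 */
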
5525 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5526                                union e1000_adv_rx_desc *rx_desc)
5527 {
5528         /* HW will not DMA in data larger than the given buffer, even if it
5529          * parses the (NFS, of course) header to be larger.  In that case, it
5530          * fills the header buffer and spills the rest into the page.
5531          */
5532         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5533                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5534         if (hlen > rx_ring->rx_buffer_len)
5535                 hlen = rx_ring->rx_buffer_len;
5536         return hlen;
5537 }
5538
5539 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5540                                  int *work_done, int budget)
5541 {
5542         struct igb_ring *rx_ring = q_vector->rx_ring;
5543         struct net_device *netdev = rx_ring->netdev;
5544         struct device *dev = rx_ring->dev;
5545         union e1000_adv_rx_desc *rx_desc, *next_rxd;
5546         struct igb_buffer *buffer_info, *next_buffer;
5547         struct sk_buff *skb;
5548         bool cleaned = false;
5549         int cleaned_count = 0;
5550         int current_node = numa_node_id();
5551         unsigned int total_bytes = 0, total_packets = 0;
5552         unsigned int i;
5553         u32 staterr;
5554         u16 length;
5555         u16 vlan_tag;
5556
5557         i = rx_ring->next_to_clean;
5558         buffer_info = &rx_ring->buffer_info[i];
5559         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5560         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5561
5562         while (staterr & E1000_RXD_STAT_DD) {
5563                 if (*work_done >= budget)
5564                         break;
5565                 (*work_done)++;
5566                 rmb(); /* read descriptor and rx_buffer_info after status DD */
5567
5568                 skb = buffer_info->skb;
5569                 prefetch(skb->data - NET_IP_ALIGN);
5570                 buffer_info->skb = NULL;
5571
5572                 i++;
5573                 if (i == rx_ring->count)
5574                         i = 0;
5575
5576                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5577                 prefetch(next_rxd);
5578                 next_buffer = &rx_ring->buffer_info[i];
5579
5580                 length = le16_to_cpu(rx_desc->wb.upper.length);
5581                 cleaned = true;
5582                 cleaned_count++;
5583
5584                 if (buffer_info->dma) {
5585                         dma_unmap_single(dev, buffer_info->dma,
5586                                          rx_ring->rx_buffer_len,
5587                                          DMA_FROM_DEVICE);
5588                         buffer_info->dma = 0;
5589                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5590                                 skb_put(skb, length);
5591                                 goto send_up;
5592                         }
5593                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5594                 }
5595
5596                 if (length) {
5597                         dma_unmap_page(dev, buffer_info->page_dma,
5598                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5599                         buffer_info->page_dma = 0;
5600
5601                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5602                                                 buffer_info->page,
5603                                                 buffer_info->page_offset,
5604                                                 length);
5605
5606                         if ((page_count(buffer_info->page) != 1) ||
5607                             (page_to_nid(buffer_info->page) != current_node))
5608                                 buffer_info->page = NULL;
5609                         else
5610                                 get_page(buffer_info->page);
5611
5612                         skb->len += length;
5613                         skb->data_len += length;
5614                         skb->truesize += length;
5615                 }
5616
5617                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5618                         buffer_info->skb = next_buffer->skb;
5619                         buffer_info->dma = next_buffer->dma;
5620                         next_buffer->skb = skb;
5621                         next_buffer->dma = 0;
5622                         goto next_desc;
5623                 }
5624 send_up:
5625                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5626                         dev_kfree_skb_irq(skb);
5627                         goto next_desc;
5628                 }
5629
5630                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5631                         igb_rx_hwtstamp(q_vector, staterr, skb);
5632                 total_bytes += skb->len;
5633                 total_packets++;
5634
5635                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5636
5637                 skb->protocol = eth_type_trans(skb, netdev);
5638                 skb_record_rx_queue(skb, rx_ring->queue_index);
5639
5640                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5641                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5642
5643                 igb_receive_skb(q_vector, skb, vlan_tag);
5644
5645 next_desc:
5646                 rx_desc->wb.upper.status_error = 0;
5647
5648                 /* return some buffers to hardware, one at a time is too slow */
5649                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5650                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5651                         cleaned_count = 0;
5652                 }
5653
5654                 /* use prefetched values */
5655                 rx_desc = next_rxd;
5656                 buffer_info = next_buffer;
5657                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5658         }
5659
5660         rx_ring->next_to_clean = i;
5661         cleaned_count = igb_desc_unused(rx_ring);
5662
5663         if (cleaned_count)
5664                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5665
5666         rx_ring->total_packets += total_packets;
5667         rx_ring->total_bytes += total_bytes;
5668         rx_ring->rx_stats.packets += total_packets;
5669         rx_ring->rx_stats.bytes += total_bytes;
5670         return cleaned;
5671 }
5672
5673 /**
5674  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5675  * @rx_ring: receive ring to place buffers on
 * @cleaned_count: number of buffers to replace
5676  **/
5677 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5678 {
5679         struct net_device *netdev = rx_ring->netdev;
5680         union e1000_adv_rx_desc *rx_desc;
5681         struct igb_buffer *buffer_info;
5682         struct sk_buff *skb;
5683         unsigned int i;
5684         int bufsz;
5685
5686         i = rx_ring->next_to_use;
5687         buffer_info = &rx_ring->buffer_info[i];
5688
5689         bufsz = rx_ring->rx_buffer_len;
5690
5691         while (cleaned_count--) {
5692                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5693
5694                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5695                         if (!buffer_info->page) {
5696                                 buffer_info->page = netdev_alloc_page(netdev);
5697                                 if (!buffer_info->page) {
5698                                         rx_ring->rx_stats.alloc_failed++;
5699                                         goto no_buffers;
5700                                 }
5701                                 buffer_info->page_offset = 0;
5702                         } else {
5703                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5704                         }
5705                         buffer_info->page_dma =
5706                                 dma_map_page(rx_ring->dev, buffer_info->page,
5707                                              buffer_info->page_offset,
5708                                              PAGE_SIZE / 2,
5709                                              DMA_FROM_DEVICE);
5710                         if (dma_mapping_error(rx_ring->dev,
5711                                               buffer_info->page_dma)) {
5712                                 buffer_info->page_dma = 0;
5713                                 rx_ring->rx_stats.alloc_failed++;
5714                                 goto no_buffers;
5715                         }
5716                 }
5717
5718                 skb = buffer_info->skb;
5719                 if (!skb) {
5720                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5721                         if (!skb) {
5722                                 rx_ring->rx_stats.alloc_failed++;
5723                                 goto no_buffers;
5724                         }
5725
5726                         buffer_info->skb = skb;
5727                 }
5728                 if (!buffer_info->dma) {
5729                         buffer_info->dma = dma_map_single(rx_ring->dev,
5730                                                           skb->data,
5731                                                           bufsz,
5732                                                           DMA_FROM_DEVICE);
5733                         if (dma_mapping_error(rx_ring->dev,
5734                                               buffer_info->dma)) {
5735                                 buffer_info->dma = 0;
5736                                 rx_ring->rx_stats.alloc_failed++;
5737                                 goto no_buffers;
5738                         }
5739                 }
5740                 /* Refresh the desc even if buffer_addrs didn't change because
5741                  * each write-back erases this info. */
5742                 if (bufsz < IGB_RXBUFFER_1024) {
5743                         rx_desc->read.pkt_addr =
5744                              cpu_to_le64(buffer_info->page_dma);
5745                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5746                 } else {
5747                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5748                         rx_desc->read.hdr_addr = 0;
5749                 }
5750
5751                 i++;
5752                 if (i == rx_ring->count)
5753                         i = 0;
5754                 buffer_info = &rx_ring->buffer_info[i];
5755         }
5756
5757 no_buffers:
5758         if (rx_ring->next_to_use != i) {
5759                 rx_ring->next_to_use = i;
5760                 if (i == 0)
5761                         i = (rx_ring->count - 1);
5762                 else
5763                         i--;
5764
5765                 /* Force memory writes to complete before letting h/w
5766                  * know there are new descriptors to fetch.  (Only
5767                  * applicable for weak-ordered memory model archs,
5768                  * such as IA-64). */
5769                 wmb();
5770                 writel(i, rx_ring->tail);
5771         }
5772 }
5773
5774 /**
5775  * igb_mii_ioctl - handle MII ioctls (SIOCGMIIPHY/SIOCGMIIREG/SIOCSMIIREG)
5776  * @netdev: network interface device structure
5777  * @ifr: interface request structure holding the mii_ioctl_data
5778  * @cmd: ioctl command
5779  **/
5780 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5781 {
5782         struct igb_adapter *adapter = netdev_priv(netdev);
5783         struct mii_ioctl_data *data = if_mii(ifr);
5784
5785         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5786                 return -EOPNOTSUPP;
5787
5788         switch (cmd) {
5789         case SIOCGMIIPHY:
5790                 data->phy_id = adapter->hw.phy.addr;
5791                 break;
5792         case SIOCGMIIREG:
5793                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5794                                      &data->val_out))
5795                         return -EIO;
5796                 break;
5797         case SIOCSMIIREG:
5798         default:
5799                 return -EOPNOTSUPP;
5800         }
5801         return 0;
5802 }
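
/*
 * A minimal user-space sketch of the MII ioctls handled above (illustrative
 * only; "eth0" is a placeholder and error handling is omitted):
 *
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *	#include <net/if.h>
 *	#include <linux/mii.h>
 *	#include <linux/sockios.h>
 *
 *	struct ifreq ifr;
 *	struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&ifr.ifr_data;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ioctl(fd, SIOCGMIIPHY, &ifr);         (fills mii->phy_id)
 *	mii->reg_num = MII_BMSR;
 *	ioctl(fd, SIOCGMIIREG, &ifr);         (value returned in mii->val_out)
 */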
5803
5804 /**
5805  * igb_hwtstamp_ioctl - control hardware time stamping
5806  * @netdev: network interface device structure
5807  * @ifr: interface request structure holding the hwtstamp_config
5808  * @cmd: ioctl command
5809  *
5810  * Outgoing time stamping can be enabled and disabled. Play nice and
5811  * disable it when requested, although it shouldn't cause any overhead
5812  * when no packet needs it. At most one packet in the queue may be
5813  * marked for time stamping, otherwise it would be impossible to tell
5814  * for sure to which packet the hardware time stamp belongs.
5815  *
5816  * Incoming time stamping has to be configured via the hardware
5817  * filters. Not all combinations are supported, in particular event
5818  * type has to be specified. Matching the kind of event packet is
5819  * not supported, with the exception of "all V2 events regardless of
5820  * layer 2 or 4".
5821  *
5822  **/
5823 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5824                               struct ifreq *ifr, int cmd)
5825 {
5826         struct igb_adapter *adapter = netdev_priv(netdev);
5827         struct e1000_hw *hw = &adapter->hw;
5828         struct hwtstamp_config config;
5829         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5830         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5831         u32 tsync_rx_cfg = 0;
5832         bool is_l4 = false;
5833         bool is_l2 = false;
5834         u32 regval;
5835
5836         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5837                 return -EFAULT;
5838
5839         /* reserved for future extensions */
5840         if (config.flags)
5841                 return -EINVAL;
5842
5843         switch (config.tx_type) {
5844         case HWTSTAMP_TX_OFF:
5845                 tsync_tx_ctl = 0;
5846         case HWTSTAMP_TX_ON:
5847                 break;
5848         default:
5849                 return -ERANGE;
5850         }
5851
5852         switch (config.rx_filter) {
5853         case HWTSTAMP_FILTER_NONE:
5854                 tsync_rx_ctl = 0;
5855                 break;
5856         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5857         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5858         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5859         case HWTSTAMP_FILTER_ALL:
5860                 /*
5861                  * register TSYNCRXCFG must be set, therefore it is not
5862                  * possible to time stamp both Sync and Delay_Req messages
5863                  * => fall back to time stamping all packets
5864                  */
5865                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5866                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5867                 break;
5868         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5869                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5870                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5871                 is_l4 = true;
5872                 break;
5873         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5874                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5875                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5876                 is_l4 = true;
5877                 break;
5878         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5879         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5880                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5881                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5882                 is_l2 = true;
5883                 is_l4 = true;
5884                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5885                 break;
5886         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5887         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5888                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5889                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5890                 is_l2 = true;
5891                 is_l4 = true;
5892                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5893                 break;
5894         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5895         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5896         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5897                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5898                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5899                 is_l2 = true;
5900                 break;
5901         default:
5902                 return -ERANGE;
5903         }
5904
5905         if (hw->mac.type == e1000_82575) {
5906                 if (tsync_rx_ctl || tsync_tx_ctl)
5907                         return -EINVAL;
5908                 return 0;
5909         }
5910
5911         /*
5912          * Per-packet timestamping only works if all packets are
5913          * timestamped, so enable timestamping of all packets as
5914          * long as one rx filter was configured.
5915          */
5916         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
5917                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5918                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5919         }
5920
5921         /* enable/disable TX */
5922         regval = rd32(E1000_TSYNCTXCTL);
5923         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5924         regval |= tsync_tx_ctl;
5925         wr32(E1000_TSYNCTXCTL, regval);
5926
5927         /* enable/disable RX */
5928         regval = rd32(E1000_TSYNCRXCTL);
5929         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5930         regval |= tsync_rx_ctl;
5931         wr32(E1000_TSYNCRXCTL, regval);
5932
5933         /* define which PTP packets are time stamped */
5934         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5935
5936         /* define ethertype filter for timestamped packets */
5937         if (is_l2)
5938                 wr32(E1000_ETQF(3),
5939                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5940                                  E1000_ETQF_1588 | /* enable timestamping */
5941                                  ETH_P_1588));     /* 1588 eth protocol type */
5942         else
5943                 wr32(E1000_ETQF(3), 0);
5944
5945 #define PTP_PORT 319
5946         /* L4 Queue Filter[3]: filter by destination port and protocol */
5947         if (is_l4) {
5948                 u32 ftqf = (IPPROTO_UDP /* UDP */
5949                         | E1000_FTQF_VF_BP /* VF not compared */
5950                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5951                         | E1000_FTQF_MASK); /* mask all inputs */
5952                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5953
5954                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5955                 wr32(E1000_IMIREXT(3),
5956                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5957                 if (hw->mac.type == e1000_82576) {
5958                         /* enable source port check */
5959                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5960                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5961                 }
5962                 wr32(E1000_FTQF(3), ftqf);
5963         } else {
5964                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5965         }
5966         wrfl();
5967
5968         adapter->hwtstamp_config = config;
5969
5970         /* clear TX/RX time stamp registers, just to be sure */
5971         regval = rd32(E1000_TXSTMPH);
5972         regval = rd32(E1000_RXSTMPH);
5973
5974         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5975                 -EFAULT : 0;
5976 }
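
/*
 * A minimal user-space sketch of the SIOCSHWTSTAMP request handled above
 * (illustrative only; "eth0" is a placeholder and error handling is
 * omitted):
 *
 *	#include <string.h>
 *	#include <sys/ioctl.h>
 *	#include <sys/socket.h>
 *	#include <net/if.h>
 *	#include <linux/net_tstamp.h>
 *	#include <linux/sockios.h>
 *
 *	struct hwtstamp_config cfg;
 *	struct ifreq ifr;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	memset(&cfg, 0, sizeof(cfg));
 *	cfg.tx_type = HWTSTAMP_TX_ON;
 *	cfg.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
 *
 *	memset(&ifr, 0, sizeof(ifr));
 *	strncpy(ifr.ifr_name, "eth0", IFNAMSIZ - 1);
 *	ifr.ifr_data = (void *)&cfg;
 *	ioctl(fd, SIOCSHWTSTAMP, &ifr);
 *
 * On return the driver writes the rx_filter it actually programmed back
 * into cfg, which may be broader than requested (see the fall-back cases
 * above).
 */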
5977
5978 /**
5979  * igb_ioctl - dispatch ioctl requests to the MII and hwtstamp handlers
5980  * @netdev: network interface device structure
5981  * @ifr: interface request structure
5982  * @cmd: ioctl command
5983  **/
5984 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5985 {
5986         switch (cmd) {
5987         case SIOCGMIIPHY:
5988         case SIOCGMIIREG:
5989         case SIOCSMIIREG:
5990                 return igb_mii_ioctl(netdev, ifr, cmd);
5991         case SIOCSHWTSTAMP:
5992                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5993         default:
5994                 return -EOPNOTSUPP;
5995         }
5996 }
5997
5998 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5999 {
6000         struct igb_adapter *adapter = hw->back;
6001         u16 cap_offset;
6002
6003         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6004         if (!cap_offset)
6005                 return -E1000_ERR_CONFIG;
6006
6007         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
6008
6009         return 0;
6010 }
6011
6012 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
6013 {
6014         struct igb_adapter *adapter = hw->back;
6015         u16 cap_offset;
6016
6017         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6018         if (!cap_offset)
6019                 return -E1000_ERR_CONFIG;
6020
6021         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6022
6023         return 0;
6024 }
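
/*
 * These wrappers give the shared MAC code access to PCI Express capability
 * registers through the adapter's pci_dev.  A sketch of a caller, assuming
 * hw/adapter are in scope and using PCI_EXP_LNKSTA from <linux/pci_regs.h>:
 *
 *	u16 lnksta = 0;
 *
 *	igb_read_pcie_cap_reg(hw, PCI_EXP_LNKSTA, &lnksta);
 *	dev_dbg(&adapter->pdev->dev, "PCIe link status: 0x%04x\n", lnksta);
 */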
6025
6026 static void igb_vlan_rx_register(struct net_device *netdev,
6027                                  struct vlan_group *grp)
6028 {
6029         struct igb_adapter *adapter = netdev_priv(netdev);
6030         struct e1000_hw *hw = &adapter->hw;
6031         u32 ctrl, rctl;
6032
6033         igb_irq_disable(adapter);
6034         adapter->vlgrp = grp;
6035
6036         if (grp) {
6037                 /* enable VLAN tag insert/strip */
6038                 ctrl = rd32(E1000_CTRL);
6039                 ctrl |= E1000_CTRL_VME;
6040                 wr32(E1000_CTRL, ctrl);
6041
6042                 /* Disable CFI check */
6043                 rctl = rd32(E1000_RCTL);
6044                 rctl &= ~E1000_RCTL_CFIEN;
6045                 wr32(E1000_RCTL, rctl);
6046         } else {
6047                 /* disable VLAN tag insert/strip */
6048                 ctrl = rd32(E1000_CTRL);
6049                 ctrl &= ~E1000_CTRL_VME;
6050                 wr32(E1000_CTRL, ctrl);
6051         }
6052
6053         igb_rlpml_set(adapter);
6054
6055         if (!test_bit(__IGB_DOWN, &adapter->state))
6056                 igb_irq_enable(adapter);
6057 }
6058
6059 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6060 {
6061         struct igb_adapter *adapter = netdev_priv(netdev);
6062         struct e1000_hw *hw = &adapter->hw;
6063         int pf_id = adapter->vfs_allocated_count;
6064
6065         /* attempt to add filter to vlvf array */
6066         igb_vlvf_set(adapter, vid, true, pf_id);
6067
6068         /* add the filter since PF can receive vlans w/o entry in vlvf */
6069         igb_vfta_set(hw, vid, true);
6070 }
6071
6072 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6073 {
6074         struct igb_adapter *adapter = netdev_priv(netdev);
6075         struct e1000_hw *hw = &adapter->hw;
6076         int pf_id = adapter->vfs_allocated_count;
6077         s32 err;
6078
6079         igb_irq_disable(adapter);
6080         vlan_group_set_device(adapter->vlgrp, vid, NULL);
6081
6082         if (!test_bit(__IGB_DOWN, &adapter->state))
6083                 igb_irq_enable(adapter);
6084
6085         /* remove vlan from VLVF table array */
6086         err = igb_vlvf_set(adapter, vid, false, pf_id);
6087
6088         /* if vid was not present in VLVF just remove it from table */
6089         if (err)
6090                 igb_vfta_set(hw, vid, false);
6091 }
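
/*
 * igb_vlan_rx_add_vid() and igb_vlan_rx_kill_vid() are wired up as the
 * ndo_vlan_rx_add_vid/ndo_vlan_rx_kill_vid callbacks; the 8021q module
 * invokes them when a VLAN device is registered on or removed from this
 * interface.
 */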
6092
6093 static void igb_restore_vlan(struct igb_adapter *adapter)
6094 {
6095         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6096
6097         if (adapter->vlgrp) {
6098                 u16 vid;
6099                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
6100                         if (!vlan_group_get_device(adapter->vlgrp, vid))
6101                                 continue;
6102                         igb_vlan_rx_add_vid(adapter->netdev, vid);
6103                 }
6104         }
6105 }
6106
6107 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
6108 {
6109         struct pci_dev *pdev = adapter->pdev;
6110         struct e1000_mac_info *mac = &adapter->hw.mac;
6111
6112         mac->autoneg = 0;
6113
6114         switch (spddplx) {
6115         case SPEED_10 + DUPLEX_HALF:
6116                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6117                 break;
6118         case SPEED_10 + DUPLEX_FULL:
6119                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6120                 break;
6121         case SPEED_100 + DUPLEX_HALF:
6122                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6123                 break;
6124         case SPEED_100 + DUPLEX_FULL:
6125                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6126                 break;
6127         case SPEED_1000 + DUPLEX_FULL:
6128                 mac->autoneg = 1;
6129                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6130                 break;
6131         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6132         default:
6133                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6134                 return -EINVAL;
6135         }
6136         return 0;
6137 }
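
/*
 * The spddplx argument is the sum of an ethtool SPEED_* and DUPLEX_* value,
 * e.g. forcing 100 Mb/s full duplex passes SPEED_100 + DUPLEX_FULL
 * (100 + 1 = 101), which selects ADVERTISE_100_FULL above.  A sketch of a
 * caller (the real caller sits in the ethtool set_settings path):
 *
 *	if (igb_set_spd_dplx(adapter, SPEED_100 + DUPLEX_FULL))
 *		dev_err(&adapter->pdev->dev, "forcing speed/duplex failed\n");
 */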
6138
6139 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6140 {
6141         struct net_device *netdev = pci_get_drvdata(pdev);
6142         struct igb_adapter *adapter = netdev_priv(netdev);
6143         struct e1000_hw *hw = &adapter->hw;
6144         u32 ctrl, rctl, status;
6145         u32 wufc = adapter->wol;
6146 #ifdef CONFIG_PM
6147         int retval = 0;
6148 #endif
6149
6150         netif_device_detach(netdev);
6151
6152         if (netif_running(netdev))
6153                 igb_close(netdev);
6154
6155         igb_clear_interrupt_scheme(adapter);
6156
6157 #ifdef CONFIG_PM
6158         retval = pci_save_state(pdev);
6159         if (retval)
6160                 return retval;
6161 #endif
6162
6163         status = rd32(E1000_STATUS);
6164         if (status & E1000_STATUS_LU)
6165                 wufc &= ~E1000_WUFC_LNKC;
6166
6167         if (wufc) {
6168                 igb_setup_rctl(adapter);
6169                 igb_set_rx_mode(netdev);
6170
6171                 /* turn on all-multi mode if wake on multicast is enabled */
6172                 if (wufc & E1000_WUFC_MC) {
6173                         rctl = rd32(E1000_RCTL);
6174                         rctl |= E1000_RCTL_MPE;
6175                         wr32(E1000_RCTL, rctl);
6176                 }
6177
6178                 ctrl = rd32(E1000_CTRL);
6179                 /* advertise wake from D3Cold */
6180                 #define E1000_CTRL_ADVD3WUC 0x00100000
6181                 /* phy power management enable */
6182                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6183                 ctrl |= E1000_CTRL_ADVD3WUC;
6184                 wr32(E1000_CTRL, ctrl);
6185
6186                 /* Allow time for pending master requests to run */
6187                 igb_disable_pcie_master(hw);
6188
6189                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6190                 wr32(E1000_WUFC, wufc);
6191         } else {
6192                 wr32(E1000_WUC, 0);
6193                 wr32(E1000_WUFC, 0);
6194         }
6195
6196         *enable_wake = wufc || adapter->en_mng_pt;
6197         if (!*enable_wake)
6198                 igb_power_down_link(adapter);
6199         else
6200                 igb_power_up_link(adapter);
6201
6202         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6203          * would have already happened in close and is redundant. */
6204         igb_release_hw_control(adapter);
6205
6206         pci_disable_device(pdev);
6207
6208         return 0;
6209 }
6210
6211 #ifdef CONFIG_PM
6212 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6213 {
6214         int retval;
6215         bool wake;
6216
6217         retval = __igb_shutdown(pdev, &wake);
6218         if (retval)
6219                 return retval;
6220
6221         if (wake) {
6222                 pci_prepare_to_sleep(pdev);
6223         } else {
6224                 pci_wake_from_d3(pdev, false);
6225                 pci_set_power_state(pdev, PCI_D3hot);
6226         }
6227
6228         return 0;
6229 }
6230
6231 static int igb_resume(struct pci_dev *pdev)
6232 {
6233         struct net_device *netdev = pci_get_drvdata(pdev);
6234         struct igb_adapter *adapter = netdev_priv(netdev);
6235         struct e1000_hw *hw = &adapter->hw;
6236         int err;
6237
6238         pci_set_power_state(pdev, PCI_D0);
6239         pci_restore_state(pdev);
6240         pci_save_state(pdev);
6241
6242         err = pci_enable_device_mem(pdev);
6243         if (err) {
6244                 dev_err(&pdev->dev,
6245                         "igb: Cannot enable PCI device from suspend\n");
6246                 return err;
6247         }
6248         pci_set_master(pdev);
6249
6250         pci_enable_wake(pdev, PCI_D3hot, 0);
6251         pci_enable_wake(pdev, PCI_D3cold, 0);
6252
6253         if (igb_init_interrupt_scheme(adapter)) {
6254                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6255                 return -ENOMEM;
6256         }
6257
6258         igb_reset(adapter);
6259
6260         /* let the f/w know that the h/w is now under the control of the
6261          * driver. */
6262         igb_get_hw_control(adapter);
6263
6264         wr32(E1000_WUS, ~0);
6265
6266         if (netif_running(netdev)) {
6267                 err = igb_open(netdev);
6268                 if (err)
6269                         return err;
6270         }
6271
6272         netif_device_attach(netdev);
6273
6274         return 0;
6275 }
6276 #endif
6277
6278 static void igb_shutdown(struct pci_dev *pdev)
6279 {
6280         bool wake;
6281
6282         __igb_shutdown(pdev, &wake);
6283
6284         if (system_state == SYSTEM_POWER_OFF) {
6285                 pci_wake_from_d3(pdev, wake);
6286                 pci_set_power_state(pdev, PCI_D3hot);
6287         }
6288 }
6289
6290 #ifdef CONFIG_NET_POLL_CONTROLLER
6291 /*
6292  * Polling 'interrupt' - used by things like netconsole to send skbs
6293  * without having to re-enable interrupts. It's not called while
6294  * the interrupt routine is executing.
6295  */
6296 static void igb_netpoll(struct net_device *netdev)
6297 {
6298         struct igb_adapter *adapter = netdev_priv(netdev);
6299         struct e1000_hw *hw = &adapter->hw;
6300         int i;
6301
6302         if (!adapter->msix_entries) {
6303                 struct igb_q_vector *q_vector = adapter->q_vector[0];
6304                 igb_irq_disable(adapter);
6305                 napi_schedule(&q_vector->napi);
6306                 return;
6307         }
6308
6309         for (i = 0; i < adapter->num_q_vectors; i++) {
6310                 struct igb_q_vector *q_vector = adapter->q_vector[i];
6311                 wr32(E1000_EIMC, q_vector->eims_value);
6312                 napi_schedule(&q_vector->napi);
6313         }
6314 }
6315 #endif /* CONFIG_NET_POLL_CONTROLLER */
6316
6317 /**
6318  * igb_io_error_detected - called when PCI error is detected
6319  * @pdev: Pointer to PCI device
6320  * @state: The current pci connection state
6321  *
6322  * This function is called after a PCI bus error affecting
6323  * this device has been detected.
6324  */
6325 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6326                                               pci_channel_state_t state)
6327 {
6328         struct net_device *netdev = pci_get_drvdata(pdev);
6329         struct igb_adapter *adapter = netdev_priv(netdev);
6330
6331         netif_device_detach(netdev);
6332
6333         if (state == pci_channel_io_perm_failure)
6334                 return PCI_ERS_RESULT_DISCONNECT;
6335
6336         if (netif_running(netdev))
6337                 igb_down(adapter);
6338         pci_disable_device(pdev);
6339
6340         /* Request a slot reset. */
6341         return PCI_ERS_RESULT_NEED_RESET;
6342 }
6343
6344 /**
6345  * igb_io_slot_reset - called after the pci bus has been reset.
6346  * @pdev: Pointer to PCI device
6347  *
6348  * Restart the card from scratch, as if from a cold-boot. Implementation
6349  * resembles the first-half of the igb_resume routine.
6350  */
6351 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6352 {
6353         struct net_device *netdev = pci_get_drvdata(pdev);
6354         struct igb_adapter *adapter = netdev_priv(netdev);
6355         struct e1000_hw *hw = &adapter->hw;
6356         pci_ers_result_t result;
6357         int err;
6358
6359         if (pci_enable_device_mem(pdev)) {
6360                 dev_err(&pdev->dev,
6361                         "Cannot re-enable PCI device after reset.\n");
6362                 result = PCI_ERS_RESULT_DISCONNECT;
6363         } else {
6364                 pci_set_master(pdev);
6365                 pci_restore_state(pdev);
6366                 pci_save_state(pdev);
6367
6368                 pci_enable_wake(pdev, PCI_D3hot, 0);
6369                 pci_enable_wake(pdev, PCI_D3cold, 0);
6370
6371                 igb_reset(adapter);
6372                 wr32(E1000_WUS, ~0);
6373                 result = PCI_ERS_RESULT_RECOVERED;
6374         }
6375
6376         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6377         if (err) {
6378                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6379                         "failed 0x%0x\n", err);
6380                 /* non-fatal, continue */
6381         }
6382
6383         return result;
6384 }
6385
6386 /**
6387  * igb_io_resume - called when traffic can start flowing again.
6388  * @pdev: Pointer to PCI device
6389  *
6390  * This callback is called when the error recovery driver tells us that
6391  * it's OK to resume normal operation. Implementation resembles the
6392  * second-half of the igb_resume routine.
6393  */
6394 static void igb_io_resume(struct pci_dev *pdev)
6395 {
6396         struct net_device *netdev = pci_get_drvdata(pdev);
6397         struct igb_adapter *adapter = netdev_priv(netdev);
6398
6399         if (netif_running(netdev)) {
6400                 if (igb_up(adapter)) {
6401                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6402                         return;
6403                 }
6404         }
6405
6406         netif_device_attach(netdev);
6407
6408         /* let the f/w know that the h/w is now under the control of the
6409          * driver. */
6410         igb_get_hw_control(adapter);
6411 }
6412
6413 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6414                              u8 qsel)
6415 {
6416         u32 rar_low, rar_high;
6417         struct e1000_hw *hw = &adapter->hw;
6418
6419         /* HW expects these in little endian so we reverse the byte order
6420          * from network order (big endian) to little endian
6421          */
6422         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6423                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6424         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6425
6426         /* Indicate to hardware the Address is Valid. */
6427         rar_high |= E1000_RAH_AV;
6428
6429         if (hw->mac.type == e1000_82575)
6430                 rar_high |= E1000_RAH_POOL_1 * qsel;
6431         else
6432                 rar_high |= E1000_RAH_POOL_1 << qsel;
6433
6434         wr32(E1000_RAL(index), rar_low);
6435         wrfl();
6436         wr32(E1000_RAH(index), rar_high);
6437         wrfl();
6438 }
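
/*
 * Worked example of the RAL/RAH packing above, using the made-up address
 * 00:1b:21:aa:bb:cc:
 *
 *	rar_low  = 0x00 | 0x1b << 8 | 0x21 << 16 | 0xaa << 24 = 0xaa211b00
 *	rar_high = 0xbb | 0xcc << 8                           = 0x0000ccbb
 *
 * E1000_RAH_AV and the pool bits derived from qsel are then OR'ed into
 * rar_high before the two words are written to RAL(index)/RAH(index).
 */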
6439
6440 static int igb_set_vf_mac(struct igb_adapter *adapter,
6441                           int vf, unsigned char *mac_addr)
6442 {
6443         struct e1000_hw *hw = &adapter->hw;
6444         /* VF MAC addresses start at the end of the receive addresses and
6445          * move towards the first; as a result a collision should not be possible */
6446         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6447
6448         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6449
6450         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6451
6452         return 0;
6453 }
6454
6455 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6456 {
6457         struct igb_adapter *adapter = netdev_priv(netdev);
6458         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6459                 return -EINVAL;
6460         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6461         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6462         dev_info(&adapter->pdev->dev,
6463                  "Reload the VF driver to make this change effective.\n");
6464         if (test_bit(__IGB_DOWN, &adapter->state)) {
6465                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6466                          " but the PF device is not up.\n");
6467                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6468                          " attempting to use the VF device.\n");
6469         }
6470         return igb_set_vf_mac(adapter, vf, mac);
6471 }
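
/*
 * This is the ndo_set_vf_mac callback; user space reaches it through the
 * rtnetlink IFLA_VF_MAC attribute, e.g. "ip link set <pf> vf <n> mac <addr>"
 * (command shown for illustration only).
 */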
6472
6473 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6474 {
6475         return -EOPNOTSUPP;
6476 }
6477
6478 static int igb_ndo_get_vf_config(struct net_device *netdev,
6479                                  int vf, struct ifla_vf_info *ivi)
6480 {
6481         struct igb_adapter *adapter = netdev_priv(netdev);
6482         if (vf >= adapter->vfs_allocated_count)
6483                 return -EINVAL;
6484         ivi->vf = vf;
6485         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6486         ivi->tx_rate = 0;
6487         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6488         ivi->qos = adapter->vf_data[vf].pf_qos;
6489         return 0;
6490 }
6491
6492 static void igb_vmm_control(struct igb_adapter *adapter)
6493 {
6494         struct e1000_hw *hw = &adapter->hw;
6495         u32 reg;
6496
6497         switch (hw->mac.type) {
6498         case e1000_82575:
6499         default:
6500                 /* replication is not supported for 82575 */
6501                 return;
6502         case e1000_82576:
6503                 /* notify HW that the MAC is adding vlan tags */
6504                 reg = rd32(E1000_DTXCTL);
6505                 reg |= E1000_DTXCTL_VLAN_ADDED;
6506                 wr32(E1000_DTXCTL, reg);
6507         case e1000_82580:
6508                 /* enable replication vlan tag stripping */
6509                 reg = rd32(E1000_RPLOLR);
6510                 reg |= E1000_RPLOLR_STRVLAN;
6511                 wr32(E1000_RPLOLR, reg);
6512         case e1000_i350:
6513                 /* none of the above registers are supported by i350 */
6514                 break;
6515         }
6516
6517         if (adapter->vfs_allocated_count) {
6518                 igb_vmdq_set_loopback_pf(hw, true);
6519                 igb_vmdq_set_replication_pf(hw, true);
6520         } else {
6521                 igb_vmdq_set_loopback_pf(hw, false);
6522                 igb_vmdq_set_replication_pf(hw, false);
6523         }
6524 }
6525
6526 /* igb_main.c */