1 /*******************************************************************************
2
3   Intel(R) Gigabit Ethernet Linux driver
4   Copyright(c) 2007-2009 Intel Corporation.
5
6   This program is free software; you can redistribute it and/or modify it
7   under the terms and conditions of the GNU General Public License,
8   version 2, as published by the Free Software Foundation.
9
10   This program is distributed in the hope it will be useful, but WITHOUT
11   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
13   more details.
14
15   You should have received a copy of the GNU General Public License along with
16   this program; if not, write to the Free Software Foundation, Inc.,
17   51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
18
19   The full GNU General Public License is included in this distribution in
20   the file called "COPYING".
21
22   Contact Information:
23   e1000-devel Mailing List <e1000-devel@lists.sourceforge.net>
24   Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
25
26 *******************************************************************************/
27
28 #include <linux/module.h>
29 #include <linux/types.h>
30 #include <linux/init.h>
31 #include <linux/vmalloc.h>
32 #include <linux/pagemap.h>
33 #include <linux/netdevice.h>
34 #include <linux/ipv6.h>
35 #include <linux/slab.h>
36 #include <net/checksum.h>
37 #include <net/ip6_checksum.h>
38 #include <linux/net_tstamp.h>
39 #include <linux/mii.h>
40 #include <linux/ethtool.h>
41 #include <linux/if_vlan.h>
42 #include <linux/pci.h>
43 #include <linux/pci-aspm.h>
44 #include <linux/delay.h>
45 #include <linux/interrupt.h>
46 #include <linux/if_ether.h>
47 #include <linux/aer.h>
48 #ifdef CONFIG_IGB_DCA
49 #include <linux/dca.h>
50 #endif
51 #include "igb.h"
52
53 #define DRV_VERSION "2.1.0-k2"
54 char igb_driver_name[] = "igb";
55 char igb_driver_version[] = DRV_VERSION;
56 static const char igb_driver_string[] =
57                                 "Intel(R) Gigabit Ethernet Network Driver";
58 static const char igb_copyright[] = "Copyright (c) 2007-2009 Intel Corporation.";
59
60 static const struct e1000_info *igb_info_tbl[] = {
61         [board_82575] = &e1000_82575_info,
62 };
63
64 static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = {
65         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER), board_82575 },
66         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER), board_82575 },
67         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES), board_82575 },
68         { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII), board_82575 },
69         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER), board_82575 },
70         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER), board_82575 },
71         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES), board_82575 },
72         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII), board_82575 },
73         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL), board_82575 },
74         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576), board_82575 },
75         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS), board_82575 },
76         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES), board_82575 },
77         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER), board_82575 },
78         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES), board_82575 },
79         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD), board_82575 },
80         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2), board_82575 },
81         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER), board_82575 },
82         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER), board_82575 },
83         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES), board_82575 },
84         { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER), board_82575 },
85         /* required last entry */
86         {0, }
87 };
88
89 MODULE_DEVICE_TABLE(pci, igb_pci_tbl);
90
91 void igb_reset(struct igb_adapter *);
92 static int igb_setup_all_tx_resources(struct igb_adapter *);
93 static int igb_setup_all_rx_resources(struct igb_adapter *);
94 static void igb_free_all_tx_resources(struct igb_adapter *);
95 static void igb_free_all_rx_resources(struct igb_adapter *);
96 static void igb_setup_mrqc(struct igb_adapter *);
97 void igb_update_stats(struct igb_adapter *);
98 static int igb_probe(struct pci_dev *, const struct pci_device_id *);
99 static void __devexit igb_remove(struct pci_dev *pdev);
100 static int igb_sw_init(struct igb_adapter *);
101 static int igb_open(struct net_device *);
102 static int igb_close(struct net_device *);
103 static void igb_configure_tx(struct igb_adapter *);
104 static void igb_configure_rx(struct igb_adapter *);
105 static void igb_clean_all_tx_rings(struct igb_adapter *);
106 static void igb_clean_all_rx_rings(struct igb_adapter *);
107 static void igb_clean_tx_ring(struct igb_ring *);
108 static void igb_clean_rx_ring(struct igb_ring *);
109 static void igb_set_rx_mode(struct net_device *);
110 static void igb_update_phy_info(unsigned long);
111 static void igb_watchdog(unsigned long);
112 static void igb_watchdog_task(struct work_struct *);
113 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb, struct net_device *);
114 static struct net_device_stats *igb_get_stats(struct net_device *);
115 static int igb_change_mtu(struct net_device *, int);
116 static int igb_set_mac(struct net_device *, void *);
117 static void igb_set_uta(struct igb_adapter *adapter);
118 static irqreturn_t igb_intr(int irq, void *);
119 static irqreturn_t igb_intr_msi(int irq, void *);
120 static irqreturn_t igb_msix_other(int irq, void *);
121 static irqreturn_t igb_msix_ring(int irq, void *);
122 #ifdef CONFIG_IGB_DCA
123 static void igb_update_dca(struct igb_q_vector *);
124 static void igb_setup_dca(struct igb_adapter *);
125 #endif /* CONFIG_IGB_DCA */
126 static bool igb_clean_tx_irq(struct igb_q_vector *);
127 static int igb_poll(struct napi_struct *, int);
128 static bool igb_clean_rx_irq_adv(struct igb_q_vector *, int *, int);
129 static int igb_ioctl(struct net_device *, struct ifreq *, int cmd);
130 static void igb_tx_timeout(struct net_device *);
131 static void igb_reset_task(struct work_struct *);
132 static void igb_vlan_rx_register(struct net_device *, struct vlan_group *);
133 static void igb_vlan_rx_add_vid(struct net_device *, u16);
134 static void igb_vlan_rx_kill_vid(struct net_device *, u16);
135 static void igb_restore_vlan(struct igb_adapter *);
136 static void igb_rar_set_qsel(struct igb_adapter *, u8 *, u32, u8);
137 static void igb_ping_all_vfs(struct igb_adapter *);
138 static void igb_msg_task(struct igb_adapter *);
139 static void igb_vmm_control(struct igb_adapter *);
140 static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *);
141 static void igb_restore_vf_multicasts(struct igb_adapter *adapter);
142 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
143 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
144                                int vf, u16 vlan, u8 qos);
145 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate);
146 static int igb_ndo_get_vf_config(struct net_device *netdev, int vf,
147                                  struct ifla_vf_info *ivi);
148
149 #ifdef CONFIG_PM
150 static int igb_suspend(struct pci_dev *, pm_message_t);
151 static int igb_resume(struct pci_dev *);
152 #endif
153 static void igb_shutdown(struct pci_dev *);
154 #ifdef CONFIG_IGB_DCA
155 static int igb_notify_dca(struct notifier_block *, unsigned long, void *);
156 static struct notifier_block dca_notifier = {
157         .notifier_call  = igb_notify_dca,
158         .next           = NULL,
159         .priority       = 0
160 };
161 #endif
162 #ifdef CONFIG_NET_POLL_CONTROLLER
163 /* for netdump / net console */
164 static void igb_netpoll(struct net_device *);
165 #endif
166 #ifdef CONFIG_PCI_IOV
167 static unsigned int max_vfs = 0;
168 module_param(max_vfs, uint, 0);
169 MODULE_PARM_DESC(max_vfs, "Maximum number of virtual functions to allocate "
170                  "per physical function");
171 #endif /* CONFIG_PCI_IOV */
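/* Example (CONFIG_PCI_IOV builds only): "modprobe igb max_vfs=2" requests two
 * virtual functions per physical function; 0 (the default) leaves SR-IOV disabled. */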
172
173 static pci_ers_result_t igb_io_error_detected(struct pci_dev *,
174                      pci_channel_state_t);
175 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *);
176 static void igb_io_resume(struct pci_dev *);
177
178 static struct pci_error_handlers igb_err_handler = {
179         .error_detected = igb_io_error_detected,
180         .slot_reset = igb_io_slot_reset,
181         .resume = igb_io_resume,
182 };
183
184
185 static struct pci_driver igb_driver = {
186         .name     = igb_driver_name,
187         .id_table = igb_pci_tbl,
188         .probe    = igb_probe,
189         .remove   = __devexit_p(igb_remove),
190 #ifdef CONFIG_PM
191         /* Power Management Hooks */
192         .suspend  = igb_suspend,
193         .resume   = igb_resume,
194 #endif
195         .shutdown = igb_shutdown,
196         .err_handler = &igb_err_handler
197 };
198
199 MODULE_AUTHOR("Intel Corporation, <e1000-devel@lists.sourceforge.net>");
200 MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver");
201 MODULE_LICENSE("GPL");
202 MODULE_VERSION(DRV_VERSION);
203
204 struct igb_reg_info {
205         u32 ofs;
206         char *name;
207 };
208
209 static const struct igb_reg_info igb_reg_info_tbl[] = {
210
211         /* General Registers */
212         {E1000_CTRL, "CTRL"},
213         {E1000_STATUS, "STATUS"},
214         {E1000_CTRL_EXT, "CTRL_EXT"},
215
216         /* Interrupt Registers */
217         {E1000_ICR, "ICR"},
218
219         /* RX Registers */
220         {E1000_RCTL, "RCTL"},
221         {E1000_RDLEN(0), "RDLEN"},
222         {E1000_RDH(0), "RDH"},
223         {E1000_RDT(0), "RDT"},
224         {E1000_RXDCTL(0), "RXDCTL"},
225         {E1000_RDBAL(0), "RDBAL"},
226         {E1000_RDBAH(0), "RDBAH"},
227
228         /* TX Registers */
229         {E1000_TCTL, "TCTL"},
230         {E1000_TDBAL(0), "TDBAL"},
231         {E1000_TDBAH(0), "TDBAH"},
232         {E1000_TDLEN(0), "TDLEN"},
233         {E1000_TDH(0), "TDH"},
234         {E1000_TDT(0), "TDT"},
235         {E1000_TXDCTL(0), "TXDCTL"},
236         {E1000_TDFH, "TDFH"},
237         {E1000_TDFT, "TDFT"},
238         {E1000_TDFHS, "TDFHS"},
239         {E1000_TDFPC, "TDFPC"},
240
241         /* List Terminator */
242         {}
243 };
244
245 /*
246  * igb_regdump - register printout routine
247  */
248 static void igb_regdump(struct e1000_hw *hw, struct igb_reg_info *reginfo)
249 {
250         int n = 0;
251         char rname[16];
252         u32 regs[8];
253
254         switch (reginfo->ofs) {
255         case E1000_RDLEN(0):
256                 for (n = 0; n < 4; n++)
257                         regs[n] = rd32(E1000_RDLEN(n));
258                 break;
259         case E1000_RDH(0):
260                 for (n = 0; n < 4; n++)
261                         regs[n] = rd32(E1000_RDH(n));
262                 break;
263         case E1000_RDT(0):
264                 for (n = 0; n < 4; n++)
265                         regs[n] = rd32(E1000_RDT(n));
266                 break;
267         case E1000_RXDCTL(0):
268                 for (n = 0; n < 4; n++)
269                         regs[n] = rd32(E1000_RXDCTL(n));
270                 break;
271         case E1000_RDBAL(0):
272                 for (n = 0; n < 4; n++)
273                         regs[n] = rd32(E1000_RDBAL(n));
274                 break;
275         case E1000_RDBAH(0):
276                 for (n = 0; n < 4; n++)
277                         regs[n] = rd32(E1000_RDBAH(n));
278                 break;
279         case E1000_TDBAL(0):
280                 for (n = 0; n < 4; n++)
281                         regs[n] = rd32(E1000_TDBAL(n));
282                 break;
283         case E1000_TDBAH(0):
284                 for (n = 0; n < 4; n++)
285                         regs[n] = rd32(E1000_TDBAH(n));
286                 break;
287         case E1000_TDLEN(0):
288                 for (n = 0; n < 4; n++)
289                         regs[n] = rd32(E1000_TDLEN(n));
290                 break;
291         case E1000_TDH(0):
292                 for (n = 0; n < 4; n++)
293                         regs[n] = rd32(E1000_TDH(n));
294                 break;
295         case E1000_TDT(0):
296                 for (n = 0; n < 4; n++)
297                         regs[n] = rd32(E1000_TDT(n));
298                 break;
299         case E1000_TXDCTL(0):
300                 for (n = 0; n < 4; n++)
301                         regs[n] = rd32(E1000_TXDCTL(n));
302                 break;
303         default:
304                 printk(KERN_INFO "%-15s %08x\n",
305                         reginfo->name, rd32(reginfo->ofs));
306                 return;
307         }
308
309         snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
310         printk(KERN_INFO "%-15s ", rname);
311         for (n = 0; n < 4; n++)
312                 printk(KERN_CONT "%08x ", regs[n]);
313         printk(KERN_CONT "\n");
314 }
315
316 /*
317  * igb_dump - Print registers, tx-rings and rx-rings
318  */
319 static void igb_dump(struct igb_adapter *adapter)
320 {
321         struct net_device *netdev = adapter->netdev;
322         struct e1000_hw *hw = &adapter->hw;
323         struct igb_reg_info *reginfo;
324         int n = 0;
325         struct igb_ring *tx_ring;
326         union e1000_adv_tx_desc *tx_desc;
327         struct my_u0 { u64 a; u64 b; } *u0;
328         struct igb_buffer *buffer_info;
329         struct igb_ring *rx_ring;
330         union e1000_adv_rx_desc *rx_desc;
331         u32 staterr;
332         int i = 0;
333
334         if (!netif_msg_hw(adapter))
335                 return;
336
337         /* Print netdevice Info */
338         if (netdev) {
339                 dev_info(&adapter->pdev->dev, "Net device Info\n");
340                 printk(KERN_INFO "Device Name     state            "
341                         "trans_start      last_rx\n");
342                 printk(KERN_INFO "%-15s %016lX %016lX %016lX\n",
343                 netdev->name,
344                 netdev->state,
345                 netdev->trans_start,
346                 netdev->last_rx);
347         }
348
349         /* Print Registers */
350         dev_info(&adapter->pdev->dev, "Register Dump\n");
351         printk(KERN_INFO " Register Name   Value\n");
352         for (reginfo = (struct igb_reg_info *)igb_reg_info_tbl;
353              reginfo->name; reginfo++) {
354                 igb_regdump(hw, reginfo);
355         }
356
357         /* Print TX Ring Summary */
358         if (!netdev || !netif_running(netdev))
359                 goto exit;
360
361         dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
362         printk(KERN_INFO "Queue [NTU] [NTC] [bi(ntc)->dma  ]"
363                 " leng ntw timestamp\n");
364         for (n = 0; n < adapter->num_tx_queues; n++) {
365                 tx_ring = adapter->tx_ring[n];
366                 buffer_info = &tx_ring->buffer_info[tx_ring->next_to_clean];
367                 printk(KERN_INFO " %5d %5X %5X %016llX %04X %3X %016llX\n",
368                            n, tx_ring->next_to_use, tx_ring->next_to_clean,
369                            (u64)buffer_info->dma,
370                            buffer_info->length,
371                            buffer_info->next_to_watch,
372                            (u64)buffer_info->time_stamp);
373         }
374
375         /* Print TX Rings */
376         if (!netif_msg_tx_done(adapter))
377                 goto rx_ring_summary;
378
379         dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
380
381         /* Transmit Descriptor Formats
382          *
383          * Advanced Transmit Descriptor
384          *   +--------------------------------------------------------------+
385          * 0 |         Buffer Address [63:0]                                |
386          *   +--------------------------------------------------------------+
387          * 8 | PAYLEN  | PORTS  |CC|IDX | STA | DCMD  |DTYP|MAC|RSV| DTALEN |
388          *   +--------------------------------------------------------------+
389          *   63      46 45    40 39 38 36 35 32 31   24             15       0
390          */
391
392         for (n = 0; n < adapter->num_tx_queues; n++) {
393                 tx_ring = adapter->tx_ring[n];
394                 printk(KERN_INFO "------------------------------------\n");
395                 printk(KERN_INFO "TX QUEUE INDEX = %d\n", tx_ring->queue_index);
396                 printk(KERN_INFO "------------------------------------\n");
397                 printk(KERN_INFO "T [desc]     [address 63:0  ] "
398                         "[PlPOCIStDDM Ln] [bi->dma       ] "
399                         "leng  ntw timestamp        bi->skb\n");
400
401                 for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
402                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
403                         buffer_info = &tx_ring->buffer_info[i];
404                         u0 = (struct my_u0 *)tx_desc;
405                         printk(KERN_INFO "T [0x%03X]    %016llX %016llX %016llX"
406                                 " %04X  %3X %016llX %p", i,
407                                 le64_to_cpu(u0->a),
408                                 le64_to_cpu(u0->b),
409                                 (u64)buffer_info->dma,
410                                 buffer_info->length,
411                                 buffer_info->next_to_watch,
412                                 (u64)buffer_info->time_stamp,
413                                 buffer_info->skb);
414                         if (i == tx_ring->next_to_use &&
415                                 i == tx_ring->next_to_clean)
416                                 printk(KERN_CONT " NTC/U\n");
417                         else if (i == tx_ring->next_to_use)
418                                 printk(KERN_CONT " NTU\n");
419                         else if (i == tx_ring->next_to_clean)
420                                 printk(KERN_CONT " NTC\n");
421                         else
422                                 printk(KERN_CONT "\n");
423
424                         if (netif_msg_pktdata(adapter) && buffer_info->dma != 0)
425                                 print_hex_dump(KERN_INFO, "",
426                                         DUMP_PREFIX_ADDRESS,
427                                         16, 1, phys_to_virt(buffer_info->dma),
428                                         buffer_info->length, true);
429                 }
430         }
431
432         /* Print RX Rings Summary */
433 rx_ring_summary:
434         dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
435         printk(KERN_INFO "Queue [NTU] [NTC]\n");
436         for (n = 0; n < adapter->num_rx_queues; n++) {
437                 rx_ring = adapter->rx_ring[n];
438                 printk(KERN_INFO " %5d %5X %5X\n", n,
439                            rx_ring->next_to_use, rx_ring->next_to_clean);
440         }
441
442         /* Print RX Rings */
443         if (!netif_msg_rx_status(adapter))
444                 goto exit;
445
446         dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
447
448         /* Advanced Receive Descriptor (Read) Format
449          *    63                                           1        0
450          *    +-----------------------------------------------------+
451          *  0 |       Packet Buffer Address [63:1]           |A0/NSE|
452          *    +----------------------------------------------+------+
453          *  8 |       Header Buffer Address [63:1]           |  DD  |
454          *    +-----------------------------------------------------+
455          *
456          *
457          * Advanced Receive Descriptor (Write-Back) Format
458          *
459          *   63       48 47    32 31  30      21 20 17 16   4 3     0
460          *   +------------------------------------------------------+
461          * 0 | Packet     IP     |SPH| HDR_LEN   | RSV|Packet|  RSS |
462          *   | Checksum   Ident  |   |           |    | Type | Type |
463          *   +------------------------------------------------------+
464          * 8 | VLAN Tag | Length | Extended Error | Extended Status |
465          *   +------------------------------------------------------+
466          *   63       48 47    32 31            20 19               0
467          */
468
469         for (n = 0; n < adapter->num_rx_queues; n++) {
470                 rx_ring = adapter->rx_ring[n];
471                 printk(KERN_INFO "------------------------------------\n");
472                 printk(KERN_INFO "RX QUEUE INDEX = %d\n", rx_ring->queue_index);
473                 printk(KERN_INFO "------------------------------------\n");
474                 printk(KERN_INFO "R  [desc]      [ PktBuf     A0] "
475                         "[  HeadBuf   DD] [bi->dma       ] [bi->skb] "
476                         "<-- Adv Rx Read format\n");
477                 printk(KERN_INFO "RWB[desc]      [PcsmIpSHl PtRs] "
478                         "[vl er S cks ln] ---------------- [bi->skb] "
479                         "<-- Adv Rx Write-Back format\n");
480
481                 for (i = 0; i < rx_ring->count; i++) {
482                         buffer_info = &rx_ring->buffer_info[i];
483                         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
484                         u0 = (struct my_u0 *)rx_desc;
485                         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
486                         if (staterr & E1000_RXD_STAT_DD) {
487                                 /* Descriptor Done */
488                                 printk(KERN_INFO "RWB[0x%03X]     %016llX "
489                                         "%016llX ---------------- %p", i,
490                                         le64_to_cpu(u0->a),
491                                         le64_to_cpu(u0->b),
492                                         buffer_info->skb);
493                         } else {
494                                 printk(KERN_INFO "R  [0x%03X]     %016llX "
495                                         "%016llX %016llX %p", i,
496                                         le64_to_cpu(u0->a),
497                                         le64_to_cpu(u0->b),
498                                         (u64)buffer_info->dma,
499                                         buffer_info->skb);
500
501                                 if (netif_msg_pktdata(adapter)) {
502                                         print_hex_dump(KERN_INFO, "",
503                                                 DUMP_PREFIX_ADDRESS,
504                                                 16, 1,
505                                                 phys_to_virt(buffer_info->dma),
506                                                 rx_ring->rx_buffer_len, true);
507                                         if (rx_ring->rx_buffer_len
508                                                 < IGB_RXBUFFER_1024)
509                                                 print_hex_dump(KERN_INFO, "",
510                                                   DUMP_PREFIX_ADDRESS,
511                                                   16, 1,
512                                                   phys_to_virt(
513                                                     buffer_info->page_dma +
514                                                     buffer_info->page_offset),
515                                                   PAGE_SIZE/2, true);
516                                 }
517                         }
518
519                         if (i == rx_ring->next_to_use)
520                                 printk(KERN_CONT " NTU\n");
521                         else if (i == rx_ring->next_to_clean)
522                                 printk(KERN_CONT " NTC\n");
523                         else
524                                 printk(KERN_CONT "\n");
525
526                 }
527         }
528
529 exit:
530         return;
531 }
532
533
534 /**
535  * igb_read_clock - read raw cycle counter (to be used by time counter)
536  */
537 static cycle_t igb_read_clock(const struct cyclecounter *tc)
538 {
539         struct igb_adapter *adapter =
540                 container_of(tc, struct igb_adapter, cycles);
541         struct e1000_hw *hw = &adapter->hw;
542         u64 stamp = 0;
543         int shift = 0;
544
545         /*
546          * The timestamp latches on lowest register read. For the 82580
547          * the lowest register is SYSTIMR instead of SYSTIML.  However we never
548          * adjusted TIMINCA so SYSTIMR will just read as all 0s so ignore it.
549          */
550         if (hw->mac.type == e1000_82580) {
551                 stamp = rd32(E1000_SYSTIMR) >> 8;
552                 shift = IGB_82580_TSYNC_SHIFT;
553         }
554
555         stamp |= (u64)rd32(E1000_SYSTIML) << shift;
556         stamp |= (u64)rd32(E1000_SYSTIMH) << (shift + 32);
557         return stamp;
558 }
559
560 /**
561  * igb_get_hw_dev - return device
562  * used by hardware layer to print debugging information
563  **/
564 struct net_device *igb_get_hw_dev(struct e1000_hw *hw)
565 {
566         struct igb_adapter *adapter = hw->back;
567         return adapter->netdev;
568 }
569
570 /**
571  * igb_init_module - Driver Registration Routine
572  *
573  * igb_init_module is the first routine called when the driver is
574  * loaded. All it does is register with the PCI subsystem.
575  **/
576 static int __init igb_init_module(void)
577 {
578         int ret;
579         printk(KERN_INFO "%s - version %s\n",
580                igb_driver_string, igb_driver_version);
581
582         printk(KERN_INFO "%s\n", igb_copyright);
583
584 #ifdef CONFIG_IGB_DCA
585         dca_register_notify(&dca_notifier);
586 #endif
587         ret = pci_register_driver(&igb_driver);
588         return ret;
589 }
590
591 module_init(igb_init_module);
592
593 /**
594  * igb_exit_module - Driver Exit Cleanup Routine
595  *
596  * igb_exit_module is called just before the driver is removed
597  * from memory.
598  **/
599 static void __exit igb_exit_module(void)
600 {
601 #ifdef CONFIG_IGB_DCA
602         dca_unregister_notify(&dca_notifier);
603 #endif
604         pci_unregister_driver(&igb_driver);
605 }
606
607 module_exit(igb_exit_module);
608
609 #define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1))
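/* Q_IDX_82576(i) interleaves the queue indices left free by the VFs:
 * i = 0, 1, 2, 3, ... maps to register indices 0, 8, 1, 9, 2, 10, 3, 11, ...
 */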
610 /**
611  * igb_cache_ring_register - Descriptor ring to register mapping
612  * @adapter: board private structure to initialize
613  *
614  * Once we know the feature-set enabled for the device, we'll cache
615  * the register offset the descriptor ring is assigned to.
616  **/
617 static void igb_cache_ring_register(struct igb_adapter *adapter)
618 {
619         int i = 0, j = 0;
620         u32 rbase_offset = adapter->vfs_allocated_count;
621
622         switch (adapter->hw.mac.type) {
623         case e1000_82576:
624                 /* The queues are allocated for virtualization such that VF 0
625                  * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc.
626                  * In order to avoid collision we start at the first free queue
627                  * and continue consuming queues in the same sequence
628                  */
629                 if (adapter->vfs_allocated_count) {
630                         for (; i < adapter->rss_queues; i++)
631                                 adapter->rx_ring[i]->reg_idx = rbase_offset +
632                                                                Q_IDX_82576(i);
633                         for (; j < adapter->rss_queues; j++)
634                                 adapter->tx_ring[j]->reg_idx = rbase_offset +
635                                                                Q_IDX_82576(j);
636                 }
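                /* Fall through: any rings not mapped above (and all rings on
                   the other MAC types) get sequential register indices. */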
637         case e1000_82575:
638         case e1000_82580:
639         case e1000_i350:
640         default:
641                 for (; i < adapter->num_rx_queues; i++)
642                         adapter->rx_ring[i]->reg_idx = rbase_offset + i;
643                 for (; j < adapter->num_tx_queues; j++)
644                         adapter->tx_ring[j]->reg_idx = rbase_offset + j;
645                 break;
646         }
647 }
648
649 static void igb_free_queues(struct igb_adapter *adapter)
650 {
651         int i;
652
653         for (i = 0; i < adapter->num_tx_queues; i++) {
654                 kfree(adapter->tx_ring[i]);
655                 adapter->tx_ring[i] = NULL;
656         }
657         for (i = 0; i < adapter->num_rx_queues; i++) {
658                 kfree(adapter->rx_ring[i]);
659                 adapter->rx_ring[i] = NULL;
660         }
661         adapter->num_rx_queues = 0;
662         adapter->num_tx_queues = 0;
663 }
664
665 /**
666  * igb_alloc_queues - Allocate memory for all rings
667  * @adapter: board private structure to initialize
668  *
669  * We allocate one ring per queue at run-time since we don't know the
670  * number of queues at compile-time.
671  **/
672 static int igb_alloc_queues(struct igb_adapter *adapter)
673 {
674         struct igb_ring *ring;
675         int i;
676
677         for (i = 0; i < adapter->num_tx_queues; i++) {
678                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
679                 if (!ring)
680                         goto err;
681                 ring->count = adapter->tx_ring_count;
682                 ring->queue_index = i;
683                 ring->dev = &adapter->pdev->dev;
684                 ring->netdev = adapter->netdev;
685                 /* For 82575, context index must be unique per ring. */
686                 if (adapter->hw.mac.type == e1000_82575)
687                         ring->flags = IGB_RING_FLAG_TX_CTX_IDX;
688                 adapter->tx_ring[i] = ring;
689         }
690
691         for (i = 0; i < adapter->num_rx_queues; i++) {
692                 ring = kzalloc(sizeof(struct igb_ring), GFP_KERNEL);
693                 if (!ring)
694                         goto err;
695                 ring->count = adapter->rx_ring_count;
696                 ring->queue_index = i;
697                 ring->dev = &adapter->pdev->dev;
698                 ring->netdev = adapter->netdev;
699                 ring->rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
700                 ring->flags = IGB_RING_FLAG_RX_CSUM; /* enable rx checksum */
701                 /* set flag indicating ring supports SCTP checksum offload */
702                 if (adapter->hw.mac.type >= e1000_82576)
703                         ring->flags |= IGB_RING_FLAG_RX_SCTP_CSUM;
704                 adapter->rx_ring[i] = ring;
705         }
706
707         igb_cache_ring_register(adapter);
708
709         return 0;
710
711 err:
712         igb_free_queues(adapter);
713
714         return -ENOMEM;
715 }
716
717 #define IGB_N0_QUEUE -1
718 static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector)
719 {
720         u32 msixbm = 0;
721         struct igb_adapter *adapter = q_vector->adapter;
722         struct e1000_hw *hw = &adapter->hw;
723         u32 ivar, index;
724         int rx_queue = IGB_N0_QUEUE;
725         int tx_queue = IGB_N0_QUEUE;
726
727         if (q_vector->rx_ring)
728                 rx_queue = q_vector->rx_ring->reg_idx;
729         if (q_vector->tx_ring)
730                 tx_queue = q_vector->tx_ring->reg_idx;
731
732         switch (hw->mac.type) {
733         case e1000_82575:
734                 /* The 82575 assigns vectors using a bitmask, which matches the
735                    bitmask for the EICR/EIMS/EIMC registers.  To assign one
736                    or more queues to a vector, we write the appropriate bits
737                    into the MSIXBM register for that vector. */
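                /* For example, a vector serving Rx queue 1 and Tx queue 1 ends
                   up with msixbm = (E1000_EICR_RX_QUEUE0 << 1) |
                   (E1000_EICR_TX_QUEUE0 << 1). */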
738                 if (rx_queue > IGB_N0_QUEUE)
739                         msixbm = E1000_EICR_RX_QUEUE0 << rx_queue;
740                 if (tx_queue > IGB_N0_QUEUE)
741                         msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue;
742                 if (!adapter->msix_entries && msix_vector == 0)
743                         msixbm |= E1000_EIMS_OTHER;
744                 array_wr32(E1000_MSIXBM(0), msix_vector, msixbm);
745                 q_vector->eims_value = msixbm;
746                 break;
747         case e1000_82576:
748                 /* 82576 uses a table-based method for assigning vectors.
749                    Each queue has a single entry in the table to which we write
750                    a vector number along with a "valid" bit.  Sadly, the layout
751                    of the table is somewhat counterintuitive. */
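                /* Each 32-bit IVAR0 entry covers four queues: Rx queue n uses
                   the low byte and Rx queue n+8 the third byte of entry n,
                   while the matching Tx queues use the second and high bytes. */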
752                 if (rx_queue > IGB_N0_QUEUE) {
753                         index = (rx_queue & 0x7);
754                         ivar = array_rd32(E1000_IVAR0, index);
755                         if (rx_queue < 8) {
756                                 /* vector goes into low byte of register */
757                                 ivar = ivar & 0xFFFFFF00;
758                                 ivar |= msix_vector | E1000_IVAR_VALID;
759                         } else {
760                                 /* vector goes into third byte of register */
761                                 ivar = ivar & 0xFF00FFFF;
762                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
763                         }
764                         array_wr32(E1000_IVAR0, index, ivar);
765                 }
766                 if (tx_queue > IGB_N0_QUEUE) {
767                         index = (tx_queue & 0x7);
768                         ivar = array_rd32(E1000_IVAR0, index);
769                         if (tx_queue < 8) {
770                                 /* vector goes into second byte of register */
771                                 ivar = ivar & 0xFFFF00FF;
772                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
773                         } else {
774                                 /* vector goes into high byte of register */
775                                 ivar = ivar & 0x00FFFFFF;
776                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
777                         }
778                         array_wr32(E1000_IVAR0, index, ivar);
779                 }
780                 q_vector->eims_value = 1 << msix_vector;
781                 break;
782         case e1000_82580:
783         case e1000_i350:
784                 /* 82580 uses the same table-based approach as 82576 but has fewer
785                    entries; as a result we carry over for queues greater than 4. */
786                 if (rx_queue > IGB_N0_QUEUE) {
787                         index = (rx_queue >> 1);
788                         ivar = array_rd32(E1000_IVAR0, index);
789                         if (rx_queue & 0x1) {
790                                 /* vector goes into third byte of register */
791                                 ivar = ivar & 0xFF00FFFF;
792                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 16;
793                         } else {
794                                 /* vector goes into low byte of register */
795                                 ivar = ivar & 0xFFFFFF00;
796                                 ivar |= msix_vector | E1000_IVAR_VALID;
797                         }
798                         array_wr32(E1000_IVAR0, index, ivar);
799                 }
800                 if (tx_queue > IGB_N0_QUEUE) {
801                         index = (tx_queue >> 1);
802                         ivar = array_rd32(E1000_IVAR0, index);
803                         if (tx_queue & 0x1) {
804                                 /* vector goes into high byte of register */
805                                 ivar = ivar & 0x00FFFFFF;
806                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 24;
807                         } else {
808                                 /* vector goes into second byte of register */
809                                 ivar = ivar & 0xFFFF00FF;
810                                 ivar |= (msix_vector | E1000_IVAR_VALID) << 8;
811                         }
812                         array_wr32(E1000_IVAR0, index, ivar);
813                 }
814                 q_vector->eims_value = 1 << msix_vector;
815                 break;
816         default:
817                 BUG();
818                 break;
819         }
820
821         /* add q_vector eims value to global eims_enable_mask */
822         adapter->eims_enable_mask |= q_vector->eims_value;
823
824         /* configure q_vector to set itr on first interrupt */
825         q_vector->set_itr = 1;
826 }
827
828 /**
829  * igb_configure_msix - Configure MSI-X hardware
830  *
831  * igb_configure_msix sets up the hardware to properly
832  * generate MSI-X interrupts.
833  **/
834 static void igb_configure_msix(struct igb_adapter *adapter)
835 {
836         u32 tmp;
837         int i, vector = 0;
838         struct e1000_hw *hw = &adapter->hw;
839
840         adapter->eims_enable_mask = 0;
841
842         /* set vector for other causes, i.e. link changes */
843         switch (hw->mac.type) {
844         case e1000_82575:
845                 tmp = rd32(E1000_CTRL_EXT);
846                 /* enable MSI-X PBA support*/
847                 tmp |= E1000_CTRL_EXT_PBA_CLR;
848
849                 /* Auto-Mask interrupts upon ICR read. */
850                 tmp |= E1000_CTRL_EXT_EIAME;
851                 tmp |= E1000_CTRL_EXT_IRCA;
852
853                 wr32(E1000_CTRL_EXT, tmp);
854
855                 /* enable msix_other interrupt */
856                 array_wr32(E1000_MSIXBM(0), vector++,
857                                       E1000_EIMS_OTHER);
858                 adapter->eims_other = E1000_EIMS_OTHER;
859
860                 break;
861
862         case e1000_82576:
863         case e1000_82580:
864         case e1000_i350:
865                 /* Turn on MSI-X capability first, or our settings
866                  * won't stick.  And it will take days to debug. */
867                 wr32(E1000_GPIE, E1000_GPIE_MSIX_MODE |
868                                 E1000_GPIE_PBA | E1000_GPIE_EIAME |
869                                 E1000_GPIE_NSICR);
870
871                 /* enable msix_other interrupt */
872                 adapter->eims_other = 1 << vector;
873                 tmp = (vector++ | E1000_IVAR_VALID) << 8;
874
875                 wr32(E1000_IVAR_MISC, tmp);
876                 break;
877         default:
878                 /* do nothing, since nothing else supports MSI-X */
879                 break;
880         } /* switch (hw->mac.type) */
881
882         adapter->eims_enable_mask |= adapter->eims_other;
883
884         for (i = 0; i < adapter->num_q_vectors; i++)
885                 igb_assign_vector(adapter->q_vector[i], vector++);
886
887         wrfl();
888 }
889
890 /**
891  * igb_request_msix - Initialize MSI-X interrupts
892  *
893  * igb_request_msix allocates MSI-X vectors and requests interrupts from the
894  * kernel.
895  **/
896 static int igb_request_msix(struct igb_adapter *adapter)
897 {
898         struct net_device *netdev = adapter->netdev;
899         struct e1000_hw *hw = &adapter->hw;
900         int i, err = 0, vector = 0;
901
902         err = request_irq(adapter->msix_entries[vector].vector,
903                           igb_msix_other, 0, netdev->name, adapter);
904         if (err)
905                 goto out;
906         vector++;
907
908         for (i = 0; i < adapter->num_q_vectors; i++) {
909                 struct igb_q_vector *q_vector = adapter->q_vector[i];
910
911                 q_vector->itr_register = hw->hw_addr + E1000_EITR(vector);
912
913                 if (q_vector->rx_ring && q_vector->tx_ring)
914                         sprintf(q_vector->name, "%s-TxRx-%u", netdev->name,
915                                 q_vector->rx_ring->queue_index);
916                 else if (q_vector->tx_ring)
917                         sprintf(q_vector->name, "%s-tx-%u", netdev->name,
918                                 q_vector->tx_ring->queue_index);
919                 else if (q_vector->rx_ring)
920                         sprintf(q_vector->name, "%s-rx-%u", netdev->name,
921                                 q_vector->rx_ring->queue_index);
922                 else
923                         sprintf(q_vector->name, "%s-unused", netdev->name);
924
925                 err = request_irq(adapter->msix_entries[vector].vector,
926                                   igb_msix_ring, 0, q_vector->name,
927                                   q_vector);
928                 if (err)
929                         goto out;
930                 vector++;
931         }
932
933         igb_configure_msix(adapter);
934         return 0;
935 out:
936         return err;
937 }
938
939 static void igb_reset_interrupt_capability(struct igb_adapter *adapter)
940 {
941         if (adapter->msix_entries) {
942                 pci_disable_msix(adapter->pdev);
943                 kfree(adapter->msix_entries);
944                 adapter->msix_entries = NULL;
945         } else if (adapter->flags & IGB_FLAG_HAS_MSI) {
946                 pci_disable_msi(adapter->pdev);
947         }
948 }
949
950 /**
951  * igb_free_q_vectors - Free memory allocated for interrupt vectors
952  * @adapter: board private structure to initialize
953  *
954  * This function frees the memory allocated to the q_vectors.  In addition if
955  * NAPI is enabled it will delete any references to the NAPI struct prior
956  * to freeing the q_vector.
957  **/
958 static void igb_free_q_vectors(struct igb_adapter *adapter)
959 {
960         int v_idx;
961
962         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
963                 struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
964                 adapter->q_vector[v_idx] = NULL;
965                 if (!q_vector)
966                         continue;
967                 netif_napi_del(&q_vector->napi);
968                 kfree(q_vector);
969         }
970         adapter->num_q_vectors = 0;
971 }
972
973 /**
974  * igb_clear_interrupt_scheme - reset the device to a state of no interrupts
975  *
976  * This function resets the device so that it has 0 rx queues, tx queues, and
977  * MSI-X interrupts allocated.
978  */
979 static void igb_clear_interrupt_scheme(struct igb_adapter *adapter)
980 {
981         igb_free_queues(adapter);
982         igb_free_q_vectors(adapter);
983         igb_reset_interrupt_capability(adapter);
984 }
985
986 /**
987  * igb_set_interrupt_capability - set MSI or MSI-X if supported
988  *
989  * Attempt to configure interrupts using the best available
990  * capabilities of the hardware and kernel.
991  **/
992 static void igb_set_interrupt_capability(struct igb_adapter *adapter)
993 {
994         int err;
995         int numvecs, i;
996
997         /* Number of supported queues. */
998         adapter->num_rx_queues = adapter->rss_queues;
999         adapter->num_tx_queues = adapter->rss_queues;
1000
1001         /* start with one vector for every rx queue */
1002         numvecs = adapter->num_rx_queues;
1003
1004         /* if tx handler is separate add 1 for every tx queue */
1005         if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS))
1006                 numvecs += adapter->num_tx_queues;
1007
1008         /* store the number of vectors reserved for queues */
1009         adapter->num_q_vectors = numvecs;
1010
1011         /* add 1 vector for link status interrupts */
1012         numvecs++;
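        /* e.g. with 4 RSS queues this is 4 Rx + 4 Tx + 1 other = 9 vectors
         * when Tx has its own handler, or 4 + 1 = 5 with queue pairing. */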
1013         adapter->msix_entries = kcalloc(numvecs, sizeof(struct msix_entry),
1014                                         GFP_KERNEL);
1015         if (!adapter->msix_entries)
1016                 goto msi_only;
1017
1018         for (i = 0; i < numvecs; i++)
1019                 adapter->msix_entries[i].entry = i;
1020
1021         err = pci_enable_msix(adapter->pdev,
1022                               adapter->msix_entries,
1023                               numvecs);
1024         if (err == 0)
1025                 goto out;
1026
1027         igb_reset_interrupt_capability(adapter);
1028
1029         /* If we can't do MSI-X, try MSI */
1030 msi_only:
1031 #ifdef CONFIG_PCI_IOV
1032         /* disable SR-IOV for non MSI-X configurations */
1033         if (adapter->vf_data) {
1034                 struct e1000_hw *hw = &adapter->hw;
1035                 /* disable iov and allow time for transactions to clear */
1036                 pci_disable_sriov(adapter->pdev);
1037                 msleep(500);
1038
1039                 kfree(adapter->vf_data);
1040                 adapter->vf_data = NULL;
1041                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
1042                 msleep(100);
1043                 dev_info(&adapter->pdev->dev, "IOV Disabled\n");
1044         }
1045 #endif
1046         adapter->vfs_allocated_count = 0;
1047         adapter->rss_queues = 1;
1048         adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
1049         adapter->num_rx_queues = 1;
1050         adapter->num_tx_queues = 1;
1051         adapter->num_q_vectors = 1;
1052         if (!pci_enable_msi(adapter->pdev))
1053                 adapter->flags |= IGB_FLAG_HAS_MSI;
1054 out:
1055         /* Notify the stack of the (possibly) reduced Tx Queue count. */
1056         adapter->netdev->real_num_tx_queues = adapter->num_tx_queues;
1057 }
1058
1059 /**
1060  * igb_alloc_q_vectors - Allocate memory for interrupt vectors
1061  * @adapter: board private structure to initialize
1062  *
1063  * We allocate one q_vector per queue interrupt.  If allocation fails we
1064  * return -ENOMEM.
1065  **/
1066 static int igb_alloc_q_vectors(struct igb_adapter *adapter)
1067 {
1068         struct igb_q_vector *q_vector;
1069         struct e1000_hw *hw = &adapter->hw;
1070         int v_idx;
1071
1072         for (v_idx = 0; v_idx < adapter->num_q_vectors; v_idx++) {
1073                 q_vector = kzalloc(sizeof(struct igb_q_vector), GFP_KERNEL);
1074                 if (!q_vector)
1075                         goto err_out;
1076                 q_vector->adapter = adapter;
1077                 q_vector->itr_register = hw->hw_addr + E1000_EITR(0);
1078                 q_vector->itr_val = IGB_START_ITR;
1079                 netif_napi_add(adapter->netdev, &q_vector->napi, igb_poll, 64);
1080                 adapter->q_vector[v_idx] = q_vector;
1081         }
1082         return 0;
1083
1084 err_out:
1085         igb_free_q_vectors(adapter);
1086         return -ENOMEM;
1087 }
1088
1089 static void igb_map_rx_ring_to_vector(struct igb_adapter *adapter,
1090                                       int ring_idx, int v_idx)
1091 {
1092         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1093
1094         q_vector->rx_ring = adapter->rx_ring[ring_idx];
1095         q_vector->rx_ring->q_vector = q_vector;
1096         q_vector->itr_val = adapter->rx_itr_setting;
1097         if (q_vector->itr_val && q_vector->itr_val <= 3)
1098                 q_vector->itr_val = IGB_START_ITR;
1099 }
1100
1101 static void igb_map_tx_ring_to_vector(struct igb_adapter *adapter,
1102                                       int ring_idx, int v_idx)
1103 {
1104         struct igb_q_vector *q_vector = adapter->q_vector[v_idx];
1105
1106         q_vector->tx_ring = adapter->tx_ring[ring_idx];
1107         q_vector->tx_ring->q_vector = q_vector;
1108         q_vector->itr_val = adapter->tx_itr_setting;
1109         if (q_vector->itr_val && q_vector->itr_val <= 3)
1110                 q_vector->itr_val = IGB_START_ITR;
1111 }
1112
1113 /**
1114  * igb_map_ring_to_vector - maps allocated queues to vectors
1115  *
1116  * This function maps the recently allocated queues to vectors.
1117  **/
1118 static int igb_map_ring_to_vector(struct igb_adapter *adapter)
1119 {
1120         int i;
1121         int v_idx = 0;
1122
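        /*
         * Two layouts are possible here: with one q_vector per ring, every Rx
         * and Tx ring gets a dedicated vector; otherwise Tx ring i is paired
         * onto the same q_vector as Rx ring i.
         */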
1123         if ((adapter->num_q_vectors < adapter->num_rx_queues) ||
1124             (adapter->num_q_vectors < adapter->num_tx_queues))
1125                 return -ENOMEM;
1126
1127         if (adapter->num_q_vectors >=
1128             (adapter->num_rx_queues + adapter->num_tx_queues)) {
1129                 for (i = 0; i < adapter->num_rx_queues; i++)
1130                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1131                 for (i = 0; i < adapter->num_tx_queues; i++)
1132                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1133         } else {
1134                 for (i = 0; i < adapter->num_rx_queues; i++) {
1135                         if (i < adapter->num_tx_queues)
1136                                 igb_map_tx_ring_to_vector(adapter, i, v_idx);
1137                         igb_map_rx_ring_to_vector(adapter, i, v_idx++);
1138                 }
1139                 for (; i < adapter->num_tx_queues; i++)
1140                         igb_map_tx_ring_to_vector(adapter, i, v_idx++);
1141         }
1142         return 0;
1143 }
1144
1145 /**
1146  * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors
1147  *
1148  * This function initializes the interrupts and allocates all of the queues.
1149  **/
1150 static int igb_init_interrupt_scheme(struct igb_adapter *adapter)
1151 {
1152         struct pci_dev *pdev = adapter->pdev;
1153         int err;
1154
1155         igb_set_interrupt_capability(adapter);
1156
1157         err = igb_alloc_q_vectors(adapter);
1158         if (err) {
1159                 dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
1160                 goto err_alloc_q_vectors;
1161         }
1162
1163         err = igb_alloc_queues(adapter);
1164         if (err) {
1165                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
1166                 goto err_alloc_queues;
1167         }
1168
1169         err = igb_map_ring_to_vector(adapter);
1170         if (err) {
1171                 dev_err(&pdev->dev, "Invalid q_vector to ring mapping\n");
1172                 goto err_map_queues;
1173         }
1174
1175
1176         return 0;
1177 err_map_queues:
1178         igb_free_queues(adapter);
1179 err_alloc_queues:
1180         igb_free_q_vectors(adapter);
1181 err_alloc_q_vectors:
1182         igb_reset_interrupt_capability(adapter);
1183         return err;
1184 }
1185
1186 /**
1187  * igb_request_irq - initialize interrupts
1188  *
1189  * Attempts to configure interrupts using the best available
1190  * capabilities of the hardware and kernel.
1191  **/
1192 static int igb_request_irq(struct igb_adapter *adapter)
1193 {
1194         struct net_device *netdev = adapter->netdev;
1195         struct pci_dev *pdev = adapter->pdev;
1196         int err = 0;
1197
1198         if (adapter->msix_entries) {
1199                 err = igb_request_msix(adapter);
1200                 if (!err)
1201                         goto request_done;
1202                 /* fall back to MSI */
1203                 igb_clear_interrupt_scheme(adapter);
1204                 if (!pci_enable_msi(adapter->pdev))
1205                         adapter->flags |= IGB_FLAG_HAS_MSI;
1206                 igb_free_all_tx_resources(adapter);
1207                 igb_free_all_rx_resources(adapter);
1208                 adapter->num_tx_queues = 1;
1209                 adapter->num_rx_queues = 1;
1210                 adapter->num_q_vectors = 1;
1211                 err = igb_alloc_q_vectors(adapter);
1212                 if (err) {
1213                         dev_err(&pdev->dev,
1214                                 "Unable to allocate memory for vectors\n");
1215                         goto request_done;
1216                 }
1217                 err = igb_alloc_queues(adapter);
1218                 if (err) {
1219                         dev_err(&pdev->dev,
1220                                 "Unable to allocate memory for queues\n");
1221                         igb_free_q_vectors(adapter);
1222                         goto request_done;
1223                 }
1224                 igb_setup_all_tx_resources(adapter);
1225                 igb_setup_all_rx_resources(adapter);
1226         } else {
1227                 igb_assign_vector(adapter->q_vector[0], 0);
1228         }
1229
1230         if (adapter->flags & IGB_FLAG_HAS_MSI) {
1231                 err = request_irq(adapter->pdev->irq, igb_intr_msi, 0,
1232                                   netdev->name, adapter);
1233                 if (!err)
1234                         goto request_done;
1235
1236                 /* fall back to legacy interrupts */
1237                 igb_reset_interrupt_capability(adapter);
1238                 adapter->flags &= ~IGB_FLAG_HAS_MSI;
1239         }
1240
1241         err = request_irq(adapter->pdev->irq, igb_intr, IRQF_SHARED,
1242                           netdev->name, adapter);
1243
1244         if (err)
1245                 dev_err(&adapter->pdev->dev, "Error %d getting interrupt\n",
1246                         err);
1247
1248 request_done:
1249         return err;
1250 }
1251
1252 static void igb_free_irq(struct igb_adapter *adapter)
1253 {
1254         if (adapter->msix_entries) {
1255                 int vector = 0, i;
1256
1257                 free_irq(adapter->msix_entries[vector++].vector, adapter);
1258
1259                 for (i = 0; i < adapter->num_q_vectors; i++) {
1260                         struct igb_q_vector *q_vector = adapter->q_vector[i];
1261                         free_irq(adapter->msix_entries[vector++].vector,
1262                                  q_vector);
1263                 }
1264         } else {
1265                 free_irq(adapter->pdev->irq, adapter);
1266         }
1267 }
1268
1269 /**
1270  * igb_irq_disable - Mask off interrupt generation on the NIC
1271  * @adapter: board private structure
1272  **/
1273 static void igb_irq_disable(struct igb_adapter *adapter)
1274 {
1275         struct e1000_hw *hw = &adapter->hw;
1276
1277         /*
1278          * we need to be careful when disabling interrupts.  The VFs are also
1279          * mapped into these registers and so clearing the bits can cause
1280          * issues on the VF drivers so we only need to clear what we set
1281          */
1282         if (adapter->msix_entries) {
1283                 u32 regval = rd32(E1000_EIAM);
1284                 wr32(E1000_EIAM, regval & ~adapter->eims_enable_mask);
1285                 wr32(E1000_EIMC, adapter->eims_enable_mask);
1286                 regval = rd32(E1000_EIAC);
1287                 wr32(E1000_EIAC, regval & ~adapter->eims_enable_mask);
1288         }
1289
1290         wr32(E1000_IAM, 0);
1291         wr32(E1000_IMC, ~0);
1292         wrfl();
1293         synchronize_irq(adapter->pdev->irq);
1294 }
1295
1296 /**
1297  * igb_irq_enable - Enable default interrupt generation settings
1298  * @adapter: board private structure
1299  **/
1300 static void igb_irq_enable(struct igb_adapter *adapter)
1301 {
1302         struct e1000_hw *hw = &adapter->hw;
1303
1304         if (adapter->msix_entries) {
1305                 u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC;
1306                 u32 regval = rd32(E1000_EIAC);
1307                 wr32(E1000_EIAC, regval | adapter->eims_enable_mask);
1308                 regval = rd32(E1000_EIAM);
1309                 wr32(E1000_EIAM, regval | adapter->eims_enable_mask);
1310                 wr32(E1000_EIMS, adapter->eims_enable_mask);
1311                 if (adapter->vfs_allocated_count) {
1312                         wr32(E1000_MBVFIMR, 0xFF);
1313                         ims |= E1000_IMS_VMMB;
1314                 }
1315                 if (adapter->hw.mac.type == e1000_82580)
1316                         ims |= E1000_IMS_DRSTA;
1317
1318                 wr32(E1000_IMS, ims);
1319         } else {
1320                 wr32(E1000_IMS, IMS_ENABLE_MASK |
1321                                 E1000_IMS_DRSTA);
1322                 wr32(E1000_IAM, IMS_ENABLE_MASK |
1323                                 E1000_IMS_DRSTA);
1324         }
1325 }
1326
1327 static void igb_update_mng_vlan(struct igb_adapter *adapter)
1328 {
1329         struct e1000_hw *hw = &adapter->hw;
1330         u16 vid = adapter->hw.mng_cookie.vlan_id;
1331         u16 old_vid = adapter->mng_vlan_id;
1332
1333         if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) {
1334                 /* add VID to filter table */
1335                 igb_vfta_set(hw, vid, true);
1336                 adapter->mng_vlan_id = vid;
1337         } else {
1338                 adapter->mng_vlan_id = IGB_MNG_VLAN_NONE;
1339         }
1340
1341         if ((old_vid != (u16)IGB_MNG_VLAN_NONE) &&
1342             (vid != old_vid) &&
1343             !vlan_group_get_device(adapter->vlgrp, old_vid)) {
1344                 /* remove VID from filter table */
1345                 igb_vfta_set(hw, old_vid, false);
1346         }
1347 }
1348
1349 /**
1350  * igb_release_hw_control - release control of the h/w to f/w
1351  * @adapter: address of board private structure
1352  *
1353  * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit.
1354  * For ASF and Pass Through versions of f/w this means that the
1355  * driver is no longer loaded.
1356  *
1357  **/
1358 static void igb_release_hw_control(struct igb_adapter *adapter)
1359 {
1360         struct e1000_hw *hw = &adapter->hw;
1361         u32 ctrl_ext;
1362
1363         /* Let firmware take over control of h/w */
1364         ctrl_ext = rd32(E1000_CTRL_EXT);
1365         wr32(E1000_CTRL_EXT,
1366                         ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD);
1367 }
1368
1369 /**
1370  * igb_get_hw_control - get control of the h/w from f/w
1371  * @adapter: address of board private structure
1372  *
1373  * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit.
1374  * For ASF and Pass Through versions of f/w this means that
1375  * the driver is loaded.
1376  *
1377  **/
1378 static void igb_get_hw_control(struct igb_adapter *adapter)
1379 {
1380         struct e1000_hw *hw = &adapter->hw;
1381         u32 ctrl_ext;
1382
1383         /* Let firmware know the driver has taken over */
1384         ctrl_ext = rd32(E1000_CTRL_EXT);
1385         wr32(E1000_CTRL_EXT,
1386                         ctrl_ext | E1000_CTRL_EXT_DRV_LOAD);
1387 }
1388
1389 /**
1390  * igb_configure - configure the hardware for RX and TX
1391  * @adapter: private board structure
1392  **/
1393 static void igb_configure(struct igb_adapter *adapter)
1394 {
1395         struct net_device *netdev = adapter->netdev;
1396         int i;
1397
1398         igb_get_hw_control(adapter);
1399         igb_set_rx_mode(netdev);
1400
1401         igb_restore_vlan(adapter);
1402
1403         igb_setup_tctl(adapter);
1404         igb_setup_mrqc(adapter);
1405         igb_setup_rctl(adapter);
1406
1407         igb_configure_tx(adapter);
1408         igb_configure_rx(adapter);
1409
1410         igb_rx_fifo_flush_82575(&adapter->hw);
1411
1412         /* call igb_desc_unused which always leaves
1413          * at least 1 descriptor unused to make sure
1414          * next_to_use != next_to_clean */
1415         for (i = 0; i < adapter->num_rx_queues; i++) {
1416                 struct igb_ring *ring = adapter->rx_ring[i];
1417                 igb_alloc_rx_buffers_adv(ring, igb_desc_unused(ring));
1418         }
1419 }
1420
1421 /**
1422  * igb_power_up_link - Power up the phy/serdes link
1423  * @adapter: address of board private structure
1424  **/
1425 void igb_power_up_link(struct igb_adapter *adapter)
1426 {
1427         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1428                 igb_power_up_phy_copper(&adapter->hw);
1429         else
1430                 igb_power_up_serdes_link_82575(&adapter->hw);
1431 }
1432
1433 /**
1434  * igb_power_down_link - Power down the phy/serdes link
1435  * @adapter: address of board private structure
1436  */
1437 static void igb_power_down_link(struct igb_adapter *adapter)
1438 {
1439         if (adapter->hw.phy.media_type == e1000_media_type_copper)
1440                 igb_power_down_phy_copper_82575(&adapter->hw);
1441         else
1442                 igb_shutdown_serdes_link_82575(&adapter->hw);
1443 }
1444
1445 /**
1446  * igb_up - Open the interface and prepare it to handle traffic
1447  * @adapter: board private structure
1448  **/
1449 int igb_up(struct igb_adapter *adapter)
1450 {
1451         struct e1000_hw *hw = &adapter->hw;
1452         int i;
1453
1454         /* hardware has been reset, we need to reload some things */
1455         igb_configure(adapter);
1456
1457         clear_bit(__IGB_DOWN, &adapter->state);
1458
1459         for (i = 0; i < adapter->num_q_vectors; i++) {
1460                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1461                 napi_enable(&q_vector->napi);
1462         }
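             /* re-program the interrupt cause to vector mapping now that the
              * rings have been reconfigured after the reset */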
1463         if (adapter->msix_entries)
1464                 igb_configure_msix(adapter);
1465         else
1466                 igb_assign_vector(adapter->q_vector[0], 0);
1467
1468         /* Clear any pending interrupts. */
1469         rd32(E1000_ICR);
1470         igb_irq_enable(adapter);
1471
1472         /* notify VFs that reset has been completed */
1473         if (adapter->vfs_allocated_count) {
1474                 u32 reg_data = rd32(E1000_CTRL_EXT);
1475                 reg_data |= E1000_CTRL_EXT_PFRSTD;
1476                 wr32(E1000_CTRL_EXT, reg_data);
1477         }
1478
1479         netif_tx_start_all_queues(adapter->netdev);
1480
1481         /* start the watchdog. */
1482         hw->mac.get_link_status = 1;
1483         schedule_work(&adapter->watchdog_task);
1484
1485         return 0;
1486 }
1487
1488 void igb_down(struct igb_adapter *adapter)
1489 {
1490         struct net_device *netdev = adapter->netdev;
1491         struct e1000_hw *hw = &adapter->hw;
1492         u32 tctl, rctl;
1493         int i;
1494
1495         /* signal that we're down so the interrupt handler does not
1496          * reschedule our watchdog timer */
1497         set_bit(__IGB_DOWN, &adapter->state);
1498
1499         /* disable receives in the hardware */
1500         rctl = rd32(E1000_RCTL);
1501         wr32(E1000_RCTL, rctl & ~E1000_RCTL_EN);
1502         /* flush and sleep below */
1503
1504         netif_tx_stop_all_queues(netdev);
1505
1506         /* disable transmits in the hardware */
1507         tctl = rd32(E1000_TCTL);
1508         tctl &= ~E1000_TCTL_EN;
1509         wr32(E1000_TCTL, tctl);
1510         /* flush both disables and wait for them to finish */
1511         wrfl();
1512         msleep(10);
1513
1514         for (i = 0; i < adapter->num_q_vectors; i++) {
1515                 struct igb_q_vector *q_vector = adapter->q_vector[i];
1516                 napi_disable(&q_vector->napi);
1517         }
1518
1519         igb_irq_disable(adapter);
1520
1521         del_timer_sync(&adapter->watchdog_timer);
1522         del_timer_sync(&adapter->phy_info_timer);
1523
1524         netif_carrier_off(netdev);
1525
1526         /* record the stats before reset */
1527         igb_update_stats(adapter);
1528
1529         adapter->link_speed = 0;
1530         adapter->link_duplex = 0;
1531
1532         if (!pci_channel_offline(adapter->pdev))
1533                 igb_reset(adapter);
1534         igb_clean_all_tx_rings(adapter);
1535         igb_clean_all_rx_rings(adapter);
1536 #ifdef CONFIG_IGB_DCA
1537
1538         /* since we reset the hardware DCA settings were cleared */
1539         igb_setup_dca(adapter);
1540 #endif
1541 }
1542
1543 void igb_reinit_locked(struct igb_adapter *adapter)
1544 {
1545         WARN_ON(in_interrupt());
1546         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
1547                 msleep(1);
1548         igb_down(adapter);
1549         igb_up(adapter);
1550         clear_bit(__IGB_RESETTING, &adapter->state);
1551 }
1552
1553 void igb_reset(struct igb_adapter *adapter)
1554 {
1555         struct pci_dev *pdev = adapter->pdev;
1556         struct e1000_hw *hw = &adapter->hw;
1557         struct e1000_mac_info *mac = &hw->mac;
1558         struct e1000_fc_info *fc = &hw->fc;
1559         u32 pba = 0, tx_space, min_tx_space, min_rx_space;
1560         u16 hwm;
1561
1562         /* Repartition PBA for MTUs greater than 9K.
1563          * CTRL.RST is required for the change to take effect.
1564          */
1565         switch (mac->type) {
1566         case e1000_i350:
1567         case e1000_82580:
1568                 pba = rd32(E1000_RXPBS);
1569                 pba = igb_rxpbs_adjust_82580(pba);
1570                 break;
1571         case e1000_82576:
1572                 pba = rd32(E1000_RXPBS);
1573                 pba &= E1000_RXPBS_SIZE_MASK_82576;
1574                 break;
1575         case e1000_82575:
1576         default:
1577                 pba = E1000_PBA_34K;
1578                 break;
1579         }
1580
1581         if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) &&
1582             (mac->type < e1000_82576)) {
1583                 /* adjust PBA for jumbo frames */
1584                 wr32(E1000_PBA, pba);
1585
1586                 /* To maintain wire speed transmits, the Tx FIFO should be
1587                  * large enough to accommodate two full transmit packets,
1588                  * rounded up to the next 1KB and expressed in KB.  Likewise,
1589                  * the Rx FIFO should be large enough to accommodate at least
1590                  * one full receive packet and is similarly rounded up and
1591                  * expressed in KB. */
1592                 pba = rd32(E1000_PBA);
1593                 /* upper 16 bits has Tx packet buffer allocation size in KB */
1594                 tx_space = pba >> 16;
1595                 /* lower 16 bits has Rx packet buffer allocation size in KB */
1596                 pba &= 0xffff;
1597                 /* the tx fifo also stores 16 bytes of descriptor information for
1598                  * each packet, but don't count the ethernet FCS because hardware appends it */
1599                 min_tx_space = (adapter->max_frame_size +
1600                                 sizeof(union e1000_adv_tx_desc) -
1601                                 ETH_FCS_LEN) * 2;
1602                 min_tx_space = ALIGN(min_tx_space, 1024);
1603                 min_tx_space >>= 10;
1604                 /* software strips receive CRC, so leave room for it */
1605                 min_rx_space = adapter->max_frame_size;
1606                 min_rx_space = ALIGN(min_rx_space, 1024);
1607                 min_rx_space >>= 10;
1608
1609                 /* If current Tx allocation is less than the min Tx FIFO size,
1610                  * and the min Tx FIFO size is less than the current Rx FIFO
1611                  * allocation, take space away from current Rx allocation */
1612                 if (tx_space < min_tx_space &&
1613                     ((min_tx_space - tx_space) < pba)) {
1614                         pba = pba - (min_tx_space - tx_space);
1615
1616                         /* if short on rx space, rx wins and must trump tx
1617                          * adjustment */
1618                         if (pba < min_rx_space)
1619                                 pba = min_rx_space;
1620                 }
1621                 wr32(E1000_PBA, pba);
1622         }
1623
1624         /* flow control settings */
1625         /* The high water mark must be low enough to fit one full frame
1626          * (or the size used for early receive) above it in the Rx FIFO.
1627          * Set it to the lower of:
1628          * - 90% of the Rx FIFO size, or
1629          * - the full Rx FIFO size minus two full frames (matching the code below) */
1630         hwm = min(((pba << 10) * 9 / 10),
1631                         ((pba << 10) - 2 * adapter->max_frame_size));
1632
1633         fc->high_water = hwm & 0xFFF0;  /* 16-byte granularity */
1634         fc->low_water = fc->high_water - 16;
1635         fc->pause_time = 0xFFFF;
1636         fc->send_xon = 1;
1637         fc->current_mode = fc->requested_mode;
1638
1639         /* reset per-VF state and disable VF transmit and receive queues */
1640         if (adapter->vfs_allocated_count) {
1641                 int i;
1642                 for (i = 0 ; i < adapter->vfs_allocated_count; i++)
1643                         adapter->vf_data[i].flags = 0;
1644
1645                 /* ping all the active vfs to let them know we are going down */
1646                 igb_ping_all_vfs(adapter);
1647
1648                 /* disable transmits and receives */
1649                 wr32(E1000_VFRE, 0);
1650                 wr32(E1000_VFTE, 0);
1651         }
1652
1653         /* Allow time for pending master requests to run */
1654         hw->mac.ops.reset_hw(hw);
1655         wr32(E1000_WUC, 0);
1656
1657         if (hw->mac.ops.init_hw(hw))
1658                 dev_err(&pdev->dev, "Hardware Error\n");
1659
1660         if (hw->mac.type == e1000_82580) {
1661                 u32 reg = rd32(E1000_PCIEMISC);
1662                 wr32(E1000_PCIEMISC,
1663                                 reg & ~E1000_PCIEMISC_LX_DECISION);
1664         }
1665         if (!netif_running(adapter->netdev))
1666                 igb_power_down_link(adapter);
1667
1668         igb_update_mng_vlan(adapter);
1669
1670         /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */
1671         wr32(E1000_VET, ETHERNET_IEEE_VLAN_TYPE);
1672
1673         igb_get_phy_info(hw);
1674 }
1675
1676 static const struct net_device_ops igb_netdev_ops = {
1677         .ndo_open               = igb_open,
1678         .ndo_stop               = igb_close,
1679         .ndo_start_xmit         = igb_xmit_frame_adv,
1680         .ndo_get_stats          = igb_get_stats,
1681         .ndo_set_rx_mode        = igb_set_rx_mode,
1682         .ndo_set_multicast_list = igb_set_rx_mode,
1683         .ndo_set_mac_address    = igb_set_mac,
1684         .ndo_change_mtu         = igb_change_mtu,
1685         .ndo_do_ioctl           = igb_ioctl,
1686         .ndo_tx_timeout         = igb_tx_timeout,
1687         .ndo_validate_addr      = eth_validate_addr,
1688         .ndo_vlan_rx_register   = igb_vlan_rx_register,
1689         .ndo_vlan_rx_add_vid    = igb_vlan_rx_add_vid,
1690         .ndo_vlan_rx_kill_vid   = igb_vlan_rx_kill_vid,
1691         .ndo_set_vf_mac         = igb_ndo_set_vf_mac,
1692         .ndo_set_vf_vlan        = igb_ndo_set_vf_vlan,
1693         .ndo_set_vf_tx_rate     = igb_ndo_set_vf_bw,
1694         .ndo_get_vf_config      = igb_ndo_get_vf_config,
1695 #ifdef CONFIG_NET_POLL_CONTROLLER
1696         .ndo_poll_controller    = igb_netpoll,
1697 #endif
1698 };
1699
1700 /**
1701  * igb_probe - Device Initialization Routine
1702  * @pdev: PCI device information struct
1703  * @ent: entry in igb_pci_tbl
1704  *
1705  * Returns 0 on success, negative on failure
1706  *
1707  * igb_probe initializes an adapter identified by a pci_dev structure.
1708  * The OS initialization, configuring of the adapter private structure,
1709  * and a hardware reset occur.
1710  **/
1711 static int __devinit igb_probe(struct pci_dev *pdev,
1712                                const struct pci_device_id *ent)
1713 {
1714         struct net_device *netdev;
1715         struct igb_adapter *adapter;
1716         struct e1000_hw *hw;
1717         u16 eeprom_data = 0;
1718         static int global_quad_port_a; /* global quad port a indication */
1719         const struct e1000_info *ei = igb_info_tbl[ent->driver_data];
1720         unsigned long mmio_start, mmio_len;
1721         int err, pci_using_dac;
1722         u16 eeprom_apme_mask = IGB_EEPROM_APME;
1723         u32 part_num;
1724
1725         /* Catch broken hardware that put the wrong VF device ID in
1726          * the PCIe SR-IOV capability.
1727          */
1728         if (pdev->is_virtfn) {
1729                 WARN(1, KERN_ERR "%s (%hx:%hx) should not be a VF!\n",
1730                      pci_name(pdev), pdev->vendor, pdev->device);
1731                 return -EINVAL;
1732         }
1733
1734         err = pci_enable_device_mem(pdev);
1735         if (err)
1736                 return err;
1737
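             /* prefer a 64-bit DMA mask and fall back to 32-bit addressing
              * if the platform cannot satisfy it */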
1738         pci_using_dac = 0;
1739         err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
1740         if (!err) {
1741                 err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(64));
1742                 if (!err)
1743                         pci_using_dac = 1;
1744         } else {
1745                 err = dma_set_mask(&pdev->dev, DMA_BIT_MASK(32));
1746                 if (err) {
1747                         err = dma_set_coherent_mask(&pdev->dev, DMA_BIT_MASK(32));
1748                         if (err) {
1749                                 dev_err(&pdev->dev, "No usable DMA "
1750                                         "configuration, aborting\n");
1751                                 goto err_dma;
1752                         }
1753                 }
1754         }
1755
1756         err = pci_request_selected_regions(pdev, pci_select_bars(pdev,
1757                                            IORESOURCE_MEM),
1758                                            igb_driver_name);
1759         if (err)
1760                 goto err_pci_reg;
1761
1762         pci_enable_pcie_error_reporting(pdev);
1763
1764         pci_set_master(pdev);
1765         pci_save_state(pdev);
1766
1767         err = -ENOMEM;
1768         netdev = alloc_etherdev_mq(sizeof(struct igb_adapter),
1769                                    IGB_ABS_MAX_TX_QUEUES);
1770         if (!netdev)
1771                 goto err_alloc_etherdev;
1772
1773         SET_NETDEV_DEV(netdev, &pdev->dev);
1774
1775         pci_set_drvdata(pdev, netdev);
1776         adapter = netdev_priv(netdev);
1777         adapter->netdev = netdev;
1778         adapter->pdev = pdev;
1779         hw = &adapter->hw;
1780         hw->back = adapter;
1781         adapter->msg_enable = NETIF_MSG_DRV | NETIF_MSG_PROBE;
1782
1783         mmio_start = pci_resource_start(pdev, 0);
1784         mmio_len = pci_resource_len(pdev, 0);
1785
1786         err = -EIO;
1787         hw->hw_addr = ioremap(mmio_start, mmio_len);
1788         if (!hw->hw_addr)
1789                 goto err_ioremap;
1790
1791         netdev->netdev_ops = &igb_netdev_ops;
1792         igb_set_ethtool_ops(netdev);
1793         netdev->watchdog_timeo = 5 * HZ;
1794
1795         strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1);
1796
1797         netdev->mem_start = mmio_start;
1798         netdev->mem_end = mmio_start + mmio_len;
1799
1800         /* PCI config space info */
1801         hw->vendor_id = pdev->vendor;
1802         hw->device_id = pdev->device;
1803         hw->revision_id = pdev->revision;
1804         hw->subsystem_vendor_id = pdev->subsystem_vendor;
1805         hw->subsystem_device_id = pdev->subsystem_device;
1806
1807         /* Copy the default MAC, PHY and NVM function pointers */
1808         memcpy(&hw->mac.ops, ei->mac_ops, sizeof(hw->mac.ops));
1809         memcpy(&hw->phy.ops, ei->phy_ops, sizeof(hw->phy.ops));
1810         memcpy(&hw->nvm.ops, ei->nvm_ops, sizeof(hw->nvm.ops));
1811         /* Initialize skew-specific constants */
1812         err = ei->get_invariants(hw);
1813         if (err)
1814                 goto err_sw_init;
1815
1816         /* setup the private structure */
1817         err = igb_sw_init(adapter);
1818         if (err)
1819                 goto err_sw_init;
1820
1821         igb_get_bus_info_pcie(hw);
1822
1823         hw->phy.autoneg_wait_to_complete = false;
1824
1825         /* Copper options */
1826         if (hw->phy.media_type == e1000_media_type_copper) {
1827                 hw->phy.mdix = AUTO_ALL_MODES;
1828                 hw->phy.disable_polarity_correction = false;
1829                 hw->phy.ms_type = e1000_ms_hw_default;
1830         }
1831
1832         if (igb_check_reset_block(hw))
1833                 dev_info(&pdev->dev,
1834                         "PHY reset is blocked due to SOL/IDER session.\n");
1835
1836         netdev->features = NETIF_F_SG |
1837                            NETIF_F_IP_CSUM |
1838                            NETIF_F_HW_VLAN_TX |
1839                            NETIF_F_HW_VLAN_RX |
1840                            NETIF_F_HW_VLAN_FILTER;
1841
1842         netdev->features |= NETIF_F_IPV6_CSUM;
1843         netdev->features |= NETIF_F_TSO;
1844         netdev->features |= NETIF_F_TSO6;
1845         netdev->features |= NETIF_F_GRO;
1846
1847         netdev->vlan_features |= NETIF_F_TSO;
1848         netdev->vlan_features |= NETIF_F_TSO6;
1849         netdev->vlan_features |= NETIF_F_IP_CSUM;
1850         netdev->vlan_features |= NETIF_F_IPV6_CSUM;
1851         netdev->vlan_features |= NETIF_F_SG;
1852
1853         if (pci_using_dac)
1854                 netdev->features |= NETIF_F_HIGHDMA;
1855
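             /* SCTP checksum offload is only available on 82576 and later MACs */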
1856         if (hw->mac.type >= e1000_82576)
1857                 netdev->features |= NETIF_F_SCTP_CSUM;
1858
1859         adapter->en_mng_pt = igb_enable_mng_pass_thru(hw);
1860
1861         /* before reading the NVM, reset the controller to put the device in a
1862          * known good starting state */
1863         hw->mac.ops.reset_hw(hw);
1864
1865         /* make sure the NVM is good */
1866         if (igb_validate_nvm_checksum(hw) < 0) {
1867                 dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
1868                 err = -EIO;
1869                 goto err_eeprom;
1870         }
1871
1872         /* copy the MAC address out of the NVM */
1873         if (hw->mac.ops.read_mac_addr(hw))
1874                 dev_err(&pdev->dev, "NVM Read Error\n");
1875
1876         memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len);
1877         memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len);
1878
1879         if (!is_valid_ether_addr(netdev->perm_addr)) {
1880                 dev_err(&pdev->dev, "Invalid MAC Address\n");
1881                 err = -EIO;
1882                 goto err_eeprom;
1883         }
1884
1885         setup_timer(&adapter->watchdog_timer, &igb_watchdog,
1886                     (unsigned long) adapter);
1887         setup_timer(&adapter->phy_info_timer, &igb_update_phy_info,
1888                     (unsigned long) adapter);
1889
1890         INIT_WORK(&adapter->reset_task, igb_reset_task);
1891         INIT_WORK(&adapter->watchdog_task, igb_watchdog_task);
1892
1893         /* Initialize link properties that are user-changeable */
1894         adapter->fc_autoneg = true;
1895         hw->mac.autoneg = true;
1896         hw->phy.autoneg_advertised = 0x2f;
1897
1898         hw->fc.requested_mode = e1000_fc_default;
1899         hw->fc.current_mode = e1000_fc_default;
1900
1901         igb_validate_mdi_setting(hw);
1902
1903         /* Initial Wake on LAN setting.  If APM wake is enabled in the EEPROM,
1904          * enable the ACPI Magic Packet filter
1905          */
1906
1907         if (hw->bus.func == 0)
1908                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A, 1, &eeprom_data);
1909         else if (hw->mac.type == e1000_82580)
1910                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A +
1911                                  NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1,
1912                                  &eeprom_data);
1913         else if (hw->bus.func == 1)
1914                 hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data);
1915
1916         if (eeprom_data & eeprom_apme_mask)
1917                 adapter->eeprom_wol |= E1000_WUFC_MAG;
1918
1919         /* now that we have the eeprom settings, apply the special cases where
1920          * the eeprom may be wrong or the board simply won't support wake on
1921          * lan on a particular port */
1922         switch (pdev->device) {
1923         case E1000_DEV_ID_82575GB_QUAD_COPPER:
1924                 adapter->eeprom_wol = 0;
1925                 break;
1926         case E1000_DEV_ID_82575EB_FIBER_SERDES:
1927         case E1000_DEV_ID_82576_FIBER:
1928         case E1000_DEV_ID_82576_SERDES:
1929                 /* Wake events only supported on port A for dual fiber
1930                  * regardless of eeprom setting */
1931                 if (rd32(E1000_STATUS) & E1000_STATUS_FUNC_1)
1932                         adapter->eeprom_wol = 0;
1933                 break;
1934         case E1000_DEV_ID_82576_QUAD_COPPER:
1935         case E1000_DEV_ID_82576_QUAD_COPPER_ET2:
1936                 /* if quad port adapter, disable WoL on all but port A */
1937                 if (global_quad_port_a != 0)
1938                         adapter->eeprom_wol = 0;
1939                 else
1940                         adapter->flags |= IGB_FLAG_QUAD_PORT_A;
1941                 /* Reset for multiple quad port adapters */
1942                 if (++global_quad_port_a == 4)
1943                         global_quad_port_a = 0;
1944                 break;
1945         }
1946
1947         /* initialize the wol settings based on the eeprom settings */
1948         adapter->wol = adapter->eeprom_wol;
1949         device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol);
1950
1951         /* reset the hardware with the new settings */
1952         igb_reset(adapter);
1953
1954         /* let the f/w know that the h/w is now under the control of the
1955          * driver. */
1956         igb_get_hw_control(adapter);
1957
1958         strcpy(netdev->name, "eth%d");
1959         err = register_netdev(netdev);
1960         if (err)
1961                 goto err_register;
1962
1963         /* carrier off reporting is important to ethtool even BEFORE open */
1964         netif_carrier_off(netdev);
1965
1966 #ifdef CONFIG_IGB_DCA
1967         if (dca_add_requester(&pdev->dev) == 0) {
1968                 adapter->flags |= IGB_FLAG_DCA_ENABLED;
1969                 dev_info(&pdev->dev, "DCA enabled\n");
1970                 igb_setup_dca(adapter);
1971         }
1972
1973 #endif
1974         dev_info(&pdev->dev, "Intel(R) Gigabit Ethernet Network Connection\n");
1975         /* print bus type/speed/width info */
1976         dev_info(&pdev->dev, "%s: (PCIe:%s:%s) %pM\n",
1977                  netdev->name,
1978                  ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5Gb/s" :
1979                   (hw->bus.speed == e1000_bus_speed_5000) ? "5.0Gb/s" :
1980                                                             "unknown"),
1981                  ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" :
1982                   (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" :
1983                   (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" :
1984                    "unknown"),
1985                  netdev->dev_addr);
1986
1987         igb_read_part_num(hw, &part_num);
1988         dev_info(&pdev->dev, "%s: PBA No: %06x-%03x\n", netdev->name,
1989                 (part_num >> 8), (part_num & 0xff));
1990
1991         dev_info(&pdev->dev,
1992                 "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n",
1993                 adapter->msix_entries ? "MSI-X" :
1994                 (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy",
1995                 adapter->num_rx_queues, adapter->num_tx_queues);
1996
1997         return 0;
1998
1999 err_register:
2000         igb_release_hw_control(adapter);
2001 err_eeprom:
2002         if (!igb_check_reset_block(hw))
2003                 igb_reset_phy(hw);
2004
2005         if (hw->flash_address)
2006                 iounmap(hw->flash_address);
2007 err_sw_init:
2008         igb_clear_interrupt_scheme(adapter);
2009         iounmap(hw->hw_addr);
2010 err_ioremap:
2011         free_netdev(netdev);
2012 err_alloc_etherdev:
2013         pci_release_selected_regions(pdev,
2014                                      pci_select_bars(pdev, IORESOURCE_MEM));
2015 err_pci_reg:
2016 err_dma:
2017         pci_disable_device(pdev);
2018         return err;
2019 }
2020
2021 /**
2022  * igb_remove - Device Removal Routine
2023  * @pdev: PCI device information struct
2024  *
2025  * igb_remove is called by the PCI subsystem to alert the driver
2026  * that it should release a PCI device.  This could be caused by a
2027  * Hot-Plug event, or because the driver is going to be removed from
2028  * memory.
2029  **/
2030 static void __devexit igb_remove(struct pci_dev *pdev)
2031 {
2032         struct net_device *netdev = pci_get_drvdata(pdev);
2033         struct igb_adapter *adapter = netdev_priv(netdev);
2034         struct e1000_hw *hw = &adapter->hw;
2035
2036         /* flush_scheduled_work() may reschedule our watchdog task, so
2037          * explicitly disable the watchdog task from being rescheduled */
2038         set_bit(__IGB_DOWN, &adapter->state);
2039         del_timer_sync(&adapter->watchdog_timer);
2040         del_timer_sync(&adapter->phy_info_timer);
2041
2042         flush_scheduled_work();
2043
2044 #ifdef CONFIG_IGB_DCA
2045         if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
2046                 dev_info(&pdev->dev, "DCA disabled\n");
2047                 dca_remove_requester(&pdev->dev);
2048                 adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
2049                 wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
2050         }
2051 #endif
2052
2053         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
2054          * would have already happened in close and is redundant. */
2055         igb_release_hw_control(adapter);
2056
2057         unregister_netdev(netdev);
2058
2059         igb_clear_interrupt_scheme(adapter);
2060
2061 #ifdef CONFIG_PCI_IOV
2062         /* reclaim resources allocated to VFs */
2063         if (adapter->vf_data) {
2064                 /* disable iov and allow time for transactions to clear */
2065                 pci_disable_sriov(pdev);
2066                 msleep(500);
2067
2068                 kfree(adapter->vf_data);
2069                 adapter->vf_data = NULL;
2070                 wr32(E1000_IOVCTL, E1000_IOVCTL_REUSE_VFQ);
2071                 msleep(100);
2072                 dev_info(&pdev->dev, "IOV Disabled\n");
2073         }
2074 #endif
2075
2076         iounmap(hw->hw_addr);
2077         if (hw->flash_address)
2078                 iounmap(hw->flash_address);
2079         pci_release_selected_regions(pdev,
2080                                      pci_select_bars(pdev, IORESOURCE_MEM));
2081
2082         free_netdev(netdev);
2083
2084         pci_disable_pcie_error_reporting(pdev);
2085
2086         pci_disable_device(pdev);
2087 }
2088
2089 /**
2090  * igb_probe_vfs - Initialize vf data storage and add VFs to pci config space
2091  * @adapter: board private structure to initialize
2092  *
2093  * This function initializes the vf specific data storage and then attempts to
2094  * allocate the VFs.  The reason for ordering it this way is because it is much
2095  * allocate the VFs.  The reason for this ordering is that it is much
2096  * more expensive time-wise to disable SR-IOV than it is to allocate and free
2097  **/
2098 static void __devinit igb_probe_vfs(struct igb_adapter * adapter)
2099 {
2100 #ifdef CONFIG_PCI_IOV
2101         struct pci_dev *pdev = adapter->pdev;
2102
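             /* never allocate more than 7 VFs; the PF itself uses the
              * remaining pool/queue pair */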
2103         if (adapter->vfs_allocated_count > 7)
2104                 adapter->vfs_allocated_count = 7;
2105
2106         if (adapter->vfs_allocated_count) {
2107                 adapter->vf_data = kcalloc(adapter->vfs_allocated_count,
2108                                            sizeof(struct vf_data_storage),
2109                                            GFP_KERNEL);
2110                 /* if allocation failed then we do not support SR-IOV */
2111                 if (!adapter->vf_data) {
2112                         adapter->vfs_allocated_count = 0;
2113                         dev_err(&pdev->dev, "Unable to allocate memory for VF "
2114                                 "Data Storage\n");
2115                 }
2116         }
2117
2118         if (pci_enable_sriov(pdev, adapter->vfs_allocated_count)) {
2119                 kfree(adapter->vf_data);
2120                 adapter->vf_data = NULL;
2121 #endif /* CONFIG_PCI_IOV */
2122                 adapter->vfs_allocated_count = 0;
2123 #ifdef CONFIG_PCI_IOV
2124         } else {
2125                 unsigned char mac_addr[ETH_ALEN];
2126                 int i;
2127                 dev_info(&pdev->dev, "%d vfs allocated\n",
2128                          adapter->vfs_allocated_count);
2129                 for (i = 0; i < adapter->vfs_allocated_count; i++) {
2130                         random_ether_addr(mac_addr);
2131                         igb_set_vf_mac(adapter, i, mac_addr);
2132                 }
2133         }
2134 #endif /* CONFIG_PCI_IOV */
2135 }
2136
2137
2138 /**
2139  * igb_init_hw_timer - Initialize hardware timer used with IEEE 1588 timestamp
2140  * @adapter: board private structure to initialize
2141  *
2142  * igb_init_hw_timer initializes the function pointer and values for the hw
2143  * timer found in hardware.
2144  **/
2145 static void igb_init_hw_timer(struct igb_adapter *adapter)
2146 {
2147         struct e1000_hw *hw = &adapter->hw;
2148
2149         switch (hw->mac.type) {
2150         case e1000_i350:
2151         case e1000_82580:
2152                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2153                 adapter->cycles.read = igb_read_clock;
2154                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2155                 adapter->cycles.mult = 1;
2156                 /*
2157                  * The 82580 timesync increments the system timer by 8ns every 8ns
2158                  * and the value cannot be shifted.  Instead we need to shift
2159                  * the registers to generate a 64bit timer value.  As a result
2160                  * SYSTIMR/L/H, TXSTMPL/H, RXSTMPL/H all have to be shifted by
2161                  * 24 in order to generate a larger value for synchronization.
2162                  */
2163                 adapter->cycles.shift = IGB_82580_TSYNC_SHIFT;
2164                 /* disable system timer temporarily by setting bit 31 */
2165                 wr32(E1000_TSAUXC, 0x80000000);
2166                 wrfl();
2167
2168                 /* Set registers so that rollover occurs soon to test this. */
2169                 wr32(E1000_SYSTIMR, 0x00000000);
2170                 wr32(E1000_SYSTIML, 0x80000000);
2171                 wr32(E1000_SYSTIMH, 0x000000FF);
2172                 wrfl();
2173
2174                 /* enable system timer by clearing bit 31 */
2175                 wr32(E1000_TSAUXC, 0x0);
2176                 wrfl();
2177
2178                 timecounter_init(&adapter->clock,
2179                                  &adapter->cycles,
2180                                  ktime_to_ns(ktime_get_real()));
2181                 /*
2182                  * Synchronize our NIC clock against system wall clock. NIC
2183                  * time stamp reading requires ~3us per sample, each sample
2184                  * was pretty stable even under load => only require 10
2185                  * samples for each offset comparison.
2186                  */
2187                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2188                 adapter->compare.source = &adapter->clock;
2189                 adapter->compare.target = ktime_get_real;
2190                 adapter->compare.num_samples = 10;
2191                 timecompare_update(&adapter->compare, 0);
2192                 break;
2193         case e1000_82576:
2194                 /*
2195                  * Initialize hardware timer: we keep it running just in case
2196                  * that some program needs it later on.
2197                  */
2198                 memset(&adapter->cycles, 0, sizeof(adapter->cycles));
2199                 adapter->cycles.read = igb_read_clock;
2200                 adapter->cycles.mask = CLOCKSOURCE_MASK(64);
2201                 adapter->cycles.mult = 1;
2202                 /*
2203                  * Scale the NIC clock cycle by a large factor so that
2204                  * relatively small clock corrections can be added or
2205                  * subtracted at each clock tick. The drawbacks of a large
2206                  * factor are a) that the clock register overflows more quickly
2207                  * (not such a big deal) and b) that the increment per tick has
2208                  * to fit into 24 bits.  As a result we need to use a shift of
2209                  * 19 so we can fit a value of 16 into the TIMINCA register.
2210                  */
2211                 adapter->cycles.shift = IGB_82576_TSYNC_SHIFT;
2212                 wr32(E1000_TIMINCA,
2213                                 (1 << E1000_TIMINCA_16NS_SHIFT) |
2214                                 (16 << IGB_82576_TSYNC_SHIFT));
2215
2216                 /* Set registers so that rollover occurs soon to test this. */
2217                 wr32(E1000_SYSTIML, 0x00000000);
2218                 wr32(E1000_SYSTIMH, 0xFF800000);
2219                 wrfl();
2220
2221                 timecounter_init(&adapter->clock,
2222                                  &adapter->cycles,
2223                                  ktime_to_ns(ktime_get_real()));
2224                 /*
2225                  * Synchronize our NIC clock against system wall clock. NIC
2226                  * time stamp reading requires ~3us per sample, each sample
2227                  * was pretty stable even under load => only require 10
2228                  * samples for each offset comparison.
2229                  */
2230                 memset(&adapter->compare, 0, sizeof(adapter->compare));
2231                 adapter->compare.source = &adapter->clock;
2232                 adapter->compare.target = ktime_get_real;
2233                 adapter->compare.num_samples = 10;
2234                 timecompare_update(&adapter->compare, 0);
2235                 break;
2236         case e1000_82575:
2237                 /* 82575 does not support timesync */
2238         default:
2239                 break;
2240         }
2241
2242 }
2243
2244 /**
2245  * igb_sw_init - Initialize general software structures (struct igb_adapter)
2246  * @adapter: board private structure to initialize
2247  *
2248  * igb_sw_init initializes the Adapter private data structure.
2249  * Fields are initialized based on PCI device information and
2250  * OS network device settings (MTU size).
2251  **/
2252 static int __devinit igb_sw_init(struct igb_adapter *adapter)
2253 {
2254         struct e1000_hw *hw = &adapter->hw;
2255         struct net_device *netdev = adapter->netdev;
2256         struct pci_dev *pdev = adapter->pdev;
2257
2258         pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
2259
2260         adapter->tx_ring_count = IGB_DEFAULT_TXD;
2261         adapter->rx_ring_count = IGB_DEFAULT_RXD;
2262         adapter->rx_itr_setting = IGB_DEFAULT_ITR;
2263         adapter->tx_itr_setting = IGB_DEFAULT_ITR;
2264
2265         adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN;
2266         adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
2267
2268 #ifdef CONFIG_PCI_IOV
2269         if (hw->mac.type == e1000_82576)
2270                 adapter->vfs_allocated_count = max_vfs;
2271
2272 #endif /* CONFIG_PCI_IOV */
2273         adapter->rss_queues = min_t(u32, IGB_MAX_RX_QUEUES, num_online_cpus());
2274
2275         /*
2276          * if rss_queues > 4 or vfs are going to be allocated with rss_queues
2277          * then we should combine the queues into a queue pair in order to
2278          * conserve interrupts due to limited supply
2279          */
2280         if ((adapter->rss_queues > 4) ||
2281             ((adapter->rss_queues > 1) && (adapter->vfs_allocated_count > 6)))
2282                 adapter->flags |= IGB_FLAG_QUEUE_PAIRS;
2283
2284         /* This call may decrease the number of queues */
2285         if (igb_init_interrupt_scheme(adapter)) {
2286                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
2287                 return -ENOMEM;
2288         }
2289
2290         igb_init_hw_timer(adapter);
2291         igb_probe_vfs(adapter);
2292
2293         /* Explicitly disable IRQ since the NIC can be in any state. */
2294         igb_irq_disable(adapter);
2295
2296         set_bit(__IGB_DOWN, &adapter->state);
2297         return 0;
2298 }
2299
2300 /**
2301  * igb_open - Called when a network interface is made active
2302  * @netdev: network interface device structure
2303  *
2304  * Returns 0 on success, negative value on failure
2305  *
2306  * The open entry point is called when a network interface is made
2307  * active by the system (IFF_UP).  At this point all resources needed
2308  * for transmit and receive operations are allocated, the interrupt
2309  * handler is registered with the OS, the watchdog timer is started,
2310  * and the stack is notified that the interface is ready.
2311  **/
2312 static int igb_open(struct net_device *netdev)
2313 {
2314         struct igb_adapter *adapter = netdev_priv(netdev);
2315         struct e1000_hw *hw = &adapter->hw;
2316         int err;
2317         int i;
2318
2319         /* disallow open during test */
2320         if (test_bit(__IGB_TESTING, &adapter->state))
2321                 return -EBUSY;
2322
2323         netif_carrier_off(netdev);
2324
2325         /* allocate transmit descriptors */
2326         err = igb_setup_all_tx_resources(adapter);
2327         if (err)
2328                 goto err_setup_tx;
2329
2330         /* allocate receive descriptors */
2331         err = igb_setup_all_rx_resources(adapter);
2332         if (err)
2333                 goto err_setup_rx;
2334
2335         igb_power_up_link(adapter);
2336
2337         /* before we allocate an interrupt, we must be ready to handle it.
2338          * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt
2339          * as soon as we call request_irq, so we have to set up our
2340          * clean_rx handler before we do so.  */
2341         igb_configure(adapter);
2342
2343         err = igb_request_irq(adapter);
2344         if (err)
2345                 goto err_req_irq;
2346
2347         /* From here on the code is the same as igb_up() */
2348         clear_bit(__IGB_DOWN, &adapter->state);
2349
2350         for (i = 0; i < adapter->num_q_vectors; i++) {
2351                 struct igb_q_vector *q_vector = adapter->q_vector[i];
2352                 napi_enable(&q_vector->napi);
2353         }
2354
2355         /* Clear any pending interrupts. */
2356         rd32(E1000_ICR);
2357
2358         igb_irq_enable(adapter);
2359
2360         /* notify VFs that reset has been completed */
2361         if (adapter->vfs_allocated_count) {
2362                 u32 reg_data = rd32(E1000_CTRL_EXT);
2363                 reg_data |= E1000_CTRL_EXT_PFRSTD;
2364                 wr32(E1000_CTRL_EXT, reg_data);
2365         }
2366
2367         netif_tx_start_all_queues(netdev);
2368
2369         /* start the watchdog. */
2370         hw->mac.get_link_status = 1;
2371         schedule_work(&adapter->watchdog_task);
2372
2373         return 0;
2374
2375 err_req_irq:
2376         igb_release_hw_control(adapter);
2377         igb_power_down_link(adapter);
2378         igb_free_all_rx_resources(adapter);
2379 err_setup_rx:
2380         igb_free_all_tx_resources(adapter);
2381 err_setup_tx:
2382         igb_reset(adapter);
2383
2384         return err;
2385 }
2386
2387 /**
2388  * igb_close - Disables a network interface
2389  * @netdev: network interface device structure
2390  *
2391  * Returns 0, this is not allowed to fail
2392  *
2393  * The close entry point is called when an interface is de-activated
2394  * by the OS.  The hardware is still under the driver's control, but
2395  * needs to be disabled.  A global MAC reset is issued to stop the
2396  * hardware, and all transmit and receive resources are freed.
2397  **/
2398 static int igb_close(struct net_device *netdev)
2399 {
2400         struct igb_adapter *adapter = netdev_priv(netdev);
2401
2402         WARN_ON(test_bit(__IGB_RESETTING, &adapter->state));
2403         igb_down(adapter);
2404
2405         igb_free_irq(adapter);
2406
2407         igb_free_all_tx_resources(adapter);
2408         igb_free_all_rx_resources(adapter);
2409
2410         return 0;
2411 }
2412
2413 /**
2414  * igb_setup_tx_resources - allocate Tx resources (Descriptors)
2415  * @tx_ring: tx descriptor ring (for a specific queue) to setup
2416  *
2417  * Return 0 on success, negative on failure
2418  **/
2419 int igb_setup_tx_resources(struct igb_ring *tx_ring)
2420 {
2421         struct device *dev = tx_ring->dev;
2422         int size;
2423
2424         size = sizeof(struct igb_buffer) * tx_ring->count;
2425         tx_ring->buffer_info = vmalloc(size);
2426         if (!tx_ring->buffer_info)
2427                 goto err;
2428         memset(tx_ring->buffer_info, 0, size);
2429
2430         /* round up to nearest 4K */
2431         tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc);
2432         tx_ring->size = ALIGN(tx_ring->size, 4096);
2433
2434         tx_ring->desc = dma_alloc_coherent(dev,
2435                                            tx_ring->size,
2436                                            &tx_ring->dma,
2437                                            GFP_KERNEL);
2438
2439         if (!tx_ring->desc)
2440                 goto err;
2441
2442         tx_ring->next_to_use = 0;
2443         tx_ring->next_to_clean = 0;
2444         return 0;
2445
2446 err:
2447         vfree(tx_ring->buffer_info);
2448         dev_err(dev,
2449                 "Unable to allocate memory for the transmit descriptor ring\n");
2450         return -ENOMEM;
2451 }
2452
2453 /**
2454  * igb_setup_all_tx_resources - wrapper to allocate Tx resources
2455  *                                (Descriptors) for all queues
2456  * @adapter: board private structure
2457  *
2458  * Return 0 on success, negative on failure
2459  **/
2460 static int igb_setup_all_tx_resources(struct igb_adapter *adapter)
2461 {
2462         struct pci_dev *pdev = adapter->pdev;
2463         int i, err = 0;
2464
2465         for (i = 0; i < adapter->num_tx_queues; i++) {
2466                 err = igb_setup_tx_resources(adapter->tx_ring[i]);
2467                 if (err) {
2468                         dev_err(&pdev->dev,
2469                                 "Allocation for Tx Queue %u failed\n", i);
2470                         for (i--; i >= 0; i--)
2471                                 igb_free_tx_resources(adapter->tx_ring[i]);
2472                         break;
2473                 }
2474         }
2475
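             /* map every possible tx queue index onto one of the allocated
              * rings (wrapping round-robin) so queue selection always finds
              * a valid ring */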
2476         for (i = 0; i < IGB_ABS_MAX_TX_QUEUES; i++) {
2477                 int r_idx = i % adapter->num_tx_queues;
2478                 adapter->multi_tx_table[i] = adapter->tx_ring[r_idx];
2479         }
2480         return err;
2481 }
2482
2483 /**
2484  * igb_setup_tctl - configure the transmit control registers
2485  * @adapter: Board private structure
2486  **/
2487 void igb_setup_tctl(struct igb_adapter *adapter)
2488 {
2489         struct e1000_hw *hw = &adapter->hw;
2490         u32 tctl;
2491
2492         /* disable queue 0 which is enabled by default on 82575 and 82576 */
2493         wr32(E1000_TXDCTL(0), 0);
2494
2495         /* Program the Transmit Control Register */
2496         tctl = rd32(E1000_TCTL);
2497         tctl &= ~E1000_TCTL_CT;
2498         tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC |
2499                 (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT);
2500
2501         igb_config_collision_dist(hw);
2502
2503         /* Enable transmits */
2504         tctl |= E1000_TCTL_EN;
2505
2506         wr32(E1000_TCTL, tctl);
2507 }
2508
2509 /**
2510  * igb_configure_tx_ring - Configure transmit ring after Reset
2511  * @adapter: board private structure
2512  * @ring: tx ring to configure
2513  *
2514  * Configure a transmit ring after a reset.
2515  **/
2516 void igb_configure_tx_ring(struct igb_adapter *adapter,
2517                            struct igb_ring *ring)
2518 {
2519         struct e1000_hw *hw = &adapter->hw;
2520         u32 txdctl;
2521         u64 tdba = ring->dma;
2522         int reg_idx = ring->reg_idx;
2523
2524         /* disable the queue */
2525         txdctl = rd32(E1000_TXDCTL(reg_idx));
2526         wr32(E1000_TXDCTL(reg_idx),
2527                         txdctl & ~E1000_TXDCTL_QUEUE_ENABLE);
2528         wrfl();
2529         mdelay(10);
2530
2531         wr32(E1000_TDLEN(reg_idx),
2532                         ring->count * sizeof(union e1000_adv_tx_desc));
2533         wr32(E1000_TDBAL(reg_idx),
2534                         tdba & 0x00000000ffffffffULL);
2535         wr32(E1000_TDBAH(reg_idx), tdba >> 32);
2536
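             /* remember the MMIO addresses of this ring's head/tail registers
              * and zero both so hardware and software start in sync */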
2537         ring->head = hw->hw_addr + E1000_TDH(reg_idx);
2538         ring->tail = hw->hw_addr + E1000_TDT(reg_idx);
2539         writel(0, ring->head);
2540         writel(0, ring->tail);
2541
2542         txdctl |= IGB_TX_PTHRESH;
2543         txdctl |= IGB_TX_HTHRESH << 8;
2544         txdctl |= IGB_TX_WTHRESH << 16;
2545
2546         txdctl |= E1000_TXDCTL_QUEUE_ENABLE;
2547         wr32(E1000_TXDCTL(reg_idx), txdctl);
2548 }
2549
2550 /**
2551  * igb_configure_tx - Configure transmit Unit after Reset
2552  * @adapter: board private structure
2553  *
2554  * Configure the Tx unit of the MAC after a reset.
2555  **/
2556 static void igb_configure_tx(struct igb_adapter *adapter)
2557 {
2558         int i;
2559
2560         for (i = 0; i < adapter->num_tx_queues; i++)
2561                 igb_configure_tx_ring(adapter, adapter->tx_ring[i]);
2562 }
2563
2564 /**
2565  * igb_setup_rx_resources - allocate Rx resources (Descriptors)
2566  * @rx_ring:    rx descriptor ring (for a specific queue) to setup
2567  *
2568  * Returns 0 on success, negative on failure
2569  **/
2570 int igb_setup_rx_resources(struct igb_ring *rx_ring)
2571 {
2572         struct device *dev = rx_ring->dev;
2573         int size, desc_len;
2574
2575         size = sizeof(struct igb_buffer) * rx_ring->count;
2576         rx_ring->buffer_info = vmalloc(size);
2577         if (!rx_ring->buffer_info)
2578                 goto err;
2579         memset(rx_ring->buffer_info, 0, size);
2580
2581         desc_len = sizeof(union e1000_adv_rx_desc);
2582
2583         /* Round up to nearest 4K */
2584         rx_ring->size = rx_ring->count * desc_len;
2585         rx_ring->size = ALIGN(rx_ring->size, 4096);
2586
2587         rx_ring->desc = dma_alloc_coherent(dev,
2588                                            rx_ring->size,
2589                                            &rx_ring->dma,
2590                                            GFP_KERNEL);
2591
2592         if (!rx_ring->desc)
2593                 goto err;
2594
2595         rx_ring->next_to_clean = 0;
2596         rx_ring->next_to_use = 0;
2597
2598         return 0;
2599
2600 err:
2601         vfree(rx_ring->buffer_info);
2602         rx_ring->buffer_info = NULL;
2603         dev_err(dev, "Unable to allocate memory for the receive descriptor"
2604                 " ring\n");
2605         return -ENOMEM;
2606 }
2607
2608 /**
2609  * igb_setup_all_rx_resources - wrapper to allocate Rx resources
2610  *                                (Descriptors) for all queues
2611  * @adapter: board private structure
2612  *
2613  * Return 0 on success, negative on failure
2614  **/
2615 static int igb_setup_all_rx_resources(struct igb_adapter *adapter)
2616 {
2617         struct pci_dev *pdev = adapter->pdev;
2618         int i, err = 0;
2619
2620         for (i = 0; i < adapter->num_rx_queues; i++) {
2621                 err = igb_setup_rx_resources(adapter->rx_ring[i]);
2622                 if (err) {
2623                         dev_err(&pdev->dev,
2624                                 "Allocation for Rx Queue %u failed\n", i);
2625                         for (i--; i >= 0; i--)
2626                                 igb_free_rx_resources(adapter->rx_ring[i]);
2627                         break;
2628                 }
2629         }
2630
2631         return err;
2632 }
2633
2634 /**
2635  * igb_setup_mrqc - configure the multiple receive queue control registers
2636  * @adapter: Board private structure
2637  **/
2638 static void igb_setup_mrqc(struct igb_adapter *adapter)
2639 {
2640         struct e1000_hw *hw = &adapter->hw;
2641         u32 mrqc, rxcsum;
2642         u32 j, num_rx_queues, shift = 0, shift2 = 0;
2643         union e1000_reta {
2644                 u32 dword;
2645                 u8  bytes[4];
2646         } reta;
2647         static const u8 rsshash[40] = {
2648                 0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2, 0x41, 0x67,
2649                 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0, 0xd0, 0xca, 0x2b, 0xcb,
2650                 0xae, 0x7b, 0x30, 0xb4, 0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30,
2651                 0xf2, 0x0c, 0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa };
2652
2653         /* Fill out hash function seeds */
2654         for (j = 0; j < 10; j++) {
2655                 u32 rsskey = rsshash[(j * 4)];
2656                 rsskey |= rsshash[(j * 4) + 1] << 8;
2657                 rsskey |= rsshash[(j * 4) + 2] << 16;
2658                 rsskey |= rsshash[(j * 4) + 3] << 24;
2659                 array_wr32(E1000_RSSRK(0), j, rsskey);
2660         }
2661
2662         num_rx_queues = adapter->rss_queues;
2663
2664         if (adapter->vfs_allocated_count) {
2665                 /* 82575 and 82576 support 2 RSS queues for VMDq */
2666                 switch (hw->mac.type) {
2667                 case e1000_i350:
2668                 case e1000_82580:
2669                         num_rx_queues = 1;
2670                         shift = 0;
2671                         break;
2672                 case e1000_82576:
2673                         shift = 3;
2674                         num_rx_queues = 2;
2675                         break;
2676                 case e1000_82575:
2677                         shift = 2;
2678                         shift2 = 6;
2679                 default:
2680                         break;
2681                 }
2682         } else {
2683                 if (hw->mac.type == e1000_82575)
2684                         shift = 6;
2685         }
2686
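             /* fill the 128-entry RSS redirection table round-robin across the
              * rx queues; the shifts place the queue index where this MAC type
              * expects it */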
2687         for (j = 0; j < (32 * 4); j++) {
2688                 reta.bytes[j & 3] = (j % num_rx_queues) << shift;
2689                 if (shift2)
2690                         reta.bytes[j & 3] |= num_rx_queues << shift2;
2691                 if ((j & 3) == 3)
2692                         wr32(E1000_RETA(j >> 2), reta.dword);
2693         }
2694
2695         /*
2696          * Disable raw packet checksumming so that RSS hash is placed in
2697          * descriptor on writeback.  No need to enable TCP/UDP/IP checksum
2698          * offloads as they are enabled by default
2699          */
2700         rxcsum = rd32(E1000_RXCSUM);
2701         rxcsum |= E1000_RXCSUM_PCSD;
2702
2703         if (adapter->hw.mac.type >= e1000_82576)
2704                 /* Enable Receive Checksum Offload for SCTP */
2705                 rxcsum |= E1000_RXCSUM_CRCOFL;
2706
2707         /* Don't need to set TUOFL or IPOFL, they default to 1 */
2708         wr32(E1000_RXCSUM, rxcsum);
2709
2710         /* If VMDq is enabled then we set the appropriate mode for that, else
2711          * we default to RSS so that an RSS hash is calculated per packet even
2712          * if we are only using one queue */
2713         if (adapter->vfs_allocated_count) {
2714                 if (hw->mac.type > e1000_82575) {
2715                         /* Set the default pool for the PF's first queue */
2716                         u32 vtctl = rd32(E1000_VT_CTL);
2717                         vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK |
2718                                    E1000_VT_CTL_DISABLE_DEF_POOL);
2719                         vtctl |= adapter->vfs_allocated_count <<
2720                                 E1000_VT_CTL_DEFAULT_POOL_SHIFT;
2721                         wr32(E1000_VT_CTL, vtctl);
2722                 }
2723                 if (adapter->rss_queues > 1)
2724                         mrqc = E1000_MRQC_ENABLE_VMDQ_RSS_2Q;
2725                 else
2726                         mrqc = E1000_MRQC_ENABLE_VMDQ;
2727         } else {
2728                 mrqc = E1000_MRQC_ENABLE_RSS_4Q;
2729         }
2730         igb_vmm_control(adapter);
2731
2732         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4 |
2733                  E1000_MRQC_RSS_FIELD_IPV4_TCP);
2734         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6 |
2735                  E1000_MRQC_RSS_FIELD_IPV6_TCP);
2736         mrqc |= (E1000_MRQC_RSS_FIELD_IPV4_UDP |
2737                  E1000_MRQC_RSS_FIELD_IPV6_UDP);
2738         mrqc |= (E1000_MRQC_RSS_FIELD_IPV6_UDP_EX |
2739                  E1000_MRQC_RSS_FIELD_IPV6_TCP_EX);
2740
2741         wr32(E1000_MRQC, mrqc);
2742 }
2743
2744 /**
2745  * igb_setup_rctl - configure the receive control registers
2746  * @adapter: Board private structure
2747  **/
2748 void igb_setup_rctl(struct igb_adapter *adapter)
2749 {
2750         struct e1000_hw *hw = &adapter->hw;
2751         u32 rctl;
2752
2753         rctl = rd32(E1000_RCTL);
2754
2755         rctl &= ~(3 << E1000_RCTL_MO_SHIFT);
2756         rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC);
2757
2758         rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF |
2759                 (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT);
2760
2761         /*
2762          * enable stripping of CRC. It's unlikely this will break BMC
2763          * redirection as it did with e1000. Newer features require
2764          * that the HW strips the CRC.
2765          */
2766         rctl |= E1000_RCTL_SECRC;
2767
2768         /* disable store bad packets and clear size bits. */
2769         rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256);
2770
2771         /* enable LPE to prevent packets larger than max_frame_size */
2772         rctl |= E1000_RCTL_LPE;
2773
2774         /* disable queue 0 to prevent tail write w/o re-config */
2775         wr32(E1000_RXDCTL(0), 0);
2776
2777         /* Attention!!!  For SR-IOV PF driver operations you must enable
2778          * queue drop for all VF and PF queues to prevent head-of-line blocking
2779          * if an untrusted VF does not provide descriptors to hardware.
2780          */
2781         if (adapter->vfs_allocated_count) {
2782                 /* set all queue drop enable bits */
2783                 wr32(E1000_QDE, ALL_QUEUES);
2784         }
2785
2786         wr32(E1000_RCTL, rctl);
2787 }
2788
2789 static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size,
2790                                    int vfn)
2791 {
2792         struct e1000_hw *hw = &adapter->hw;
2793         u32 vmolr;
2794
2795         /* if it isn't the PF, check to see if VFs are enabled and
2796          * increase the size to support vlan tags */
2797         if (vfn < adapter->vfs_allocated_count &&
2798             adapter->vf_data[vfn].vlans_enabled)
2799                 size += VLAN_TAG_SIZE;
2800
2801         vmolr = rd32(E1000_VMOLR(vfn));
2802         vmolr &= ~E1000_VMOLR_RLPML_MASK;
2803         vmolr |= size | E1000_VMOLR_LPE;
2804         wr32(E1000_VMOLR(vfn), vmolr);
2805
2806         return 0;
2807 }
2808
2809 /**
2810  * igb_rlpml_set - set maximum receive packet size
2811  * @adapter: board private structure
2812  *
2813  * Configure maximum receivable packet size.
2814  **/
2815 static void igb_rlpml_set(struct igb_adapter *adapter)
2816 {
2817         u32 max_frame_size = adapter->max_frame_size;
2818         struct e1000_hw *hw = &adapter->hw;
2819         u16 pf_id = adapter->vfs_allocated_count;
2820
2821         if (adapter->vlgrp)
2822                 max_frame_size += VLAN_TAG_SIZE;
2823
2824         /* if vfs are enabled we set RLPML to the largest possible request
2825          * size and set the VMOLR RLPML to the size we need */
2826         if (pf_id) {
2827                 igb_set_vf_rlpml(adapter, max_frame_size, pf_id);
2828                 max_frame_size = MAX_JUMBO_FRAME_SIZE;
2829         }
2830
2831         wr32(E1000_RLPML, max_frame_size);
2832 }
2833
2834 static inline void igb_set_vmolr(struct igb_adapter *adapter,
2835                                  int vfn, bool aupe)
2836 {
2837         struct e1000_hw *hw = &adapter->hw;
2838         u32 vmolr;
2839
2840         /*
2841          * This register exists only on 82576 and newer, so on older hardware
2842          * we exit and do nothing
2843          */
2844         if (hw->mac.type < e1000_82576)
2845                 return;
2846
2847         vmolr = rd32(E1000_VMOLR(vfn));
2848         vmolr |= E1000_VMOLR_STRVLAN;      /* Strip vlan tags */
2849         if (aupe)
2850                 vmolr |= E1000_VMOLR_AUPE;        /* Accept untagged packets */
2851         else
2852                 vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */
2853
2854         /* clear any bits we may conditionally set below */
2855         vmolr &= ~(E1000_VMOLR_BAM | E1000_VMOLR_RSSE);
2856
2857         if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count)
2858                 vmolr |= E1000_VMOLR_RSSE; /* enable RSS */
2859         /*
2860          * for VMDq only allow the VFs and pool 0 to accept broadcast and
2861          * multicast packets
2862          */
2863         if (vfn <= adapter->vfs_allocated_count)
2864                 vmolr |= E1000_VMOLR_BAM;          /* Accept broadcast */
2865
2866         wr32(E1000_VMOLR(vfn), vmolr);
2867 }
2868
2869 /**
2870  * igb_configure_rx_ring - Configure a receive ring after Reset
2871  * @adapter: board private structure
2872  * @ring: receive ring to be configured
2873  *
2874  * Configure the Rx unit of the MAC after a reset.
2875  **/
2876 void igb_configure_rx_ring(struct igb_adapter *adapter,
2877                            struct igb_ring *ring)
2878 {
2879         struct e1000_hw *hw = &adapter->hw;
2880         u64 rdba = ring->dma;
2881         int reg_idx = ring->reg_idx;
2882         u32 srrctl, rxdctl;
2883
2884         /* disable the queue */
2885         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2886         wr32(E1000_RXDCTL(reg_idx),
2887                         rxdctl & ~E1000_RXDCTL_QUEUE_ENABLE);
2888
2889         /* Set DMA base address registers */
2890         wr32(E1000_RDBAL(reg_idx),
2891              rdba & 0x00000000ffffffffULL);
2892         wr32(E1000_RDBAH(reg_idx), rdba >> 32);
2893         wr32(E1000_RDLEN(reg_idx),
2894                        ring->count * sizeof(union e1000_adv_rx_desc));
2895
2896         /* initialize head and tail */
2897         ring->head = hw->hw_addr + E1000_RDH(reg_idx);
2898         ring->tail = hw->hw_addr + E1000_RDT(reg_idx);
2899         writel(0, ring->head);
2900         writel(0, ring->tail);
2901
2902         /* set descriptor configuration */
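             /* small buffers use always-split header/packet descriptors with packet
              * data in half a page; larger buffers use a single one-buffer descriptor */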
2903         if (ring->rx_buffer_len < IGB_RXBUFFER_1024) {
2904                 srrctl = ALIGN(ring->rx_buffer_len, 64) <<
2905                          E1000_SRRCTL_BSIZEHDRSIZE_SHIFT;
2906 #if (PAGE_SIZE / 2) > IGB_RXBUFFER_16384
2907                 srrctl |= IGB_RXBUFFER_16384 >>
2908                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2909 #else
2910                 srrctl |= (PAGE_SIZE / 2) >>
2911                           E1000_SRRCTL_BSIZEPKT_SHIFT;
2912 #endif
2913                 srrctl |= E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS;
2914         } else {
2915                 srrctl = ALIGN(ring->rx_buffer_len, 1024) >>
2916                          E1000_SRRCTL_BSIZEPKT_SHIFT;
2917                 srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF;
2918         }
2919         if (hw->mac.type == e1000_82580)
2920                 srrctl |= E1000_SRRCTL_TIMESTAMP;
2921         /* Only set Drop Enable if we are supporting multiple queues */
2922         if (adapter->vfs_allocated_count || adapter->num_rx_queues > 1)
2923                 srrctl |= E1000_SRRCTL_DROP_EN;
2924
2925         wr32(E1000_SRRCTL(reg_idx), srrctl);
2926
2927         /* set filtering for VMDQ pools */
2928         igb_set_vmolr(adapter, reg_idx & 0x7, true);
2929
2930         /* enable receive descriptor fetching */
2931         rxdctl = rd32(E1000_RXDCTL(reg_idx));
2932         rxdctl |= E1000_RXDCTL_QUEUE_ENABLE;
2933         rxdctl &= 0xFFF00000;
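             /* set the prefetch (PTHRESH), host (HTHRESH) and write-back (WTHRESH) thresholds */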
2934         rxdctl |= IGB_RX_PTHRESH;
2935         rxdctl |= IGB_RX_HTHRESH << 8;
2936         rxdctl |= IGB_RX_WTHRESH << 16;
2937         wr32(E1000_RXDCTL(reg_idx), rxdctl);
2938 }
2939
2940 /**
2941  * igb_configure_rx - Configure receive Unit after Reset
2942  * @adapter: board private structure
2943  *
2944  * Configure the Rx unit of the MAC after a reset.
2945  **/
2946 static void igb_configure_rx(struct igb_adapter *adapter)
2947 {
2948         int i;
2949
2950         /* set UTA to appropriate mode */
2951         igb_set_uta(adapter);
2952
2953         /* set the correct pool for the PF default MAC address in entry 0 */
2954         igb_rar_set_qsel(adapter, adapter->hw.mac.addr, 0,
2955                          adapter->vfs_allocated_count);
2956
2957         /* Setup the HW Rx Head and Tail Descriptor Pointers and
2958          * the Base and Length of the Rx Descriptor Ring */
2959         for (i = 0; i < adapter->num_rx_queues; i++)
2960                 igb_configure_rx_ring(adapter, adapter->rx_ring[i]);
2961 }
2962
2963 /**
2964  * igb_free_tx_resources - Free Tx Resources per Queue
2965  * @tx_ring: Tx descriptor ring for a specific queue
2966  *
2967  * Free all transmit software resources
2968  **/
2969 void igb_free_tx_resources(struct igb_ring *tx_ring)
2970 {
2971         igb_clean_tx_ring(tx_ring);
2972
2973         vfree(tx_ring->buffer_info);
2974         tx_ring->buffer_info = NULL;
2975
2976         /* if not set, then don't free */
2977         if (!tx_ring->desc)
2978                 return;
2979
2980         dma_free_coherent(tx_ring->dev, tx_ring->size,
2981                           tx_ring->desc, tx_ring->dma);
2982
2983         tx_ring->desc = NULL;
2984 }
2985
2986 /**
2987  * igb_free_all_tx_resources - Free Tx Resources for All Queues
2988  * @adapter: board private structure
2989  *
2990  * Free all transmit software resources
2991  **/
2992 static void igb_free_all_tx_resources(struct igb_adapter *adapter)
2993 {
2994         int i;
2995
2996         for (i = 0; i < adapter->num_tx_queues; i++)
2997                 igb_free_tx_resources(adapter->tx_ring[i]);
2998 }
2999
3000 void igb_unmap_and_free_tx_resource(struct igb_ring *tx_ring,
3001                                     struct igb_buffer *buffer_info)
3002 {
3003         if (buffer_info->dma) {
3004                 if (buffer_info->mapped_as_page)
3005                         dma_unmap_page(tx_ring->dev,
3006                                         buffer_info->dma,
3007                                         buffer_info->length,
3008                                         DMA_TO_DEVICE);
3009                 else
3010                         dma_unmap_single(tx_ring->dev,
3011                                         buffer_info->dma,
3012                                         buffer_info->length,
3013                                         DMA_TO_DEVICE);
3014                 buffer_info->dma = 0;
3015         }
3016         if (buffer_info->skb) {
3017                 dev_kfree_skb_any(buffer_info->skb);
3018                 buffer_info->skb = NULL;
3019         }
3020         buffer_info->time_stamp = 0;
3021         buffer_info->length = 0;
3022         buffer_info->next_to_watch = 0;
3023         buffer_info->mapped_as_page = false;
3024 }
3025
3026 /**
3027  * igb_clean_tx_ring - Free Tx Buffers
3028  * @tx_ring: ring to be cleaned
3029  **/
3030 static void igb_clean_tx_ring(struct igb_ring *tx_ring)
3031 {
3032         struct igb_buffer *buffer_info;
3033         unsigned long size;
3034         unsigned int i;
3035
3036         if (!tx_ring->buffer_info)
3037                 return;
3038         /* Free all the Tx ring sk_buffs */
3039
3040         for (i = 0; i < tx_ring->count; i++) {
3041                 buffer_info = &tx_ring->buffer_info[i];
3042                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3043         }
3044
3045         size = sizeof(struct igb_buffer) * tx_ring->count;
3046         memset(tx_ring->buffer_info, 0, size);
3047
3048         /* Zero out the descriptor ring */
3049         memset(tx_ring->desc, 0, tx_ring->size);
3050
3051         tx_ring->next_to_use = 0;
3052         tx_ring->next_to_clean = 0;
3053 }
3054
3055 /**
3056  * igb_clean_all_tx_rings - Free Tx Buffers for all queues
3057  * @adapter: board private structure
3058  **/
3059 static void igb_clean_all_tx_rings(struct igb_adapter *adapter)
3060 {
3061         int i;
3062
3063         for (i = 0; i < adapter->num_tx_queues; i++)
3064                 igb_clean_tx_ring(adapter->tx_ring[i]);
3065 }
3066
3067 /**
3068  * igb_free_rx_resources - Free Rx Resources
3069  * @rx_ring: ring to clean the resources from
3070  *
3071  * Free all receive software resources
3072  **/
3073 void igb_free_rx_resources(struct igb_ring *rx_ring)
3074 {
3075         igb_clean_rx_ring(rx_ring);
3076
3077         vfree(rx_ring->buffer_info);
3078         rx_ring->buffer_info = NULL;
3079
3080         /* if not set, then don't free */
3081         if (!rx_ring->desc)
3082                 return;
3083
3084         dma_free_coherent(rx_ring->dev, rx_ring->size,
3085                           rx_ring->desc, rx_ring->dma);
3086
3087         rx_ring->desc = NULL;
3088 }
3089
3090 /**
3091  * igb_free_all_rx_resources - Free Rx Resources for All Queues
3092  * @adapter: board private structure
3093  *
3094  * Free all receive software resources
3095  **/
3096 static void igb_free_all_rx_resources(struct igb_adapter *adapter)
3097 {
3098         int i;
3099
3100         for (i = 0; i < adapter->num_rx_queues; i++)
3101                 igb_free_rx_resources(adapter->rx_ring[i]);
3102 }
3103
3104 /**
3105  * igb_clean_rx_ring - Free Rx Buffers per Queue
3106  * @rx_ring: ring to free buffers from
3107  **/
3108 static void igb_clean_rx_ring(struct igb_ring *rx_ring)
3109 {
3110         struct igb_buffer *buffer_info;
3111         unsigned long size;
3112         unsigned int i;
3113
3114         if (!rx_ring->buffer_info)
3115                 return;
3116
3117         /* Free all the Rx ring sk_buffs */
3118         for (i = 0; i < rx_ring->count; i++) {
3119                 buffer_info = &rx_ring->buffer_info[i];
3120                 if (buffer_info->dma) {
3121                         dma_unmap_single(rx_ring->dev,
3122                                          buffer_info->dma,
3123                                          rx_ring->rx_buffer_len,
3124                                          DMA_FROM_DEVICE);
3125                         buffer_info->dma = 0;
3126                 }
3127
3128                 if (buffer_info->skb) {
3129                         dev_kfree_skb(buffer_info->skb);
3130                         buffer_info->skb = NULL;
3131                 }
3132                 if (buffer_info->page_dma) {
3133                         dma_unmap_page(rx_ring->dev,
3134                                        buffer_info->page_dma,
3135                                        PAGE_SIZE / 2,
3136                                        DMA_FROM_DEVICE);
3137                         buffer_info->page_dma = 0;
3138                 }
3139                 if (buffer_info->page) {
3140                         put_page(buffer_info->page);
3141                         buffer_info->page = NULL;
3142                         buffer_info->page_offset = 0;
3143                 }
3144         }
3145
3146         size = sizeof(struct igb_buffer) * rx_ring->count;
3147         memset(rx_ring->buffer_info, 0, size);
3148
3149         /* Zero out the descriptor ring */
3150         memset(rx_ring->desc, 0, rx_ring->size);
3151
3152         rx_ring->next_to_clean = 0;
3153         rx_ring->next_to_use = 0;
3154 }
3155
3156 /**
3157  * igb_clean_all_rx_rings - Free Rx Buffers for all queues
3158  * @adapter: board private structure
3159  **/
3160 static void igb_clean_all_rx_rings(struct igb_adapter *adapter)
3161 {
3162         int i;
3163
3164         for (i = 0; i < adapter->num_rx_queues; i++)
3165                 igb_clean_rx_ring(adapter->rx_ring[i]);
3166 }
3167
3168 /**
3169  * igb_set_mac - Change the Ethernet Address of the NIC
3170  * @netdev: network interface device structure
3171  * @p: pointer to an address structure
3172  *
3173  * Returns 0 on success, negative on failure
3174  **/
3175 static int igb_set_mac(struct net_device *netdev, void *p)
3176 {
3177         struct igb_adapter *adapter = netdev_priv(netdev);
3178         struct e1000_hw *hw = &adapter->hw;
3179         struct sockaddr *addr = p;
3180
3181         if (!is_valid_ether_addr(addr->sa_data))
3182                 return -EADDRNOTAVAIL;
3183
3184         memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
3185         memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len);
3186
3187         /* set the correct pool for the new PF MAC address in entry 0 */
3188         igb_rar_set_qsel(adapter, hw->mac.addr, 0,
3189                          adapter->vfs_allocated_count);
3190
3191         return 0;
3192 }
3193
3194 /**
3195  * igb_write_mc_addr_list - write multicast addresses to MTA
3196  * @netdev: network interface device structure
3197  *
3198  * Writes multicast address list to the MTA hash table.
3199  * Returns: -ENOMEM on failure
3200  *                0 on no addresses written
3201  *                X on writing X addresses to MTA
3202  **/
3203 static int igb_write_mc_addr_list(struct net_device *netdev)
3204 {
3205         struct igb_adapter *adapter = netdev_priv(netdev);
3206         struct e1000_hw *hw = &adapter->hw;
3207         struct netdev_hw_addr *ha;
3208         u8  *mta_list;
3209         int i;
3210
3211         if (netdev_mc_empty(netdev)) {
3212                 /* nothing to program, so clear mc list */
3213                 igb_update_mc_addr_list(hw, NULL, 0);
3214                 igb_restore_vf_multicasts(adapter);
3215                 return 0;
3216         }
3217
3218         mta_list = kzalloc(netdev_mc_count(netdev) * ETH_ALEN, GFP_ATOMIC);
3219         if (!mta_list)
3220                 return -ENOMEM;
3221
3222         /* The shared function expects a packed array of only addresses. */
3223         i = 0;
3224         netdev_for_each_mc_addr(ha, netdev)
3225                 memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN);
3226
3227         igb_update_mc_addr_list(hw, mta_list, i);
3228         kfree(mta_list);
3229
3230         return netdev_mc_count(netdev);
3231 }
3232
3233 /**
3234  * igb_write_uc_addr_list - write unicast addresses to RAR table
3235  * @netdev: network interface device structure
3236  *
3237  * Writes unicast address list to the RAR table.
3238  * Returns: -ENOMEM on failure/insufficient address space
3239  *                0 on no addresses written
3240  *                X on writing X addresses to the RAR table
3241  **/
3242 static int igb_write_uc_addr_list(struct net_device *netdev)
3243 {
3244         struct igb_adapter *adapter = netdev_priv(netdev);
3245         struct e1000_hw *hw = &adapter->hw;
3246         unsigned int vfn = adapter->vfs_allocated_count;
3247         unsigned int rar_entries = hw->mac.rar_entry_count - (vfn + 1);
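             /* leave room for the PF MAC in RAR entry 0 plus one entry per VF */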
3248         int count = 0;
3249
3250         /* return -ENOMEM when there are not enough RAR entries for all the addresses */
3251         if (netdev_uc_count(netdev) > rar_entries)
3252                 return -ENOMEM;
3253
3254         if (!netdev_uc_empty(netdev) && rar_entries) {
3255                 struct netdev_hw_addr *ha;
3256
3257                 netdev_for_each_uc_addr(ha, netdev) {
3258                         if (!rar_entries)
3259                                 break;
3260                         igb_rar_set_qsel(adapter, ha->addr,
3261                                          rar_entries--,
3262                                          vfn);
3263                         count++;
3264                 }
3265         }
3266         /* zero out the unused RAR entries, in reverse order to avoid write combining */
3267         for (; rar_entries > 0 ; rar_entries--) {
3268                 wr32(E1000_RAH(rar_entries), 0);
3269                 wr32(E1000_RAL(rar_entries), 0);
3270         }
3271         wrfl();
3272
3273         return count;
3274 }
3275
3276 /**
3277  * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set
3278  * @netdev: network interface device structure
3279  *
3280  * The set_rx_mode entry point is called whenever the unicast or multicast
3281  * address lists or the network interface flags are updated.  This routine is
3282  * responsible for configuring the hardware for proper unicast, multicast,
3283  * promiscuous mode, and all-multi behavior.
3284  **/
3285 static void igb_set_rx_mode(struct net_device *netdev)
3286 {
3287         struct igb_adapter *adapter = netdev_priv(netdev);
3288         struct e1000_hw *hw = &adapter->hw;
3289         unsigned int vfn = adapter->vfs_allocated_count;
3290         u32 rctl, vmolr = 0;
3291         int count;
3292
3293         /* Check for Promiscuous and All Multicast modes */
3294         rctl = rd32(E1000_RCTL);
3295
3296         /* clear the affected bits */
3297         rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE);
3298
3299         if (netdev->flags & IFF_PROMISC) {
3300                 rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE);
3301                 vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME);
3302         } else {
3303                 if (netdev->flags & IFF_ALLMULTI) {
3304                         rctl |= E1000_RCTL_MPE;
3305                         vmolr |= E1000_VMOLR_MPME;
3306                 } else {
3307                         /*
3308                          * Write addresses to the MTA; if the attempt fails
3309                          * then we should just turn on promiscuous mode so
3310                          * that we can at least receive multicast traffic
3311                          */
3312                         count = igb_write_mc_addr_list(netdev);
3313                         if (count < 0) {
3314                                 rctl |= E1000_RCTL_MPE;
3315                                 vmolr |= E1000_VMOLR_MPME;
3316                         } else if (count) {
3317                                 vmolr |= E1000_VMOLR_ROMPE;
3318                         }
3319                 }
3320                 /*
3321                  * Write addresses to available RAR registers; if there is not
3322                  * sufficient space to store all the addresses then enable
3323                  * unicast promiscuous mode
3324                  */
3325                 count = igb_write_uc_addr_list(netdev);
3326                 if (count < 0) {
3327                         rctl |= E1000_RCTL_UPE;
3328                         vmolr |= E1000_VMOLR_ROPE;
3329                 }
3330                 rctl |= E1000_RCTL_VFE;
3331         }
3332         wr32(E1000_RCTL, rctl);
3333
3334         /*
3335          * In order to support SR-IOV and eventually VMDq it is necessary to set
3336          * the VMOLR to enable the appropriate modes.  Without this workaround
3337          * we will have issues with VLAN tag stripping not being done for frames
3338          * that are only arriving because we are the default pool
3339          */
3340         if (hw->mac.type < e1000_82576)
3341                 return;
3342
3343         vmolr |= rd32(E1000_VMOLR(vfn)) &
3344                  ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE);
3345         wr32(E1000_VMOLR(vfn), vmolr);
3346         igb_restore_vf_multicasts(adapter);
3347 }
3348
3349 /* Need to wait a few seconds after link up to get diagnostic information from
3350  * the phy */
3351 static void igb_update_phy_info(unsigned long data)
3352 {
3353         struct igb_adapter *adapter = (struct igb_adapter *) data;
3354         igb_get_phy_info(&adapter->hw);
3355 }
3356
3357 /**
3358  * igb_has_link - check shared code for link and determine up/down
3359  * @adapter: pointer to driver private info
3360  **/
3361 bool igb_has_link(struct igb_adapter *adapter)
3362 {
3363         struct e1000_hw *hw = &adapter->hw;
3364         bool link_active = false;
3365         s32 ret_val = 0;
3366
3367         /* get_link_status is set on LSC (link status) interrupt or
3368          * rx sequence error interrupt.  get_link_status will stay
3369          * false until the e1000_check_for_link establishes link
3370          * for copper adapters ONLY
3371          */
3372         switch (hw->phy.media_type) {
3373         case e1000_media_type_copper:
3374                 if (hw->mac.get_link_status) {
3375                         ret_val = hw->mac.ops.check_for_link(hw);
3376                         link_active = !hw->mac.get_link_status;
3377                 } else {
3378                         link_active = true;
3379                 }
3380                 break;
3381         case e1000_media_type_internal_serdes:
3382                 ret_val = hw->mac.ops.check_for_link(hw);
3383                 link_active = hw->mac.serdes_has_link;
3384                 break;
3385         default:
3386         case e1000_media_type_unknown:
3387                 break;
3388         }
3389
3390         return link_active;
3391 }
3392
3393 /**
3394  * igb_watchdog - Timer Call-back
3395  * @data: pointer to adapter cast into an unsigned long
3396  **/
3397 static void igb_watchdog(unsigned long data)
3398 {
3399         struct igb_adapter *adapter = (struct igb_adapter *)data;
3400         /* Do the rest outside of interrupt context */
3401         schedule_work(&adapter->watchdog_task);
3402 }
3403
3404 static void igb_watchdog_task(struct work_struct *work)
3405 {
3406         struct igb_adapter *adapter = container_of(work,
3407                                                    struct igb_adapter,
3408                                                    watchdog_task);
3409         struct e1000_hw *hw = &adapter->hw;
3410         struct net_device *netdev = adapter->netdev;
3411         u32 link;
3412         int i;
3413
3414         link = igb_has_link(adapter);
3415         if (link) {
3416                 if (!netif_carrier_ok(netdev)) {
3417                         u32 ctrl;
3418                         hw->mac.ops.get_speed_and_duplex(hw,
3419                                                          &adapter->link_speed,
3420                                                          &adapter->link_duplex);
3421
3422                         ctrl = rd32(E1000_CTRL);
3423                         /* Link status message must follow this format */
3424                         printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, "
3425                                  "Flow Control: %s\n",
3426                                netdev->name,
3427                                adapter->link_speed,
3428                                adapter->link_duplex == FULL_DUPLEX ?
3429                                  "Full Duplex" : "Half Duplex",
3430                                ((ctrl & E1000_CTRL_TFCE) &&
3431                                 (ctrl & E1000_CTRL_RFCE)) ? "RX/TX" :
3432                                ((ctrl & E1000_CTRL_RFCE) ?  "RX" :
3433                                ((ctrl & E1000_CTRL_TFCE) ?  "TX" : "None")));
3434
3435                         /* adjust timeout factor according to speed/duplex */
3436                         adapter->tx_timeout_factor = 1;
3437                         switch (adapter->link_speed) {
3438                         case SPEED_10:
3439                                 adapter->tx_timeout_factor = 14;
3440                                 break;
3441                         case SPEED_100:
3442                                 /* maybe add some timeout factor ? */
3443                                 break;
3444                         }
3445
3446                         netif_carrier_on(netdev);
3447
3448                         igb_ping_all_vfs(adapter);
3449
3450                         /* link state has changed, schedule phy info update */
3451                         if (!test_bit(__IGB_DOWN, &adapter->state))
3452                                 mod_timer(&adapter->phy_info_timer,
3453                                           round_jiffies(jiffies + 2 * HZ));
3454                 }
3455         } else {
3456                 if (netif_carrier_ok(netdev)) {
3457                         adapter->link_speed = 0;
3458                         adapter->link_duplex = 0;
3459                         /* Link status message must follow this format */
3460                         printk(KERN_INFO "igb: %s NIC Link is Down\n",
3461                                netdev->name);
3462                         netif_carrier_off(netdev);
3463
3464                         igb_ping_all_vfs(adapter);
3465
3466                         /* link state has changed, schedule phy info update */
3467                         if (!test_bit(__IGB_DOWN, &adapter->state))
3468                                 mod_timer(&adapter->phy_info_timer,
3469                                           round_jiffies(jiffies + 2 * HZ));
3470                 }
3471         }
3472
3473         igb_update_stats(adapter);
3474
3475         for (i = 0; i < adapter->num_tx_queues; i++) {
3476                 struct igb_ring *tx_ring = adapter->tx_ring[i];
3477                 if (!netif_carrier_ok(netdev)) {
3478                         /* We've lost link, so the controller stops DMA,
3479                          * but we've got queued Tx work that's never going
3480                          * to get done, so reset controller to flush Tx.
3481                          * (Do the reset outside of interrupt context). */
3482                         if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) {
3483                                 adapter->tx_timeout_count++;
3484                                 schedule_work(&adapter->reset_task);
3485                                 /* return immediately since reset is imminent */
3486                                 return;
3487                         }
3488                 }
3489
3490                 /* Force detection of hung controller every watchdog period */
3491                 tx_ring->detect_tx_hung = true;
3492         }
3493
3494         /* Cause software interrupt to ensure rx ring is cleaned */
3495         if (adapter->msix_entries) {
3496                 u32 eics = 0;
3497                 for (i = 0; i < adapter->num_q_vectors; i++) {
3498                         struct igb_q_vector *q_vector = adapter->q_vector[i];
3499                         eics |= q_vector->eims_value;
3500                 }
3501                 wr32(E1000_EICS, eics);
3502         } else {
3503                 wr32(E1000_ICS, E1000_ICS_RXDMT0);
3504         }
3505
3506         /* Reset the timer */
3507         if (!test_bit(__IGB_DOWN, &adapter->state))
3508                 mod_timer(&adapter->watchdog_timer,
3509                           round_jiffies(jiffies + 2 * HZ));
3510 }
3511
3512 enum latency_range {
3513         lowest_latency = 0,
3514         low_latency = 1,
3515         bulk_latency = 2,
3516         latency_invalid = 255
3517 };
3518
3519 /**
3520  * igb_update_ring_itr - update the dynamic ITR value based on packet size
3521  *
3522  *      Stores a new ITR value based strictly on packet size.  This
3523  *      algorithm is less sophisticated than that used in igb_update_itr,
3524  *      due to the difficulty of synchronizing statistics across multiple
3525  *      receive rings.  The divisors and thresholds used by this function
3526  *      were determined based on theoretical maximum wire speed and testing
3527  *      data, in order to minimize response time while increasing bulk
3528  *      throughput.
3529  *      This functionality is controlled by the InterruptThrottleRate module
3530  *      parameter (see igb_param.c)
3531  *      NOTE:  This function is called only when operating in a multiqueue
3532  *             receive environment.
3533  * @q_vector: pointer to q_vector
3534  **/
3535 static void igb_update_ring_itr(struct igb_q_vector *q_vector)
3536 {
3537         int new_val = q_vector->itr_val;
3538         int avg_wire_size = 0;
3539         struct igb_adapter *adapter = q_vector->adapter;
3540
3541         /* For non-gigabit speeds, just fix the interrupt rate at 4000
3542          * ints/sec - an ITR register value of 976.
3543          */
3544         if (adapter->link_speed != SPEED_1000) {
3545                 new_val = 976;
3546                 goto set_itr_val;
3547         }
3548
3549         if (q_vector->rx_ring && q_vector->rx_ring->total_packets) {
3550                 struct igb_ring *ring = q_vector->rx_ring;
3551                 avg_wire_size = ring->total_bytes / ring->total_packets;
3552         }
3553
3554         if (q_vector->tx_ring && q_vector->tx_ring->total_packets) {
3555                 struct igb_ring *ring = q_vector->tx_ring;
3556                 avg_wire_size = max_t(u32, avg_wire_size,
3557                                       (ring->total_bytes /
3558                                        ring->total_packets));
3559         }
3560
3561         /* if avg_wire_size isn't set no work was done */
3562         if (!avg_wire_size)
3563                 goto clear_counts;
3564
3565         /* Add 24 bytes to size to account for CRC, preamble, and gap */
3566         avg_wire_size += 24;
3567
3568         /* Don't starve jumbo frames */
3569         avg_wire_size = min(avg_wire_size, 3000);
3570
3571         /* Give a little boost to mid-size frames */
3572         if ((avg_wire_size > 300) && (avg_wire_size < 1200))
3573                 new_val = avg_wire_size / 3;
3574         else
3575                 new_val = avg_wire_size / 2;
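             /* e.g. an avg_wire_size of 600 takes the mid-size branch above: 600 / 3 = 200 */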
3576
3577         /* when in itr mode 3 do not exceed 20K ints/sec */
3578         if (adapter->rx_itr_setting == 3 && new_val < 196)
3579                 new_val = 196;
3580
3581 set_itr_val:
3582         if (new_val != q_vector->itr_val) {
3583                 q_vector->itr_val = new_val;
3584                 q_vector->set_itr = 1;
3585         }
3586 clear_counts:
3587         if (q_vector->rx_ring) {
3588                 q_vector->rx_ring->total_bytes = 0;
3589                 q_vector->rx_ring->total_packets = 0;
3590         }
3591         if (q_vector->tx_ring) {
3592                 q_vector->tx_ring->total_bytes = 0;
3593                 q_vector->tx_ring->total_packets = 0;
3594         }
3595 }
3596
3597 /**
3598  * igb_update_itr - update the dynamic ITR value based on statistics
3599  *      Stores a new ITR value based on packet and byte
3600  *      counts during the last interrupt.  The advantage of per-interrupt
3601  *      computation is faster updates and more accurate ITR for the current
3602  *      traffic pattern.  Constants in this function were computed
3603  *      based on theoretical maximum wire speed and thresholds were set based
3604  *      on testing data as well as attempting to minimize response time
3605  *      while increasing bulk throughput.
3606  *      this functionality is controlled by the InterruptThrottleRate module
3607  *      parameter (see igb_param.c)
3608  *      NOTE:  These calculations are only valid when operating in a single-
3609  *             queue environment.
3610  * @adapter: pointer to adapter
3611  * @itr_setting: current q_vector->itr_val
3612  * @packets: the number of packets during this measurement interval
3613  * @bytes: the number of bytes during this measurement interval
3614  **/
3615 static unsigned int igb_update_itr(struct igb_adapter *adapter, u16 itr_setting,
3616                                    int packets, int bytes)
3617 {
3618         unsigned int retval = itr_setting;
3619
3620         if (packets == 0)
3621                 goto update_itr_done;
3622
3623         switch (itr_setting) {
3624         case lowest_latency:
3625                 /* handle TSO and jumbo frames */
3626                 if (bytes/packets > 8000)
3627                         retval = bulk_latency;
3628                 else if ((packets < 5) && (bytes > 512))
3629                         retval = low_latency;
3630                 break;
3631         case low_latency:  /* 50 usec aka 20000 ints/s */
3632                 if (bytes > 10000) {
3633                         /* this if handles the TSO accounting */
3634                         if (bytes/packets > 8000) {
3635                                 retval = bulk_latency;
3636                         } else if ((packets < 10) || ((bytes/packets) > 1200)) {
3637                                 retval = bulk_latency;
3638                         } else if ((packets > 35)) {
3639                                 retval = lowest_latency;
3640                         }
3641                 } else if (bytes/packets > 2000) {
3642                         retval = bulk_latency;
3643                 } else if (packets <= 2 && bytes < 512) {
3644                         retval = lowest_latency;
3645                 }
3646                 break;
3647         case bulk_latency: /* 250 usec aka 4000 ints/s */
3648                 if (bytes > 25000) {
3649                         if (packets > 35)
3650                                 retval = low_latency;
3651                 } else if (bytes < 1500) {
3652                         retval = low_latency;
3653                 }
3654                 break;
3655         }
3656
3657 update_itr_done:
3658         return retval;
3659 }
3660
3661 static void igb_set_itr(struct igb_adapter *adapter)
3662 {
3663         struct igb_q_vector *q_vector = adapter->q_vector[0];
3664         u16 current_itr;
3665         u32 new_itr = q_vector->itr_val;
3666
3667         /* for non-gigabit speeds, just fix the interrupt rate at 4000 */
3668         if (adapter->link_speed != SPEED_1000) {
3669                 current_itr = 0;
3670                 new_itr = 4000;
3671                 goto set_itr_now;
3672         }
3673
3674         adapter->rx_itr = igb_update_itr(adapter,
3675                                     adapter->rx_itr,
3676                                     q_vector->rx_ring->total_packets,
3677                                     q_vector->rx_ring->total_bytes);
3678
3679         adapter->tx_itr = igb_update_itr(adapter,
3680                                     adapter->tx_itr,
3681                                     q_vector->tx_ring->total_packets,
3682                                     q_vector->tx_ring->total_bytes);
3683         current_itr = max(adapter->rx_itr, adapter->tx_itr);
3684
3685         /* conservative mode (itr 3) eliminates the lowest_latency setting */
3686         if (adapter->rx_itr_setting == 3 && current_itr == lowest_latency)
3687                 current_itr = low_latency;
3688
3689         switch (current_itr) {
3690         /* counts and packets in update_itr are dependent on these numbers */
3691         case lowest_latency:
3692                 new_itr = 56;  /* aka 70,000 ints/sec */
3693                 break;
3694         case low_latency:
3695                 new_itr = 196; /* aka 20,000 ints/sec */
3696                 break;
3697         case bulk_latency:
3698                 new_itr = 980; /* aka 4,000 ints/sec */
3699                 break;
3700         default:
3701                 break;
3702         }
3703
3704 set_itr_now:
3705         q_vector->rx_ring->total_bytes = 0;
3706         q_vector->rx_ring->total_packets = 0;
3707         q_vector->tx_ring->total_bytes = 0;
3708         q_vector->tx_ring->total_packets = 0;
3709
3710         if (new_itr != q_vector->itr_val) {
3711                 /* this attempts to bias the interrupt rate towards Bulk
3712                  * by adding intermediate steps when interrupt rate is
3713                  * increasing */
3714                 new_itr = new_itr > q_vector->itr_val ?
3715                              max((new_itr * q_vector->itr_val) /
3716                                  (new_itr + (q_vector->itr_val >> 2)),
3717                                  new_itr) :
3718                              new_itr;
3719                 /* Don't write the value here; it resets the adapter's
3720                  * internal timer, and causes us to delay far longer than
3721                  * we should between interrupts.  Instead, we write the ITR
3722                  * value at the beginning of the next interrupt so the timing
3723                  * ends up being correct.
3724                  */
3725                 q_vector->itr_val = new_itr;
3726                 q_vector->set_itr = 1;
3727         }
3728 }
3729
3730 #define IGB_TX_FLAGS_CSUM               0x00000001
3731 #define IGB_TX_FLAGS_VLAN               0x00000002
3732 #define IGB_TX_FLAGS_TSO                0x00000004
3733 #define IGB_TX_FLAGS_IPV4               0x00000008
3734 #define IGB_TX_FLAGS_TSTAMP             0x00000010
3735 #define IGB_TX_FLAGS_VLAN_MASK          0xffff0000
3736 #define IGB_TX_FLAGS_VLAN_SHIFT                 16
3737
3738 static inline int igb_tso_adv(struct igb_ring *tx_ring,
3739                               struct sk_buff *skb, u32 tx_flags, u8 *hdr_len)
3740 {
3741         struct e1000_adv_tx_context_desc *context_desc;
3742         unsigned int i;
3743         int err;
3744         struct igb_buffer *buffer_info;
3745         u32 info = 0, tu_cmd = 0;
3746         u32 mss_l4len_idx;
3747         u8 l4len;
3748
3749         if (skb_header_cloned(skb)) {
3750                 err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
3751                 if (err)
3752                         return err;
3753         }
3754
3755         l4len = tcp_hdrlen(skb);
3756         *hdr_len += l4len;
3757
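             /* zero the length fields and seed the TCP checksum with the pseudo-header
              * so the hardware can finish the checksum for each TSO segment */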
3758         if (skb->protocol == htons(ETH_P_IP)) {
3759                 struct iphdr *iph = ip_hdr(skb);
3760                 iph->tot_len = 0;
3761                 iph->check = 0;
3762                 tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr,
3763                                                          iph->daddr, 0,
3764                                                          IPPROTO_TCP,
3765                                                          0);
3766         } else if (skb_is_gso_v6(skb)) {
3767                 ipv6_hdr(skb)->payload_len = 0;
3768                 tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
3769                                                        &ipv6_hdr(skb)->daddr,
3770                                                        0, IPPROTO_TCP, 0);
3771         }
3772
3773         i = tx_ring->next_to_use;
3774
3775         buffer_info = &tx_ring->buffer_info[i];
3776         context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3777         /* VLAN MACLEN IPLEN */
3778         if (tx_flags & IGB_TX_FLAGS_VLAN)
3779                 info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3780         info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3781         *hdr_len += skb_network_offset(skb);
3782         info |= skb_network_header_len(skb);
3783         *hdr_len += skb_network_header_len(skb);
3784         context_desc->vlan_macip_lens = cpu_to_le32(info);
3785
3786         /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */
3787         tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3788
3789         if (skb->protocol == htons(ETH_P_IP))
3790                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3791         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3792
3793         context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3794
3795         /* MSS L4LEN IDX */
3796         mss_l4len_idx = (skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT);
3797         mss_l4len_idx |= (l4len << E1000_ADVTXD_L4LEN_SHIFT);
3798
3799         /* For 82575, context index must be unique per ring. */
3800         if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3801                 mss_l4len_idx |= tx_ring->reg_idx << 4;
3802
3803         context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx);
3804         context_desc->seqnum_seed = 0;
3805
3806         buffer_info->time_stamp = jiffies;
3807         buffer_info->next_to_watch = i;
3808         buffer_info->dma = 0;
3809         i++;
3810         if (i == tx_ring->count)
3811                 i = 0;
3812
3813         tx_ring->next_to_use = i;
3814
3815         return true;
3816 }
3817
3818 static inline bool igb_tx_csum_adv(struct igb_ring *tx_ring,
3819                                    struct sk_buff *skb, u32 tx_flags)
3820 {
3821         struct e1000_adv_tx_context_desc *context_desc;
3822         struct device *dev = tx_ring->dev;
3823         struct igb_buffer *buffer_info;
3824         u32 info = 0, tu_cmd = 0;
3825         unsigned int i;
3826
3827         if ((skb->ip_summed == CHECKSUM_PARTIAL) ||
3828             (tx_flags & IGB_TX_FLAGS_VLAN)) {
3829                 i = tx_ring->next_to_use;
3830                 buffer_info = &tx_ring->buffer_info[i];
3831                 context_desc = E1000_TX_CTXTDESC_ADV(*tx_ring, i);
3832
3833                 if (tx_flags & IGB_TX_FLAGS_VLAN)
3834                         info |= (tx_flags & IGB_TX_FLAGS_VLAN_MASK);
3835
3836                 info |= (skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT);
3837                 if (skb->ip_summed == CHECKSUM_PARTIAL)
3838                         info |= skb_network_header_len(skb);
3839
3840                 context_desc->vlan_macip_lens = cpu_to_le32(info);
3841
3842                 tu_cmd |= (E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT);
3843
3844                 if (skb->ip_summed == CHECKSUM_PARTIAL) {
3845                         __be16 protocol;
3846
3847                         if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
3848                                 const struct vlan_ethhdr *vhdr =
3849                                           (const struct vlan_ethhdr*)skb->data;
3850
3851                                 protocol = vhdr->h_vlan_encapsulated_proto;
3852                         } else {
3853                                 protocol = skb->protocol;
3854                         }
3855
3856                         switch (protocol) {
3857                         case cpu_to_be16(ETH_P_IP):
3858                                 tu_cmd |= E1000_ADVTXD_TUCMD_IPV4;
3859                                 if (ip_hdr(skb)->protocol == IPPROTO_TCP)
3860                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3861                                 else if (ip_hdr(skb)->protocol == IPPROTO_SCTP)
3862                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3863                                 break;
3864                         case cpu_to_be16(ETH_P_IPV6):
3865                                 /* XXX what about other V6 headers?? */
3866                                 if (ipv6_hdr(skb)->nexthdr == IPPROTO_TCP)
3867                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_TCP;
3868                                 else if (ipv6_hdr(skb)->nexthdr == IPPROTO_SCTP)
3869                                         tu_cmd |= E1000_ADVTXD_TUCMD_L4T_SCTP;
3870                                 break;
3871                         default:
3872                                 if (unlikely(net_ratelimit()))
3873                                         dev_warn(dev,
3874                                             "partial checksum but proto=%x!\n",
3875                                             skb->protocol);
3876                                 break;
3877                         }
3878                 }
3879
3880                 context_desc->type_tucmd_mlhl = cpu_to_le32(tu_cmd);
3881                 context_desc->seqnum_seed = 0;
3882                 if (tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX)
3883                         context_desc->mss_l4len_idx =
3884                                 cpu_to_le32(tx_ring->reg_idx << 4);
3885
3886                 buffer_info->time_stamp = jiffies;
3887                 buffer_info->next_to_watch = i;
3888                 buffer_info->dma = 0;
3889
3890                 i++;
3891                 if (i == tx_ring->count)
3892                         i = 0;
3893                 tx_ring->next_to_use = i;
3894
3895                 return true;
3896         }
3897         return false;
3898 }
3899
3900 #define IGB_MAX_TXD_PWR 16
3901 #define IGB_MAX_DATA_PER_TXD    (1<<IGB_MAX_TXD_PWR)
3902
3903 static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb,
3904                                  unsigned int first)
3905 {
3906         struct igb_buffer *buffer_info;
3907         struct device *dev = tx_ring->dev;
3908         unsigned int hlen = skb_headlen(skb);
3909         unsigned int count = 0, i;
3910         unsigned int f;
3911         u16 gso_segs = skb_shinfo(skb)->gso_segs ?: 1;
3912
3913         i = tx_ring->next_to_use;
3914
3915         buffer_info = &tx_ring->buffer_info[i];
3916         BUG_ON(hlen >= IGB_MAX_DATA_PER_TXD);
3917         buffer_info->length = hlen;
3918         /* set time_stamp *before* dma to help avoid a possible race */
3919         buffer_info->time_stamp = jiffies;
3920         buffer_info->next_to_watch = i;
3921         buffer_info->dma = dma_map_single(dev, skb->data, hlen,
3922                                           DMA_TO_DEVICE);
3923         if (dma_mapping_error(dev, buffer_info->dma))
3924                 goto dma_error;
3925
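             /* map each paged fragment; the dma_error path below unwinds any partial mapping */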
3926         for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) {
3927                 struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[f];
3928                 unsigned int len = frag->size;
3929
3930                 count++;
3931                 i++;
3932                 if (i == tx_ring->count)
3933                         i = 0;
3934
3935                 buffer_info = &tx_ring->buffer_info[i];
3936                 BUG_ON(len >= IGB_MAX_DATA_PER_TXD);
3937                 buffer_info->length = len;
3938                 buffer_info->time_stamp = jiffies;
3939                 buffer_info->next_to_watch = i;
3940                 buffer_info->mapped_as_page = true;
3941                 buffer_info->dma = dma_map_page(dev,
3942                                                 frag->page,
3943                                                 frag->page_offset,
3944                                                 len,
3945                                                 DMA_TO_DEVICE);
3946                 if (dma_mapping_error(dev, buffer_info->dma))
3947                         goto dma_error;
3948
3949         }
3950
3951         tx_ring->buffer_info[i].skb = skb;
3952         tx_ring->buffer_info[i].shtx = skb_shinfo(skb)->tx_flags;
3953         /* multiply data chunks by size of headers */
3954         tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len;
3955         tx_ring->buffer_info[i].gso_segs = gso_segs;
3956         tx_ring->buffer_info[first].next_to_watch = i;
3957
3958         return ++count;
3959
3960 dma_error:
3961         dev_err(dev, "TX DMA map failed\n");
3962
3963         /* clear timestamp and dma mappings for failed buffer_info mapping */
3964         buffer_info->dma = 0;
3965         buffer_info->time_stamp = 0;
3966         buffer_info->length = 0;
3967         buffer_info->next_to_watch = 0;
3968         buffer_info->mapped_as_page = false;
3969
3970         /* clear timestamp and dma mappings for remaining portion of packet */
3971         while (count--) {
3972                 if (i == 0)
3973                         i = tx_ring->count;
3974                 i--;
3975                 buffer_info = &tx_ring->buffer_info[i];
3976                 igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
3977         }
3978
3979         return 0;
3980 }
3981
3982 static inline void igb_tx_queue_adv(struct igb_ring *tx_ring,
3983                                     u32 tx_flags, int count, u32 paylen,
3984                                     u8 hdr_len)
3985 {
3986         union e1000_adv_tx_desc *tx_desc;
3987         struct igb_buffer *buffer_info;
3988         u32 olinfo_status = 0, cmd_type_len;
3989         unsigned int i = tx_ring->next_to_use;
3990
3991         cmd_type_len = (E1000_ADVTXD_DTYP_DATA | E1000_ADVTXD_DCMD_IFCS |
3992                         E1000_ADVTXD_DCMD_DEXT);
3993
3994         if (tx_flags & IGB_TX_FLAGS_VLAN)
3995                 cmd_type_len |= E1000_ADVTXD_DCMD_VLE;
3996
3997         if (tx_flags & IGB_TX_FLAGS_TSTAMP)
3998                 cmd_type_len |= E1000_ADVTXD_MAC_TSTAMP;
3999
4000         if (tx_flags & IGB_TX_FLAGS_TSO) {
4001                 cmd_type_len |= E1000_ADVTXD_DCMD_TSE;
4002
4003                 /* insert tcp checksum */
4004                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4005
4006                 /* insert ip checksum */
4007                 if (tx_flags & IGB_TX_FLAGS_IPV4)
4008                         olinfo_status |= E1000_TXD_POPTS_IXSM << 8;
4009
4010         } else if (tx_flags & IGB_TX_FLAGS_CSUM) {
4011                 olinfo_status |= E1000_TXD_POPTS_TXSM << 8;
4012         }
4013
4014         if ((tx_ring->flags & IGB_RING_FLAG_TX_CTX_IDX) &&
4015             (tx_flags & (IGB_TX_FLAGS_CSUM |
4016                          IGB_TX_FLAGS_TSO |
4017                          IGB_TX_FLAGS_VLAN)))
4018                 olinfo_status |= tx_ring->reg_idx << 4;
4019
4020         olinfo_status |= ((paylen - hdr_len) << E1000_ADVTXD_PAYLEN_SHIFT);
4021
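             /* write one data descriptor per mapped buffer, wrapping around the ring as needed */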
4022         do {
4023                 buffer_info = &tx_ring->buffer_info[i];
4024                 tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
4025                 tx_desc->read.buffer_addr = cpu_to_le64(buffer_info->dma);
4026                 tx_desc->read.cmd_type_len =
4027                         cpu_to_le32(cmd_type_len | buffer_info->length);
4028                 tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status);
4029                 count--;
4030                 i++;
4031                 if (i == tx_ring->count)
4032                         i = 0;
4033         } while (count > 0);
4034
4035         tx_desc->read.cmd_type_len |= cpu_to_le32(IGB_ADVTXD_DCMD);
4036         /* Force memory writes to complete before letting h/w
4037          * know there are new descriptors to fetch.  (Only
4038          * applicable for weak-ordered memory model archs,
4039          * such as IA-64). */
4040         wmb();
4041
4042         tx_ring->next_to_use = i;
4043         writel(i, tx_ring->tail);
4044         /* we need this if more than one processor can write to our tail
4045          * at a time; it synchronizes IO on IA64/Altix systems */
4046         mmiowb();
4047 }
4048
4049 static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4050 {
4051         struct net_device *netdev = tx_ring->netdev;
4052
4053         netif_stop_subqueue(netdev, tx_ring->queue_index);
4054
4055         /* Herbert's original patch had:
4056          *  smp_mb__after_netif_stop_queue();
4057          * but since that doesn't exist yet, just open code it. */
4058         smp_mb();
4059
4060         /* We need to check again in case another CPU has just
4061          * made room available. */
4062         if (igb_desc_unused(tx_ring) < size)
4063                 return -EBUSY;
4064
4065         /* A reprieve! */
4066         netif_wake_subqueue(netdev, tx_ring->queue_index);
4067         tx_ring->tx_stats.restart_queue++;
4068         return 0;
4069 }
4070
4071 static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, int size)
4072 {
4073         if (igb_desc_unused(tx_ring) >= size)
4074                 return 0;
4075         return __igb_maybe_stop_tx(tx_ring, size);
4076 }
4077
4078 netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb,
4079                                     struct igb_ring *tx_ring)
4080 {
4081         struct igb_adapter *adapter = netdev_priv(tx_ring->netdev);
4082         int tso = 0, count;
4083         u32 tx_flags = 0;
4084         u16 first;
4085         u8 hdr_len = 0;
4086         union skb_shared_tx *shtx = skb_tx(skb);
4087
4088         /* need: 1 descriptor per page,
4089          *       + 2 desc gap to keep tail from touching head,
4090          *       + 1 desc for skb->data,
4091          *       + 1 desc for context descriptor,
4092          * otherwise try next time */
4093         if (igb_maybe_stop_tx(tx_ring, skb_shinfo(skb)->nr_frags + 4)) {
4094                 /* this is a hard error */
4095                 return NETDEV_TX_BUSY;
4096         }
4097
4098         if (unlikely(shtx->hardware)) {
4099                 shtx->in_progress = 1;
4100                 tx_flags |= IGB_TX_FLAGS_TSTAMP;
4101         }
4102
4103         if (vlan_tx_tag_present(skb) && adapter->vlgrp) {
4104                 tx_flags |= IGB_TX_FLAGS_VLAN;
4105                 tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT);
4106         }
4107
4108         if (skb->protocol == htons(ETH_P_IP))
4109                 tx_flags |= IGB_TX_FLAGS_IPV4;
4110
4111         first = tx_ring->next_to_use;
4112         if (skb_is_gso(skb)) {
4113                 tso = igb_tso_adv(tx_ring, skb, tx_flags, &hdr_len);
4114
4115                 if (tso < 0) {
4116                         dev_kfree_skb_any(skb);
4117                         return NETDEV_TX_OK;
4118                 }
4119         }
4120
4121         if (tso)
4122                 tx_flags |= IGB_TX_FLAGS_TSO;
4123         else if (igb_tx_csum_adv(tx_ring, skb, tx_flags) &&
4124                  (skb->ip_summed == CHECKSUM_PARTIAL))
4125                 tx_flags |= IGB_TX_FLAGS_CSUM;
4126
4127         /*
4128          * count reflects descriptors mapped; if 0 or less then a mapping error
4129          * has occurred and we need to rewind the descriptor queue
4130          */
4131         count = igb_tx_map_adv(tx_ring, skb, first);
4132         if (!count) {
4133                 dev_kfree_skb_any(skb);
4134                 tx_ring->buffer_info[first].time_stamp = 0;
4135                 tx_ring->next_to_use = first;
4136                 return NETDEV_TX_OK;
4137         }
4138
4139         igb_tx_queue_adv(tx_ring, tx_flags, count, skb->len, hdr_len);
4140
4141         /* Make sure there is space in the ring for the next send. */
4142         igb_maybe_stop_tx(tx_ring, MAX_SKB_FRAGS + 4);
4143
4144         return NETDEV_TX_OK;
4145 }
4146
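/**
 * igb_xmit_frame_adv - net_device transmit entry point
 * @skb: buffer to transmit
 * @netdev: network interface device structure
 *
 * Selects a Tx ring from the skb queue mapping and hands the buffer to
 * igb_xmit_frame_ring_adv().
 **/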
4147 static netdev_tx_t igb_xmit_frame_adv(struct sk_buff *skb,
4148                                       struct net_device *netdev)
4149 {
4150         struct igb_adapter *adapter = netdev_priv(netdev);
4151         struct igb_ring *tx_ring;
4152         int r_idx = 0;
4153
4154         if (test_bit(__IGB_DOWN, &adapter->state)) {
4155                 dev_kfree_skb_any(skb);
4156                 return NETDEV_TX_OK;
4157         }
4158
4159         if (skb->len <= 0) {
4160                 dev_kfree_skb_any(skb);
4161                 return NETDEV_TX_OK;
4162         }
4163
4164         r_idx = skb->queue_mapping & (IGB_ABS_MAX_TX_QUEUES - 1);
4165         tx_ring = adapter->multi_tx_table[r_idx];
4166
4167         /* This goes back to the question of how to logically map a tx queue
4168          * to a flow.  Right now, performance is slightly degraded when
4169          * using multiple tx queues.  If the stack breaks away from a
4170          * single qdisc implementation, we can look at this again. */
4171         return igb_xmit_frame_ring_adv(skb, tx_ring);
4172 }
4173
4174 /**
4175  * igb_tx_timeout - Respond to a Tx Hang
4176  * @netdev: network interface device structure
4177  **/
4178 static void igb_tx_timeout(struct net_device *netdev)
4179 {
4180         struct igb_adapter *adapter = netdev_priv(netdev);
4181         struct e1000_hw *hw = &adapter->hw;
4182
4183         /* Do the reset outside of interrupt context */
4184         adapter->tx_timeout_count++;
4185
4186         if (hw->mac.type == e1000_82580)
4187                 hw->dev_spec._82575.global_device_reset = true;
4188
4189         schedule_work(&adapter->reset_task);
4190         wr32(E1000_EICS,
4191              (adapter->eims_enable_mask & ~adapter->eims_other));
4192 }
4193
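/**
 * igb_reset_task - reset the adapter outside of interrupt context
 * @work: work_struct embedded in the adapter structure
 **/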
4194 static void igb_reset_task(struct work_struct *work)
4195 {
4196         struct igb_adapter *adapter;
4197         adapter = container_of(work, struct igb_adapter, reset_task);
4198
4199         igb_dump(adapter);
4200         netdev_err(adapter->netdev, "Reset adapter\n");
4201         igb_reinit_locked(adapter);
4202 }
4203
4204 /**
4205  * igb_get_stats - Get System Network Statistics
4206  * @netdev: network interface device structure
4207  *
4208  * Returns the address of the device statistics structure.
4209  * The statistics are actually updated from the timer callback.
4210  **/
4211 static struct net_device_stats *igb_get_stats(struct net_device *netdev)
4212 {
4213         /* only return the current stats */
4214         return &netdev->stats;
4215 }
4216
4217 /**
4218  * igb_change_mtu - Change the Maximum Transfer Unit
4219  * @netdev: network interface device structure
4220  * @new_mtu: new value for maximum frame size
4221  *
4222  * Returns 0 on success, negative on failure
4223  **/
4224 static int igb_change_mtu(struct net_device *netdev, int new_mtu)
4225 {
4226         struct igb_adapter *adapter = netdev_priv(netdev);
4227         struct pci_dev *pdev = adapter->pdev;
4228         int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN;
4229         u32 rx_buffer_len, i;
4230
4231         if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) {
4232                 dev_err(&pdev->dev, "Invalid MTU setting\n");
4233                 return -EINVAL;
4234         }
4235
4236         if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) {
4237                 dev_err(&pdev->dev, "MTU > 9216 not supported.\n");
4238                 return -EINVAL;
4239         }
4240
4241         while (test_and_set_bit(__IGB_RESETTING, &adapter->state))
4242                 msleep(1);
4243
4244         /* igb_down has a dependency on max_frame_size */
4245         adapter->max_frame_size = max_frame;
4246
4247         /* NOTE: netdev_alloc_skb reserves 16 bytes, and typically NET_IP_ALIGN
4248          * means we reserve 2 more; this pushes us to allocate from the next
4249          * larger slab size.
4250          * i.e. RXBUFFER_2048 --> size-4096 slab
4251          */
4252
4253         if (adapter->hw.mac.type == e1000_82580)
4254                 max_frame += IGB_TS_HDR_LEN;
4255
4256         if (max_frame <= IGB_RXBUFFER_1024)
4257                 rx_buffer_len = IGB_RXBUFFER_1024;
4258         else if (max_frame <= MAXIMUM_ETHERNET_VLAN_SIZE)
4259                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE;
4260         else
4261                 rx_buffer_len = IGB_RXBUFFER_128;
4262
4263         if ((max_frame == ETH_FRAME_LEN + ETH_FCS_LEN + IGB_TS_HDR_LEN) ||
4264              (max_frame == MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN))
4265                 rx_buffer_len = MAXIMUM_ETHERNET_VLAN_SIZE + IGB_TS_HDR_LEN;
4266
4267         if ((adapter->hw.mac.type == e1000_82580) &&
4268             (rx_buffer_len == IGB_RXBUFFER_128))
4269                 rx_buffer_len += IGB_RXBUFFER_64;
4270
4271         if (netif_running(netdev))
4272                 igb_down(adapter);
4273
4274         dev_info(&pdev->dev, "changing MTU from %d to %d\n",
4275                  netdev->mtu, new_mtu);
4276         netdev->mtu = new_mtu;
4277
4278         for (i = 0; i < adapter->num_rx_queues; i++)
4279                 adapter->rx_ring[i]->rx_buffer_len = rx_buffer_len;
4280
4281         if (netif_running(netdev))
4282                 igb_up(adapter);
4283         else
4284                 igb_reset(adapter);
4285
4286         clear_bit(__IGB_RESETTING, &adapter->state);
4287
4288         return 0;
4289 }
4290
4291 /**
4292  * igb_update_stats - Update the board statistics counters
4293  * @adapter: board private structure
4294  **/
4295
4296 void igb_update_stats(struct igb_adapter *adapter)
4297 {
4298         struct net_device_stats *net_stats = igb_get_stats(adapter->netdev);
4299         struct e1000_hw *hw = &adapter->hw;
4300         struct pci_dev *pdev = adapter->pdev;
4301         u32 reg, mpc;
4302         u16 phy_tmp;
4303         int i;
4304         u64 bytes, packets;
4305
4306 #define PHY_IDLE_ERROR_COUNT_MASK 0x00FF
4307
4308         /*
4309          * Prevent stats update while adapter is being reset, or if the pci
4310          * connection is down.
4311          */
4312         if (adapter->link_speed == 0)
4313                 return;
4314         if (pci_channel_offline(pdev))
4315                 return;
4316
4317         bytes = 0;
4318         packets = 0;
4319         for (i = 0; i < adapter->num_rx_queues; i++) {
4320                 u32 rqdpc_tmp = rd32(E1000_RQDPC(i)) & 0x0FFF;
4321                 struct igb_ring *ring = adapter->rx_ring[i];
4322                 ring->rx_stats.drops += rqdpc_tmp;
4323                 net_stats->rx_fifo_errors += rqdpc_tmp;
4324                 bytes += ring->rx_stats.bytes;
4325                 packets += ring->rx_stats.packets;
4326         }
4327
4328         net_stats->rx_bytes = bytes;
4329         net_stats->rx_packets = packets;
4330
4331         bytes = 0;
4332         packets = 0;
4333         for (i = 0; i < adapter->num_tx_queues; i++) {
4334                 struct igb_ring *ring = adapter->tx_ring[i];
4335                 bytes += ring->tx_stats.bytes;
4336                 packets += ring->tx_stats.packets;
4337         }
4338         net_stats->tx_bytes = bytes;
4339         net_stats->tx_packets = packets;
4340
4341         /* read stats registers */
4342         adapter->stats.crcerrs += rd32(E1000_CRCERRS);
4343         adapter->stats.gprc += rd32(E1000_GPRC);
4344         adapter->stats.gorc += rd32(E1000_GORCL);
4345         rd32(E1000_GORCH); /* clear GORCL */
4346         adapter->stats.bprc += rd32(E1000_BPRC);
4347         adapter->stats.mprc += rd32(E1000_MPRC);
4348         adapter->stats.roc += rd32(E1000_ROC);
4349
4350         adapter->stats.prc64 += rd32(E1000_PRC64);
4351         adapter->stats.prc127 += rd32(E1000_PRC127);
4352         adapter->stats.prc255 += rd32(E1000_PRC255);
4353         adapter->stats.prc511 += rd32(E1000_PRC511);
4354         adapter->stats.prc1023 += rd32(E1000_PRC1023);
4355         adapter->stats.prc1522 += rd32(E1000_PRC1522);
4356         adapter->stats.symerrs += rd32(E1000_SYMERRS);
4357         adapter->stats.sec += rd32(E1000_SEC);
4358
4359         mpc = rd32(E1000_MPC);
4360         adapter->stats.mpc += mpc;
4361         net_stats->rx_fifo_errors += mpc;
4362         adapter->stats.scc += rd32(E1000_SCC);
4363         adapter->stats.ecol += rd32(E1000_ECOL);
4364         adapter->stats.mcc += rd32(E1000_MCC);
4365         adapter->stats.latecol += rd32(E1000_LATECOL);
4366         adapter->stats.dc += rd32(E1000_DC);
4367         adapter->stats.rlec += rd32(E1000_RLEC);
4368         adapter->stats.xonrxc += rd32(E1000_XONRXC);
4369         adapter->stats.xontxc += rd32(E1000_XONTXC);
4370         adapter->stats.xoffrxc += rd32(E1000_XOFFRXC);
4371         adapter->stats.xofftxc += rd32(E1000_XOFFTXC);
4372         adapter->stats.fcruc += rd32(E1000_FCRUC);
4373         adapter->stats.gptc += rd32(E1000_GPTC);
4374         adapter->stats.gotc += rd32(E1000_GOTCL);
4375         rd32(E1000_GOTCH); /* clear GOTCL */
4376         adapter->stats.rnbc += rd32(E1000_RNBC);
4377         adapter->stats.ruc += rd32(E1000_RUC);
4378         adapter->stats.rfc += rd32(E1000_RFC);
4379         adapter->stats.rjc += rd32(E1000_RJC);
4380         adapter->stats.tor += rd32(E1000_TORH);
4381         adapter->stats.tot += rd32(E1000_TOTH);
4382         adapter->stats.tpr += rd32(E1000_TPR);
4383
4384         adapter->stats.ptc64 += rd32(E1000_PTC64);
4385         adapter->stats.ptc127 += rd32(E1000_PTC127);
4386         adapter->stats.ptc255 += rd32(E1000_PTC255);
4387         adapter->stats.ptc511 += rd32(E1000_PTC511);
4388         adapter->stats.ptc1023 += rd32(E1000_PTC1023);
4389         adapter->stats.ptc1522 += rd32(E1000_PTC1522);
4390
4391         adapter->stats.mptc += rd32(E1000_MPTC);
4392         adapter->stats.bptc += rd32(E1000_BPTC);
4393
4394         adapter->stats.tpt += rd32(E1000_TPT);
4395         adapter->stats.colc += rd32(E1000_COLC);
4396
4397         adapter->stats.algnerrc += rd32(E1000_ALGNERRC);
4398         /* read internal phy specific stats */
4399         reg = rd32(E1000_CTRL_EXT);
4400         if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) {
4401                 adapter->stats.rxerrc += rd32(E1000_RXERRC);
4402                 adapter->stats.tncrs += rd32(E1000_TNCRS);
4403         }
4404
4405         adapter->stats.tsctc += rd32(E1000_TSCTC);
4406         adapter->stats.tsctfc += rd32(E1000_TSCTFC);
4407
4408         adapter->stats.iac += rd32(E1000_IAC);
4409         adapter->stats.icrxoc += rd32(E1000_ICRXOC);
4410         adapter->stats.icrxptc += rd32(E1000_ICRXPTC);
4411         adapter->stats.icrxatc += rd32(E1000_ICRXATC);
4412         adapter->stats.ictxptc += rd32(E1000_ICTXPTC);
4413         adapter->stats.ictxatc += rd32(E1000_ICTXATC);
4414         adapter->stats.ictxqec += rd32(E1000_ICTXQEC);
4415         adapter->stats.ictxqmtc += rd32(E1000_ICTXQMTC);
4416         adapter->stats.icrxdmtc += rd32(E1000_ICRXDMTC);
4417
4418         /* Fill out the OS statistics structure */
4419         net_stats->multicast = adapter->stats.mprc;
4420         net_stats->collisions = adapter->stats.colc;
4421
4422         /* Rx Errors */
4423
4424         /* RLEC on some newer hardware can be incorrect so build
4425          * our own version based on RUC and ROC */
4426         net_stats->rx_errors = adapter->stats.rxerrc +
4427                 adapter->stats.crcerrs + adapter->stats.algnerrc +
4428                 adapter->stats.ruc + adapter->stats.roc +
4429                 adapter->stats.cexterr;
4430         net_stats->rx_length_errors = adapter->stats.ruc +
4431                                       adapter->stats.roc;
4432         net_stats->rx_crc_errors = adapter->stats.crcerrs;
4433         net_stats->rx_frame_errors = adapter->stats.algnerrc;
4434         net_stats->rx_missed_errors = adapter->stats.mpc;
4435
4436         /* Tx Errors */
4437         net_stats->tx_errors = adapter->stats.ecol +
4438                                adapter->stats.latecol;
4439         net_stats->tx_aborted_errors = adapter->stats.ecol;
4440         net_stats->tx_window_errors = adapter->stats.latecol;
4441         net_stats->tx_carrier_errors = adapter->stats.tncrs;
4442
4443         /* Tx Dropped needs to be maintained elsewhere */
4444
4445         /* Phy Stats */
4446         if (hw->phy.media_type == e1000_media_type_copper) {
4447                 if ((adapter->link_speed == SPEED_1000) &&
4448                    (!igb_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) {
4449                         phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK;
4450                         adapter->phy_stats.idle_errors += phy_tmp;
4451                 }
4452         }
4453
4454         /* Management Stats */
4455         adapter->stats.mgptc += rd32(E1000_MGTPTC);
4456         adapter->stats.mgprc += rd32(E1000_MGTPRC);
4457         adapter->stats.mgpdc += rd32(E1000_MGTPDC);
4458 }
4459
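/**
 * igb_msix_other - MSI-X handler for link, mailbox and other causes
 * @irq: interrupt number
 * @data: pointer to our adapter structure
 **/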
4460 static irqreturn_t igb_msix_other(int irq, void *data)
4461 {
4462         struct igb_adapter *adapter = data;
4463         struct e1000_hw *hw = &adapter->hw;
4464         u32 icr = rd32(E1000_ICR);
4465         /* reading ICR causes bit 31 of EICR to be cleared */
4466
4467         if (icr & E1000_ICR_DRSTA)
4468                 schedule_work(&adapter->reset_task);
4469
4470         if (icr & E1000_ICR_DOUTSYNC) {
4471                 /* HW is reporting DMA is out of sync */
4472                 adapter->stats.doosync++;
4473         }
4474
4475         /* Check for a mailbox event */
4476         if (icr & E1000_ICR_VMMB)
4477                 igb_msg_task(adapter);
4478
4479         if (icr & E1000_ICR_LSC) {
4480                 hw->mac.get_link_status = 1;
4481                 /* guard against interrupt when we're going down */
4482                 if (!test_bit(__IGB_DOWN, &adapter->state))
4483                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
4484         }
4485
4486         if (adapter->vfs_allocated_count)
4487                 wr32(E1000_IMS, E1000_IMS_LSC |
4488                                 E1000_IMS_VMMB |
4489                                 E1000_IMS_DOUTSYNC);
4490         else
4491                 wr32(E1000_IMS, E1000_IMS_LSC | E1000_IMS_DOUTSYNC);
4492         wr32(E1000_EIMS, adapter->eims_other);
4493
4494         return IRQ_HANDLED;
4495 }
4496
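/**
 * igb_write_itr - write a pending interrupt throttle rate to hardware
 * @q_vector: queue vector whose EITR register should be updated
 **/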
4497 static void igb_write_itr(struct igb_q_vector *q_vector)
4498 {
4499         struct igb_adapter *adapter = q_vector->adapter;
4500         u32 itr_val = q_vector->itr_val & 0x7FFC;
4501
4502         if (!q_vector->set_itr)
4503                 return;
4504
4505         if (!itr_val)
4506                 itr_val = 0x4;
4507
4508         if (adapter->hw.mac.type == e1000_82575)
4509                 itr_val |= itr_val << 16;
4510         else
4511                 itr_val |= 0x8000000;
4512
4513         writel(itr_val, q_vector->itr_register);
4514         q_vector->set_itr = 0;
4515 }
4516
4517 static irqreturn_t igb_msix_ring(int irq, void *data)
4518 {
4519         struct igb_q_vector *q_vector = data;
4520
4521         /* Write the ITR value calculated from the previous interrupt. */
4522         igb_write_itr(q_vector);
4523
4524         napi_schedule(&q_vector->napi);
4525
4526         return IRQ_HANDLED;
4527 }
4528
4529 #ifdef CONFIG_IGB_DCA
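/**
 * igb_update_dca - update the DCA target CPU for a queue vector
 * @q_vector: queue vector whose Tx/Rx DCA control registers are updated
 **/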
4530 static void igb_update_dca(struct igb_q_vector *q_vector)
4531 {
4532         struct igb_adapter *adapter = q_vector->adapter;
4533         struct e1000_hw *hw = &adapter->hw;
4534         int cpu = get_cpu();
4535
4536         if (q_vector->cpu == cpu)
4537                 goto out_no_update;
4538
4539         if (q_vector->tx_ring) {
4540                 int q = q_vector->tx_ring->reg_idx;
4541                 u32 dca_txctrl = rd32(E1000_DCA_TXCTRL(q));
4542                 if (hw->mac.type == e1000_82575) {
4543                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK;
4544                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4545                 } else {
4546                         dca_txctrl &= ~E1000_DCA_TXCTRL_CPUID_MASK_82576;
4547                         dca_txctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4548                                       E1000_DCA_TXCTRL_CPUID_SHIFT;
4549                 }
4550                 dca_txctrl |= E1000_DCA_TXCTRL_DESC_DCA_EN;
4551                 wr32(E1000_DCA_TXCTRL(q), dca_txctrl);
4552         }
4553         if (q_vector->rx_ring) {
4554                 int q = q_vector->rx_ring->reg_idx;
4555                 u32 dca_rxctrl = rd32(E1000_DCA_RXCTRL(q));
4556                 if (hw->mac.type == e1000_82575) {
4557                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK;
4558                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu);
4559                 } else {
4560                         dca_rxctrl &= ~E1000_DCA_RXCTRL_CPUID_MASK_82576;
4561                         dca_rxctrl |= dca3_get_tag(&adapter->pdev->dev, cpu) <<
4562                                       E1000_DCA_RXCTRL_CPUID_SHIFT;
4563                 }
4564                 dca_rxctrl |= E1000_DCA_RXCTRL_DESC_DCA_EN;
4565                 dca_rxctrl |= E1000_DCA_RXCTRL_HEAD_DCA_EN;
4566                 dca_rxctrl |= E1000_DCA_RXCTRL_DATA_DCA_EN;
4567                 wr32(E1000_DCA_RXCTRL(q), dca_rxctrl);
4568         }
4569         q_vector->cpu = cpu;
4570 out_no_update:
4571         put_cpu();
4572 }
4573
4574 static void igb_setup_dca(struct igb_adapter *adapter)
4575 {
4576         struct e1000_hw *hw = &adapter->hw;
4577         int i;
4578
4579         if (!(adapter->flags & IGB_FLAG_DCA_ENABLED))
4580                 return;
4581
4582         /* Always use CB2 mode; the difference is masked in the CB driver. */
4583         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2);
4584
4585         for (i = 0; i < adapter->num_q_vectors; i++) {
4586                 adapter->q_vector[i]->cpu = -1;
4587                 igb_update_dca(adapter->q_vector[i]);
4588         }
4589 }
4590
4591 static int __igb_notify_dca(struct device *dev, void *data)
4592 {
4593         struct net_device *netdev = dev_get_drvdata(dev);
4594         struct igb_adapter *adapter = netdev_priv(netdev);
4595         struct pci_dev *pdev = adapter->pdev;
4596         struct e1000_hw *hw = &adapter->hw;
4597         unsigned long event = *(unsigned long *)data;
4598
4599         switch (event) {
4600         case DCA_PROVIDER_ADD:
4601                 /* if already enabled, don't do it again */
4602                 if (adapter->flags & IGB_FLAG_DCA_ENABLED)
4603                         break;
4604                 if (dca_add_requester(dev) == 0) {
4605                         adapter->flags |= IGB_FLAG_DCA_ENABLED;
4606                         dev_info(&pdev->dev, "DCA enabled\n");
4607                         igb_setup_dca(adapter);
4608                         break;
4609                 }
4610                 /* Fall Through since DCA is disabled. */
4611         case DCA_PROVIDER_REMOVE:
4612                 if (adapter->flags & IGB_FLAG_DCA_ENABLED) {
4613                         /* without this a class_device is left
4614                          * hanging around in the sysfs model */
4615                         dca_remove_requester(dev);
4616                         dev_info(&pdev->dev, "DCA disabled\n");
4617                         adapter->flags &= ~IGB_FLAG_DCA_ENABLED;
4618                         wr32(E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_DISABLE);
4619                 }
4620                 break;
4621         }
4622
4623         return 0;
4624 }
4625
4626 static int igb_notify_dca(struct notifier_block *nb, unsigned long event,
4627                           void *p)
4628 {
4629         int ret_val;
4630
4631         ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event,
4632                                          __igb_notify_dca);
4633
4634         return ret_val ? NOTIFY_BAD : NOTIFY_DONE;
4635 }
4636 #endif /* CONFIG_IGB_DCA */
4637
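/**
 * igb_ping_all_vfs - notify all VFs via the mailbox
 * @adapter: board private structure
 **/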
4638 static void igb_ping_all_vfs(struct igb_adapter *adapter)
4639 {
4640         struct e1000_hw *hw = &adapter->hw;
4641         u32 ping;
4642         int i;
4643
4644         for (i = 0 ; i < adapter->vfs_allocated_count; i++) {
4645                 ping = E1000_PF_CONTROL_MSG;
4646                 if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS)
4647                         ping |= E1000_VT_MSGTYPE_CTS;
4648                 igb_write_mbx(hw, &ping, 1, i);
4649         }
4650 }
4651
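/**
 * igb_set_vf_promisc - handle a VF request to change promiscuous settings
 * @adapter: board private structure
 * @msgbuf: mailbox message received from the VF
 * @vf: VF index
 **/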
4652 static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4653 {
4654         struct e1000_hw *hw = &adapter->hw;
4655         u32 vmolr = rd32(E1000_VMOLR(vf));
4656         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4657
4658         vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC |
4659                             IGB_VF_FLAG_MULTI_PROMISC);
4660         vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4661
4662         if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) {
4663                 vmolr |= E1000_VMOLR_MPME;
4664                 *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST;
4665         } else {
4666                 /*
4667                  * if we have hashes and we are clearing a multicast promisc
4668                  * flag we need to write the hashes to the MTA as this step
4669                  * was previously skipped
4670                  */
4671                 if (vf_data->num_vf_mc_hashes > 30) {
4672                         vmolr |= E1000_VMOLR_MPME;
4673                 } else if (vf_data->num_vf_mc_hashes) {
4674                         int j;
4675                         vmolr |= E1000_VMOLR_ROMPE;
4676                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4677                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4678                 }
4679         }
4680
4681         wr32(E1000_VMOLR(vf), vmolr);
4682
4683         /* any flags left unprocessed are likely not supported */
4684         if (*msgbuf & E1000_VT_MSGINFO_MASK)
4685                 return -EINVAL;
4686
4687         return 0;
4688
4689 }
4690
4691 static int igb_set_vf_multicasts(struct igb_adapter *adapter,
4692                                   u32 *msgbuf, u32 vf)
4693 {
4694         int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4695         u16 *hash_list = (u16 *)&msgbuf[1];
4696         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
4697         int i;
4698
4699         /* salt away the number of multicast addresses assigned
4700          * to this VF so it can be restored when the PF multicast
4701          * list changes
4702          */
4703         vf_data->num_vf_mc_hashes = n;
4704
4705         /* only up to 30 hash values supported */
4706         if (n > 30)
4707                 n = 30;
4708
4709         /* store the hashes for later use */
4710         for (i = 0; i < n; i++)
4711                 vf_data->vf_mc_hashes[i] = hash_list[i];
4712
4713         /* Flush and reset the mta with the new values */
4714         igb_set_rx_mode(adapter->netdev);
4715
4716         return 0;
4717 }
4718
4719 static void igb_restore_vf_multicasts(struct igb_adapter *adapter)
4720 {
4721         struct e1000_hw *hw = &adapter->hw;
4722         struct vf_data_storage *vf_data;
4723         int i, j;
4724
4725         for (i = 0; i < adapter->vfs_allocated_count; i++) {
4726                 u32 vmolr = rd32(E1000_VMOLR(i));
4727                 vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME);
4728
4729                 vf_data = &adapter->vf_data[i];
4730
4731                 if ((vf_data->num_vf_mc_hashes > 30) ||
4732                     (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) {
4733                         vmolr |= E1000_VMOLR_MPME;
4734                 } else if (vf_data->num_vf_mc_hashes) {
4735                         vmolr |= E1000_VMOLR_ROMPE;
4736                         for (j = 0; j < vf_data->num_vf_mc_hashes; j++)
4737                                 igb_mta_set(hw, vf_data->vf_mc_hashes[j]);
4738                 }
4739                 wr32(E1000_VMOLR(i), vmolr);
4740         }
4741 }
4742
4743 static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf)
4744 {
4745         struct e1000_hw *hw = &adapter->hw;
4746         u32 pool_mask, reg, vid;
4747         int i;
4748
4749         pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4750
4751         /* Find the vlan filter for this id */
4752         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4753                 reg = rd32(E1000_VLVF(i));
4754
4755                 /* remove the vf from the pool */
4756                 reg &= ~pool_mask;
4757
4758                 /* if pool is empty then remove entry from vfta */
4759                 if (!(reg & E1000_VLVF_POOLSEL_MASK) &&
4760                     (reg & E1000_VLVF_VLANID_ENABLE)) {
4761                         vid = reg & E1000_VLVF_VLANID_MASK;
4762                         reg = 0;
4763                         igb_vfta_set(hw, vid, false);
4764                 }
4765
4766                 wr32(E1000_VLVF(i), reg);
4767         }
4768
4769         adapter->vf_data[vf].vlans_enabled = 0;
4770 }
4771
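/**
 * igb_vlvf_set - add or remove a VF from a VLAN filter pool
 * @adapter: board private structure
 * @vid: VLAN id
 * @add: true to add the VF to the filter, false to remove it
 * @vf: VF (or PF pool) index
 **/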
4772 static s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf)
4773 {
4774         struct e1000_hw *hw = &adapter->hw;
4775         u32 reg, i;
4776
4777         /* The vlvf table only exists on 82576 hardware and newer */
4778         if (hw->mac.type < e1000_82576)
4779                 return -1;
4780
4781         /* we only need to do this if VMDq is enabled */
4782         if (!adapter->vfs_allocated_count)
4783                 return -1;
4784
4785         /* Find the vlan filter for this id */
4786         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4787                 reg = rd32(E1000_VLVF(i));
4788                 if ((reg & E1000_VLVF_VLANID_ENABLE) &&
4789                     vid == (reg & E1000_VLVF_VLANID_MASK))
4790                         break;
4791         }
4792
4793         if (add) {
4794                 if (i == E1000_VLVF_ARRAY_SIZE) {
4795                         /* Did not find a matching VLAN ID entry that was
4796                          * enabled.  Search for a free filter entry, i.e.
4797                          * one without the enable bit set
4798                          */
4799                         for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) {
4800                                 reg = rd32(E1000_VLVF(i));
4801                                 if (!(reg & E1000_VLVF_VLANID_ENABLE))
4802                                         break;
4803                         }
4804                 }
4805                 if (i < E1000_VLVF_ARRAY_SIZE) {
4806                         /* Found an enabled/available entry */
4807                         reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf);
4808
4809                         /* if !enabled we need to set this up in vfta */
4810                         if (!(reg & E1000_VLVF_VLANID_ENABLE)) {
4811                                 /* add VID to filter table */
4812                                 igb_vfta_set(hw, vid, true);
4813                                 reg |= E1000_VLVF_VLANID_ENABLE;
4814                         }
4815                         reg &= ~E1000_VLVF_VLANID_MASK;
4816                         reg |= vid;
4817                         wr32(E1000_VLVF(i), reg);
4818
4819                         /* do not modify RLPML for PF devices */
4820                         if (vf >= adapter->vfs_allocated_count)
4821                                 return 0;
4822
4823                         if (!adapter->vf_data[vf].vlans_enabled) {
4824                                 u32 size;
4825                                 reg = rd32(E1000_VMOLR(vf));
4826                                 size = reg & E1000_VMOLR_RLPML_MASK;
4827                                 size += 4;
4828                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4829                                 reg |= size;
4830                                 wr32(E1000_VMOLR(vf), reg);
4831                         }
4832
4833                         adapter->vf_data[vf].vlans_enabled++;
4834                         return 0;
4835                 }
4836         } else {
4837                 if (i < E1000_VLVF_ARRAY_SIZE) {
4838                         /* remove vf from the pool */
4839                         reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf));
4840                         /* if pool is empty then remove entry from vfta */
4841                         if (!(reg & E1000_VLVF_POOLSEL_MASK)) {
4842                                 reg = 0;
4843                                 igb_vfta_set(hw, vid, false);
4844                         }
4845                         wr32(E1000_VLVF(i), reg);
4846
4847                         /* do not modify RLPML for PF devices */
4848                         if (vf >= adapter->vfs_allocated_count)
4849                                 return 0;
4850
4851                         adapter->vf_data[vf].vlans_enabled--;
4852                         if (!adapter->vf_data[vf].vlans_enabled) {
4853                                 u32 size;
4854                                 reg = rd32(E1000_VMOLR(vf));
4855                                 size = reg & E1000_VMOLR_RLPML_MASK;
4856                                 size -= 4;
4857                                 reg &= ~E1000_VMOLR_RLPML_MASK;
4858                                 reg |= size;
4859                                 wr32(E1000_VMOLR(vf), reg);
4860                         }
4861                 }
4862         }
4863         return 0;
4864 }
4865
4866 static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf)
4867 {
4868         struct e1000_hw *hw = &adapter->hw;
4869
4870         if (vid)
4871                 wr32(E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT));
4872         else
4873                 wr32(E1000_VMVIR(vf), 0);
4874 }
4875
4876 static int igb_ndo_set_vf_vlan(struct net_device *netdev,
4877                                int vf, u16 vlan, u8 qos)
4878 {
4879         int err = 0;
4880         struct igb_adapter *adapter = netdev_priv(netdev);
4881
4882         if ((vf >= adapter->vfs_allocated_count) || (vlan > 4095) || (qos > 7))
4883                 return -EINVAL;
4884         if (vlan || qos) {
4885                 err = igb_vlvf_set(adapter, vlan, !!vlan, vf);
4886                 if (err)
4887                         goto out;
4888                 igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf);
4889                 igb_set_vmolr(adapter, vf, !vlan);
4890                 adapter->vf_data[vf].pf_vlan = vlan;
4891                 adapter->vf_data[vf].pf_qos = qos;
4892                 dev_info(&adapter->pdev->dev,
4893                          "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf);
4894                 if (test_bit(__IGB_DOWN, &adapter->state)) {
4895                         dev_warn(&adapter->pdev->dev,
4896                                  "The VF VLAN has been set,"
4897                                  " but the PF device is not up.\n");
4898                         dev_warn(&adapter->pdev->dev,
4899                                  "Bring the PF device up before"
4900                                  " attempting to use the VF device.\n");
4901                 }
4902         } else {
4903                 igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan,
4904                                    false, vf);
4905                 igb_set_vmvir(adapter, vlan, vf);
4906                 igb_set_vmolr(adapter, vf, true);
4907                 adapter->vf_data[vf].pf_vlan = 0;
4908                 adapter->vf_data[vf].pf_qos = 0;
4909         }
4910 out:
4911         return err;
4912 }
4913
4914 static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf)
4915 {
4916         int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT;
4917         int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK);
4918
4919         return igb_vlvf_set(adapter, vid, add, vf);
4920 }
4921
4922 static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf)
4923 {
4924         /* clear flags */
4925         adapter->vf_data[vf].flags &= ~(IGB_VF_FLAG_PF_SET_MAC);
4926         adapter->vf_data[vf].last_nack = jiffies;
4927
4928         /* reset offloads to defaults */
4929         igb_set_vmolr(adapter, vf, true);
4930
4931         /* reset vlans for device */
4932         igb_clear_vf_vfta(adapter, vf);
4933         if (adapter->vf_data[vf].pf_vlan)
4934                 igb_ndo_set_vf_vlan(adapter->netdev, vf,
4935                                     adapter->vf_data[vf].pf_vlan,
4936                                     adapter->vf_data[vf].pf_qos);
4937         else
4938                 igb_clear_vf_vfta(adapter, vf);
4939
4940         /* reset multicast table array for vf */
4941         adapter->vf_data[vf].num_vf_mc_hashes = 0;
4942
4943         /* Flush and reset the mta with the new values */
4944         igb_set_rx_mode(adapter->netdev);
4945 }
4946
4947 static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf)
4948 {
4949         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4950
4951         /* generate a new mac address as we were hotplug removed/added */
4952         if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC))
4953                 random_ether_addr(vf_mac);
4954
4955         /* process remaining reset events */
4956         igb_vf_reset(adapter, vf);
4957 }
4958
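/**
 * igb_vf_reset_msg - handle a reset request from a VF
 * @adapter: board private structure
 * @vf: VF index
 *
 * Resets the VF state, programs its MAC address, enables its Tx/Rx and
 * replies with an ACK containing the MAC address.
 **/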
4959 static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf)
4960 {
4961         struct e1000_hw *hw = &adapter->hw;
4962         unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses;
4963         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
4964         u32 reg, msgbuf[3];
4965         u8 *addr = (u8 *)(&msgbuf[1]);
4966
4967         /* process all the same items cleared in a function level reset */
4968         igb_vf_reset(adapter, vf);
4969
4970         /* set vf mac address */
4971         igb_rar_set_qsel(adapter, vf_mac, rar_entry, vf);
4972
4973         /* enable transmit and receive for vf */
4974         reg = rd32(E1000_VFTE);
4975         wr32(E1000_VFTE, reg | (1 << vf));
4976         reg = rd32(E1000_VFRE);
4977         wr32(E1000_VFRE, reg | (1 << vf));
4978
4979         adapter->vf_data[vf].flags = IGB_VF_FLAG_CTS;
4980
4981         /* reply to reset with ack and vf mac address */
4982         msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK;
4983         memcpy(addr, vf_mac, 6);
4984         igb_write_mbx(hw, msgbuf, 3, vf);
4985 }
4986
4987 static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf)
4988 {
4989         unsigned char *addr = (unsigned char *)&msg[1];
4990         int err = -1;
4991
4992         if (is_valid_ether_addr(addr))
4993                 err = igb_set_vf_mac(adapter, vf, addr);
4994
4995         return err;
4996 }
4997
4998 static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf)
4999 {
5000         struct e1000_hw *hw = &adapter->hw;
5001         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5002         u32 msg = E1000_VT_MSGTYPE_NACK;
5003
5004         /* if device isn't clear to send it shouldn't be reading either */
5005         if (!(vf_data->flags & IGB_VF_FLAG_CTS) &&
5006             time_after(jiffies, vf_data->last_nack + (2 * HZ))) {
5007                 igb_write_mbx(hw, &msg, 1, vf);
5008                 vf_data->last_nack = jiffies;
5009         }
5010 }
5011
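/**
 * igb_rcv_msg_from_vf - read and dispatch a mailbox message from a VF
 * @adapter: board private structure
 * @vf: VF index
 **/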
5012 static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf)
5013 {
5014         struct pci_dev *pdev = adapter->pdev;
5015         u32 msgbuf[E1000_VFMAILBOX_SIZE];
5016         struct e1000_hw *hw = &adapter->hw;
5017         struct vf_data_storage *vf_data = &adapter->vf_data[vf];
5018         s32 retval;
5019
5020         retval = igb_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf);
5021
5022         if (retval) {
5023                 /* if receive failed, revoke the VF CTS status and restart init */
5024                 dev_err(&pdev->dev, "Error receiving message from VF\n");
5025                 vf_data->flags &= ~IGB_VF_FLAG_CTS;
5026                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5027                         return;
5028                 goto out;
5029         }
5030
5031         /* this is a message we already processed, do nothing */
5032         if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK))
5033                 return;
5034
5035         /*
5036          * until the vf completes a reset it should not be
5037          * allowed to start any configuration.
5038          */
5039
5040         if (msgbuf[0] == E1000_VF_RESET) {
5041                 igb_vf_reset_msg(adapter, vf);
5042                 return;
5043         }
5044
5045         if (!(vf_data->flags & IGB_VF_FLAG_CTS)) {
5046                 if (!time_after(jiffies, vf_data->last_nack + (2 * HZ)))
5047                         return;
5048                 retval = -1;
5049                 goto out;
5050         }
5051
5052         switch ((msgbuf[0] & 0xFFFF)) {
5053         case E1000_VF_SET_MAC_ADDR:
5054                 retval = igb_set_vf_mac_addr(adapter, msgbuf, vf);
5055                 break;
5056         case E1000_VF_SET_PROMISC:
5057                 retval = igb_set_vf_promisc(adapter, msgbuf, vf);
5058                 break;
5059         case E1000_VF_SET_MULTICAST:
5060                 retval = igb_set_vf_multicasts(adapter, msgbuf, vf);
5061                 break;
5062         case E1000_VF_SET_LPE:
5063                 retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf);
5064                 break;
5065         case E1000_VF_SET_VLAN:
5066                 if (adapter->vf_data[vf].pf_vlan)
5067                         retval = -1;
5068                 else
5069                         retval = igb_set_vf_vlan(adapter, msgbuf, vf);
5070                 break;
5071         default:
5072                 dev_err(&pdev->dev, "Unhandled Msg %08x\n", msgbuf[0]);
5073                 retval = -1;
5074                 break;
5075         }
5076
5077         msgbuf[0] |= E1000_VT_MSGTYPE_CTS;
5078 out:
5079         /* notify the VF of the results of what it sent us */
5080         if (retval)
5081                 msgbuf[0] |= E1000_VT_MSGTYPE_NACK;
5082         else
5083                 msgbuf[0] |= E1000_VT_MSGTYPE_ACK;
5084
5085         igb_write_mbx(hw, msgbuf, 1, vf);
5086 }
5087
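/**
 * igb_msg_task - process pending mailbox events for all VFs
 * @adapter: board private structure
 **/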
5088 static void igb_msg_task(struct igb_adapter *adapter)
5089 {
5090         struct e1000_hw *hw = &adapter->hw;
5091         u32 vf;
5092
5093         for (vf = 0; vf < adapter->vfs_allocated_count; vf++) {
5094                 /* process any reset requests */
5095                 if (!igb_check_for_rst(hw, vf))
5096                         igb_vf_reset_event(adapter, vf);
5097
5098                 /* process any messages pending */
5099                 if (!igb_check_for_msg(hw, vf))
5100                         igb_rcv_msg_from_vf(adapter, vf);
5101
5102                 /* process any acks */
5103                 if (!igb_check_for_ack(hw, vf))
5104                         igb_rcv_ack_from_vf(adapter, vf);
5105         }
5106 }
5107
5108 /**
5109  *  igb_set_uta - Set unicast filter table address
5110  *  @adapter: board private structure
5111  *
5112  *  The unicast table address is a register array of 32-bit registers.
5113  *  The table is meant to be used in a way similar to how the MTA is used
5114  *  however, due to certain limitations in the hardware it is necessary to
5115  *  set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous
5116  *  enable bit to allow vlan tag stripping when promiscuous mode is enabled
5117  **/
5118 static void igb_set_uta(struct igb_adapter *adapter)
5119 {
5120         struct e1000_hw *hw = &adapter->hw;
5121         int i;
5122
5123         /* The UTA table only exists on 82576 hardware and newer */
5124         if (hw->mac.type < e1000_82576)
5125                 return;
5126
5127         /* we only need to do this if VMDq is enabled */
5128         if (!adapter->vfs_allocated_count)
5129                 return;
5130
5131         for (i = 0; i < hw->mac.uta_reg_count; i++)
5132                 array_wr32(E1000_UTA, i, ~0);
5133 }
5134
5135 /**
5136  * igb_intr_msi - Interrupt Handler
5137  * @irq: interrupt number
5138  * @data: pointer to a network interface device structure
5139  **/
5140 static irqreturn_t igb_intr_msi(int irq, void *data)
5141 {
5142         struct igb_adapter *adapter = data;
5143         struct igb_q_vector *q_vector = adapter->q_vector[0];
5144         struct e1000_hw *hw = &adapter->hw;
5145         /* read ICR disables interrupts using IAM */
5146         u32 icr = rd32(E1000_ICR);
5147
5148         igb_write_itr(q_vector);
5149
5150         if (icr & E1000_ICR_DRSTA)
5151                 schedule_work(&adapter->reset_task);
5152
5153         if (icr & E1000_ICR_DOUTSYNC) {
5154                 /* HW is reporting DMA is out of sync */
5155                 adapter->stats.doosync++;
5156         }
5157
5158         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5159                 hw->mac.get_link_status = 1;
5160                 if (!test_bit(__IGB_DOWN, &adapter->state))
5161                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5162         }
5163
5164         napi_schedule(&q_vector->napi);
5165
5166         return IRQ_HANDLED;
5167 }
5168
5169 /**
5170  * igb_intr - Legacy Interrupt Handler
5171  * @irq: interrupt number
5172  * @data: pointer to a network interface device structure
5173  **/
5174 static irqreturn_t igb_intr(int irq, void *data)
5175 {
5176         struct igb_adapter *adapter = data;
5177         struct igb_q_vector *q_vector = adapter->q_vector[0];
5178         struct e1000_hw *hw = &adapter->hw;
5179         /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked.  No
5180          * need for the IMC write */
5181         u32 icr = rd32(E1000_ICR);
5182         if (!icr)
5183                 return IRQ_NONE;  /* Not our interrupt */
5184
5185         igb_write_itr(q_vector);
5186
5187         /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is
5188          * not set, then the adapter didn't send an interrupt */
5189         if (!(icr & E1000_ICR_INT_ASSERTED))
5190                 return IRQ_NONE;
5191
5192         if (icr & E1000_ICR_DRSTA)
5193                 schedule_work(&adapter->reset_task);
5194
5195         if (icr & E1000_ICR_DOUTSYNC) {
5196                 /* HW is reporting DMA is out of sync */
5197                 adapter->stats.doosync++;
5198         }
5199
5200         if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) {
5201                 hw->mac.get_link_status = 1;
5202                 /* guard against interrupt when we're going down */
5203                 if (!test_bit(__IGB_DOWN, &adapter->state))
5204                         mod_timer(&adapter->watchdog_timer, jiffies + 1);
5205         }
5206
5207         napi_schedule(&q_vector->napi);
5208
5209         return IRQ_HANDLED;
5210 }
5211
5212 static inline void igb_ring_irq_enable(struct igb_q_vector *q_vector)
5213 {
5214         struct igb_adapter *adapter = q_vector->adapter;
5215         struct e1000_hw *hw = &adapter->hw;
5216
5217         if ((q_vector->rx_ring && (adapter->rx_itr_setting & 3)) ||
5218             (!q_vector->rx_ring && (adapter->tx_itr_setting & 3))) {
5219                 if (!adapter->msix_entries)
5220                         igb_set_itr(adapter);
5221                 else
5222                         igb_update_ring_itr(q_vector);
5223         }
5224
5225         if (!test_bit(__IGB_DOWN, &adapter->state)) {
5226                 if (adapter->msix_entries)
5227                         wr32(E1000_EIMS, q_vector->eims_value);
5228                 else
5229                         igb_irq_enable(adapter);
5230         }
5231 }
5232
5233 /**
5234  * igb_poll - NAPI Rx polling callback
5235  * @napi: napi polling structure
5236  * @budget: count of how many packets we should handle
5237  **/
5238 static int igb_poll(struct napi_struct *napi, int budget)
5239 {
5240         struct igb_q_vector *q_vector = container_of(napi,
5241                                                      struct igb_q_vector,
5242                                                      napi);
5243         int tx_clean_complete = 1, work_done = 0;
5244
5245 #ifdef CONFIG_IGB_DCA
5246         if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED)
5247                 igb_update_dca(q_vector);
5248 #endif
5249         if (q_vector->tx_ring)
5250                 tx_clean_complete = igb_clean_tx_irq(q_vector);
5251
5252         if (q_vector->rx_ring)
5253                 igb_clean_rx_irq_adv(q_vector, &work_done, budget);
5254
5255         if (!tx_clean_complete)
5256                 work_done = budget;
5257
5258         /* If not enough Rx work done, exit the polling mode */
5259         if (work_done < budget) {
5260                 napi_complete(napi);
5261                 igb_ring_irq_enable(q_vector);
5262         }
5263
5264         return work_done;
5265 }
5266
5267 /**
5268  * igb_systim_to_hwtstamp - convert system time value to hw timestamp
5269  * @adapter: board private structure
5270  * @shhwtstamps: timestamp structure to update
5271  * @regval: unsigned 64bit system time value.
5272  *
5273  * We need to convert the system time value stored in the RX/TXSTMP registers
5274  * into a hwtstamp which can be used by the upper level timestamping functions
5275  */
5276 static void igb_systim_to_hwtstamp(struct igb_adapter *adapter,
5277                                    struct skb_shared_hwtstamps *shhwtstamps,
5278                                    u64 regval)
5279 {
5280         u64 ns;
5281
5282         /*
5283          * The 82580 starts with 1ns at bit 0 in RX/TXSTMPL, so shift this up
5284          * by 24 bits to match the clock shift we set up earlier.
5285          */
5286         if (adapter->hw.mac.type == e1000_82580)
5287                 regval <<= IGB_82580_TSYNC_SHIFT;
5288
5289         ns = timecounter_cyc2time(&adapter->clock, regval);
5290         timecompare_update(&adapter->compare, ns);
5291         memset(shhwtstamps, 0, sizeof(struct skb_shared_hwtstamps));
5292         shhwtstamps->hwtstamp = ns_to_ktime(ns);
5293         shhwtstamps->syststamp = timecompare_transform(&adapter->compare, ns);
5294 }
5295
5296 /**
5297  * igb_tx_hwtstamp - utility function which checks for TX time stamp
5298  * @q_vector: pointer to q_vector containing needed info
5299  * @buffer: pointer to igb_buffer structure
5300  *
5301  * If we were asked to do hardware stamping and such a time stamp is
5302  * available, then it must have been for this skb here because we
5303  * allow only one such packet into the queue.
5304  */
5305 static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *buffer_info)
5306 {
5307         struct igb_adapter *adapter = q_vector->adapter;
5308         struct e1000_hw *hw = &adapter->hw;
5309         struct skb_shared_hwtstamps shhwtstamps;
5310         u64 regval;
5311
5312         /* if skb does not support hw timestamp or TX stamp not valid exit */
5313         if (likely(!buffer_info->shtx.hardware) ||
5314             !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID))
5315                 return;
5316
5317         regval = rd32(E1000_TXSTMPL);
5318         regval |= (u64)rd32(E1000_TXSTMPH) << 32;
5319
5320         igb_systim_to_hwtstamp(adapter, &shhwtstamps, regval);
5321         skb_tstamp_tx(buffer_info->skb, &shhwtstamps);
5322 }
5323
5324 /**
5325  * igb_clean_tx_irq - Reclaim resources after transmit completes
5326  * @q_vector: pointer to q_vector containing needed info
5327  * returns true if ring is completely cleaned
5328  **/
5329 static bool igb_clean_tx_irq(struct igb_q_vector *q_vector)
5330 {
5331         struct igb_adapter *adapter = q_vector->adapter;
5332         struct igb_ring *tx_ring = q_vector->tx_ring;
5333         struct net_device *netdev = tx_ring->netdev;
5334         struct e1000_hw *hw = &adapter->hw;
5335         struct igb_buffer *buffer_info;
5336         union e1000_adv_tx_desc *tx_desc, *eop_desc;
5337         unsigned int total_bytes = 0, total_packets = 0;
5338         unsigned int i, eop, count = 0;
5339         bool cleaned = false;
5340
5341         i = tx_ring->next_to_clean;
5342         eop = tx_ring->buffer_info[i].next_to_watch;
5343         eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5344
5345         while ((eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD)) &&
5346                (count < tx_ring->count)) {
5347                 for (cleaned = false; !cleaned; count++) {
5348                         tx_desc = E1000_TX_DESC_ADV(*tx_ring, i);
5349                         buffer_info = &tx_ring->buffer_info[i];
5350                         cleaned = (i == eop);
5351
5352                         if (buffer_info->skb) {
5353                                 total_bytes += buffer_info->bytecount;
5354                                 /* gso_segs is currently only valid for tcp */
5355                                 total_packets += buffer_info->gso_segs;
5356                                 igb_tx_hwtstamp(q_vector, buffer_info);
5357                         }
5358
5359                         igb_unmap_and_free_tx_resource(tx_ring, buffer_info);
5360                         tx_desc->wb.status = 0;
5361
5362                         i++;
5363                         if (i == tx_ring->count)
5364                                 i = 0;
5365                 }
5366                 eop = tx_ring->buffer_info[i].next_to_watch;
5367                 eop_desc = E1000_TX_DESC_ADV(*tx_ring, eop);
5368         }
5369
5370         tx_ring->next_to_clean = i;
5371
5372         if (unlikely(count &&
5373                      netif_carrier_ok(netdev) &&
5374                      igb_desc_unused(tx_ring) >= IGB_TX_QUEUE_WAKE)) {
5375                 /* Make sure that anybody stopping the queue after this
5376                  * sees the new next_to_clean.
5377                  */
5378                 smp_mb();
5379                 if (__netif_subqueue_stopped(netdev, tx_ring->queue_index) &&
5380                     !(test_bit(__IGB_DOWN, &adapter->state))) {
5381                         netif_wake_subqueue(netdev, tx_ring->queue_index);
5382                         tx_ring->tx_stats.restart_queue++;
5383                 }
5384         }
5385
5386         if (tx_ring->detect_tx_hung) {
5387                 /* Detect a transmit hang in hardware; this serializes the
5388                  * check with the clearing of time_stamp and movement of i */
5389                 tx_ring->detect_tx_hung = false;
5390                 if (tx_ring->buffer_info[i].time_stamp &&
5391                     time_after(jiffies, tx_ring->buffer_info[i].time_stamp +
5392                                (adapter->tx_timeout_factor * HZ)) &&
5393                     !(rd32(E1000_STATUS) & E1000_STATUS_TXOFF)) {
5394
5395                         /* detected Tx unit hang */
5396                         dev_err(tx_ring->dev,
5397                                 "Detected Tx Unit Hang\n"
5398                                 "  Tx Queue             <%d>\n"
5399                                 "  TDH                  <%x>\n"
5400                                 "  TDT                  <%x>\n"
5401                                 "  next_to_use          <%x>\n"
5402                                 "  next_to_clean        <%x>\n"
5403                                 "buffer_info[next_to_clean]\n"
5404                                 "  time_stamp           <%lx>\n"
5405                                 "  next_to_watch        <%x>\n"
5406                                 "  jiffies              <%lx>\n"
5407                                 "  desc.status          <%x>\n",
5408                                 tx_ring->queue_index,
5409                                 readl(tx_ring->head),
5410                                 readl(tx_ring->tail),
5411                                 tx_ring->next_to_use,
5412                                 tx_ring->next_to_clean,
5413                                 tx_ring->buffer_info[eop].time_stamp,
5414                                 eop,
5415                                 jiffies,
5416                                 eop_desc->wb.status);
5417                         netif_stop_subqueue(netdev, tx_ring->queue_index);
5418                 }
5419         }
5420         tx_ring->total_bytes += total_bytes;
5421         tx_ring->total_packets += total_packets;
5422         tx_ring->tx_stats.bytes += total_bytes;
5423         tx_ring->tx_stats.packets += total_packets;
5424         return (count < tx_ring->count);
5425 }
5426
5427 /**
5428  * igb_receive_skb - helper function to handle rx indications
5429  * @q_vector: structure containing interrupt and ring information
5430  * @skb: packet to send up
5431  * @vlan_tag: vlan tag for packet
5432  **/
5433 static void igb_receive_skb(struct igb_q_vector *q_vector,
5434                             struct sk_buff *skb,
5435                             u16 vlan_tag)
5436 {
5437         struct igb_adapter *adapter = q_vector->adapter;
5438
5439         if (vlan_tag && adapter->vlgrp)
5440                 vlan_gro_receive(&q_vector->napi, adapter->vlgrp,
5441                                  vlan_tag, skb);
5442         else
5443                 napi_gro_receive(&q_vector->napi, skb);
5444 }
5445
5446 static inline void igb_rx_checksum_adv(struct igb_ring *ring,
5447                                        u32 status_err, struct sk_buff *skb)
5448 {
5449         skb->ip_summed = CHECKSUM_NONE;
5450
5451         /* Rx checksum is disabled through ethtool, or the Ignore Checksum bit is set */
5452         if (!(ring->flags & IGB_RING_FLAG_RX_CSUM) ||
5453              (status_err & E1000_RXD_STAT_IXSM))
5454                 return;
5455
5456         /* TCP/UDP checksum error bit is set */
5457         if (status_err &
5458             (E1000_RXDEXT_STATERR_TCPE | E1000_RXDEXT_STATERR_IPE)) {
5459                 /*
5460                  * work around an erratum with sctp packets where the TCPE aka
5461                  * L4E bit is set incorrectly on 64 byte (60 byte w/o crc)
5462                  * packets (i.e. let the stack check the crc32c)
5463                  */
5464                 if ((skb->len == 60) &&
5465                     (ring->flags & IGB_RING_FLAG_RX_SCTP_CSUM))
5466                         ring->rx_stats.csum_err++;
5467
5468                 /* let the stack verify checksum errors */
5469                 return;
5470         }
5471         /* It must be a TCP or UDP packet with a valid checksum */
5472         if (status_err & (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS))
5473                 skb->ip_summed = CHECKSUM_UNNECESSARY;
5474
5475         dev_dbg(ring->dev, "cksum success: bits %08X\n", status_err);
5476 }
5477
5478 static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr,
5479                                    struct sk_buff *skb)
5480 {
5481         struct igb_adapter *adapter = q_vector->adapter;
5482         struct e1000_hw *hw = &adapter->hw;
5483         u64 regval;
5484
5485         /*
5486          * If this bit is set, then the RX registers contain the time stamp. No
5487          * other packet will be time stamped until we read these registers, so
5488          * read the registers to make them available again. Because only one
5489          * packet can be time stamped at a time, we know that the register
5490          * values must belong to this one here and therefore we don't need to
5491          * compare any of the additional attributes stored for it.
5492          *
5493          * If nothing went wrong, then it should have a skb_shared_tx that we
5494          * can turn into a skb_shared_hwtstamps.
5495          */
5496         if (staterr & E1000_RXDADV_STAT_TSIP) {
5497                 u32 *stamp = (u32 *)skb->data;
5498                 regval = le32_to_cpu(*(stamp + 2));
5499                 regval |= (u64)le32_to_cpu(*(stamp + 3)) << 32;
5500                 skb_pull(skb, IGB_TS_HDR_LEN);
5501         } else {
5502                 if (!(rd32(E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID))
5503                         return;
5504
5505                 regval = rd32(E1000_RXSTMPL);
5506                 regval |= (u64)rd32(E1000_RXSTMPH) << 32;
5507         }
5508
5509         igb_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
5510 }

5511 static inline u16 igb_get_hlen(struct igb_ring *rx_ring,
5512                                union e1000_adv_rx_desc *rx_desc)
5513 {
5514         /* HW will not DMA in data larger than the given buffer, even if
5515          * it parses the header (NFS, for example) to be larger.  In that
5516          * case it fills the header buffer and spills the rest into the page.
5517          */
5518         u16 hlen = (le16_to_cpu(rx_desc->wb.lower.lo_dword.hdr_info) &
5519                    E1000_RXDADV_HDRBUFLEN_MASK) >> E1000_RXDADV_HDRBUFLEN_SHIFT;
5520         if (hlen > rx_ring->rx_buffer_len)
5521                 hlen = rx_ring->rx_buffer_len;
5522         return hlen;
5523 }
5524
5525 static bool igb_clean_rx_irq_adv(struct igb_q_vector *q_vector,
5526                                  int *work_done, int budget)
5527 {
5528         struct igb_ring *rx_ring = q_vector->rx_ring;
5529         struct net_device *netdev = rx_ring->netdev;
5530         struct device *dev = rx_ring->dev;
5531         union e1000_adv_rx_desc *rx_desc, *next_rxd;
5532         struct igb_buffer *buffer_info, *next_buffer;
5533         struct sk_buff *skb;
5534         bool cleaned = false;
5535         int cleaned_count = 0;
5536         int current_node = numa_node_id();
5537         unsigned int total_bytes = 0, total_packets = 0;
5538         unsigned int i;
5539         u32 staterr;
5540         u16 length;
5541         u16 vlan_tag;
5542
5543         i = rx_ring->next_to_clean;
5544         buffer_info = &rx_ring->buffer_info[i];
5545         rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5546         staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5547
5548         while (staterr & E1000_RXD_STAT_DD) {
5549                 if (*work_done >= budget)
5550                         break;
5551                 (*work_done)++;
5552
5553                 skb = buffer_info->skb;
5554                 prefetch(skb->data - NET_IP_ALIGN);
5555                 buffer_info->skb = NULL;
5556
5557                 i++;
5558                 if (i == rx_ring->count)
5559                         i = 0;
5560
5561                 next_rxd = E1000_RX_DESC_ADV(*rx_ring, i);
5562                 prefetch(next_rxd);
5563                 next_buffer = &rx_ring->buffer_info[i];
5564
5565                 length = le16_to_cpu(rx_desc->wb.upper.length);
5566                 cleaned = true;
5567                 cleaned_count++;
5568
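                /*
                 * unmap the skb data buffer; in packet-split mode it holds
                 * only the packet headers, otherwise it holds the whole frame
                 */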
5569                 if (buffer_info->dma) {
5570                         dma_unmap_single(dev, buffer_info->dma,
5571                                          rx_ring->rx_buffer_len,
5572                                          DMA_FROM_DEVICE);
5573                         buffer_info->dma = 0;
5574                         if (rx_ring->rx_buffer_len >= IGB_RXBUFFER_1024) {
5575                                 skb_put(skb, length);
5576                                 goto send_up;
5577                         }
5578                         skb_put(skb, igb_get_hlen(rx_ring, rx_desc));
5579                 }
5580
5581                 if (length) {
5582                         dma_unmap_page(dev, buffer_info->page_dma,
5583                                        PAGE_SIZE / 2, DMA_FROM_DEVICE);
5584                         buffer_info->page_dma = 0;
5585
5586                         skb_fill_page_desc(skb, skb_shinfo(skb)->nr_frags,
5587                                                 buffer_info->page,
5588                                                 buffer_info->page_offset,
5589                                                 length);
5590
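                        /*
                         * give up the page if someone else still holds a
                         * reference to it or it came from a remote NUMA node,
                         * otherwise keep it so the half-buffer can be reused
                         */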
5591                         if ((page_count(buffer_info->page) != 1) ||
5592                             (page_to_nid(buffer_info->page) != current_node))
5593                                 buffer_info->page = NULL;
5594                         else
5595                                 get_page(buffer_info->page);
5596
5597                         skb->len += length;
5598                         skb->data_len += length;
5599                         skb->truesize += length;
5600                 }
5601
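                /* not the last descriptor of this frame; hand the skb over
                 * so the next descriptor continues filling it */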
5602                 if (!(staterr & E1000_RXD_STAT_EOP)) {
5603                         buffer_info->skb = next_buffer->skb;
5604                         buffer_info->dma = next_buffer->dma;
5605                         next_buffer->skb = skb;
5606                         next_buffer->dma = 0;
5607                         goto next_desc;
5608                 }
5609 send_up:
5610                 if (staterr & E1000_RXDEXT_ERR_FRAME_ERR_MASK) {
5611                         dev_kfree_skb_irq(skb);
5612                         goto next_desc;
5613                 }
5614
5615                 if (staterr & (E1000_RXDADV_STAT_TSIP | E1000_RXDADV_STAT_TS))
5616                         igb_rx_hwtstamp(q_vector, staterr, skb);
5617                 total_bytes += skb->len;
5618                 total_packets++;
5619
5620                 igb_rx_checksum_adv(rx_ring, staterr, skb);
5621
5622                 skb->protocol = eth_type_trans(skb, netdev);
5623                 skb_record_rx_queue(skb, rx_ring->queue_index);
5624
5625                 vlan_tag = ((staterr & E1000_RXD_STAT_VP) ?
5626                             le16_to_cpu(rx_desc->wb.upper.vlan) : 0);
5627
5628                 igb_receive_skb(q_vector, skb, vlan_tag);
5629
5630 next_desc:
5631                 rx_desc->wb.upper.status_error = 0;
5632
5633                 /* return some buffers to hardware, one at a time is too slow */
5634                 if (cleaned_count >= IGB_RX_BUFFER_WRITE) {
5635                         igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5636                         cleaned_count = 0;
5637                 }
5638
5639                 /* use prefetched values */
5640                 rx_desc = next_rxd;
5641                 buffer_info = next_buffer;
5642                 staterr = le32_to_cpu(rx_desc->wb.upper.status_error);
5643         }
5644
5645         rx_ring->next_to_clean = i;
5646         cleaned_count = igb_desc_unused(rx_ring);
5647
5648         if (cleaned_count)
5649                 igb_alloc_rx_buffers_adv(rx_ring, cleaned_count);
5650
5651         rx_ring->total_packets += total_packets;
5652         rx_ring->total_bytes += total_bytes;
5653         rx_ring->rx_stats.packets += total_packets;
5654         rx_ring->rx_stats.bytes += total_bytes;
5655         return cleaned;
5656 }
5657
5658 /**
5659  * igb_alloc_rx_buffers_adv - Replace used receive buffers; packet split
5660  * @rx_ring: rx descriptor ring to refill
 * @cleaned_count: number of buffers to allocate
5661  **/
5662 void igb_alloc_rx_buffers_adv(struct igb_ring *rx_ring, int cleaned_count)
5663 {
5664         struct net_device *netdev = rx_ring->netdev;
5665         union e1000_adv_rx_desc *rx_desc;
5666         struct igb_buffer *buffer_info;
5667         struct sk_buff *skb;
5668         unsigned int i;
5669         int bufsz;
5670
5671         i = rx_ring->next_to_use;
5672         buffer_info = &rx_ring->buffer_info[i];
5673
5674         bufsz = rx_ring->rx_buffer_len;
5675
5676         while (cleaned_count--) {
5677                 rx_desc = E1000_RX_DESC_ADV(*rx_ring, i);
5678
5679                 if ((bufsz < IGB_RXBUFFER_1024) && !buffer_info->page_dma) {
5680                         if (!buffer_info->page) {
5681                                 buffer_info->page = netdev_alloc_page(netdev);
5682                                 if (!buffer_info->page) {
5683                                         rx_ring->rx_stats.alloc_failed++;
5684                                         goto no_buffers;
5685                                 }
5686                                 buffer_info->page_offset = 0;
5687                         } else {
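                                /* reuse the existing page, flipping to its
                                 * other half */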
5688                                 buffer_info->page_offset ^= PAGE_SIZE / 2;
5689                         }
5690                         buffer_info->page_dma =
5691                                 dma_map_page(rx_ring->dev, buffer_info->page,
5692                                              buffer_info->page_offset,
5693                                              PAGE_SIZE / 2,
5694                                              DMA_FROM_DEVICE);
5695                         if (dma_mapping_error(rx_ring->dev,
5696                                               buffer_info->page_dma)) {
5697                                 buffer_info->page_dma = 0;
5698                                 rx_ring->rx_stats.alloc_failed++;
5699                                 goto no_buffers;
5700                         }
5701                 }
5702
5703                 skb = buffer_info->skb;
5704                 if (!skb) {
5705                         skb = netdev_alloc_skb_ip_align(netdev, bufsz);
5706                         if (!skb) {
5707                                 rx_ring->rx_stats.alloc_failed++;
5708                                 goto no_buffers;
5709                         }
5710
5711                         buffer_info->skb = skb;
5712                 }
5713                 if (!buffer_info->dma) {
5714                         buffer_info->dma = dma_map_single(rx_ring->dev,
5715                                                           skb->data,
5716                                                           bufsz,
5717                                                           DMA_FROM_DEVICE);
5718                         if (dma_mapping_error(rx_ring->dev,
5719                                               buffer_info->dma)) {
5720                                 buffer_info->dma = 0;
5721                                 rx_ring->rx_stats.alloc_failed++;
5722                                 goto no_buffers;
5723                         }
5724                 }
5725                 /* Refresh the desc even if buffer_addrs didn't change because
5726                  * each write-back erases this info. */
5727                 if (bufsz < IGB_RXBUFFER_1024) {
5728                         rx_desc->read.pkt_addr =
5729                              cpu_to_le64(buffer_info->page_dma);
5730                         rx_desc->read.hdr_addr = cpu_to_le64(buffer_info->dma);
5731                 } else {
5732                         rx_desc->read.pkt_addr = cpu_to_le64(buffer_info->dma);
5733                         rx_desc->read.hdr_addr = 0;
5734                 }
5735
5736                 i++;
5737                 if (i == rx_ring->count)
5738                         i = 0;
5739                 buffer_info = &rx_ring->buffer_info[i];
5740         }
5741
5742 no_buffers:
5743         if (rx_ring->next_to_use != i) {
5744                 rx_ring->next_to_use = i;
5745                 if (i == 0)
5746                         i = (rx_ring->count - 1);
5747                 else
5748                         i--;
5749
5750                 /* Force memory writes to complete before letting h/w
5751                  * know there are new descriptors to fetch.  (Only
5752                  * applicable for weak-ordered memory model archs,
5753                  * such as IA-64). */
5754                 wmb();
5755                 writel(i, rx_ring->tail);
5756         }
5757 }
5758
5759 /**
5760  * igb_mii_ioctl - handle MII register access ioctls
5761  * @netdev: network interface device structure
5762  * @ifr: interface request containing the MII data
5763  * @cmd: ioctl command (SIOCGMIIPHY, SIOCGMIIREG or SIOCSMIIREG)
5764  **/
5765 static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5766 {
5767         struct igb_adapter *adapter = netdev_priv(netdev);
5768         struct mii_ioctl_data *data = if_mii(ifr);
5769
5770         if (adapter->hw.phy.media_type != e1000_media_type_copper)
5771                 return -EOPNOTSUPP;
5772
5773         switch (cmd) {
5774         case SIOCGMIIPHY:
5775                 data->phy_id = adapter->hw.phy.addr;
5776                 break;
5777         case SIOCGMIIREG:
5778                 if (igb_read_phy_reg(&adapter->hw, data->reg_num & 0x1F,
5779                                      &data->val_out))
5780                         return -EIO;
5781                 break;
5782         case SIOCSMIIREG:
5783         default:
5784                 return -EOPNOTSUPP;
5785         }
5786         return 0;
5787 }
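
/*
 * A minimal userspace sketch of reading a PHY register through this path,
 * for illustration only (the interface name "eth0", the socket fd and the
 * register choice are assumptions, not part of this driver):
 *
 *        struct ifreq ifr;
 *        struct mii_ioctl_data *mii = (struct mii_ioctl_data *)&ifr.ifr_data;
 *
 *        memset(&ifr, 0, sizeof(ifr));
 *        strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *        ioctl(fd, SIOCGMIIPHY, &ifr);
 *        mii->reg_num = MII_BMSR;
 *        ioctl(fd, SIOCGMIIREG, &ifr);
 *
 * SIOCGMIIPHY fills mii->phy_id and SIOCGMIIREG returns the register value
 * in mii->val_out; SIOCSMIIREG is rejected by this driver.
 */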
5788
5789 /**
5790  * igb_hwtstamp_ioctl - control hardware time stamping
5791  * @netdev: network interface device structure
5792  * @ifr: interface request containing the hwtstamp_config
5793  * @cmd: ioctl command (SIOCSHWTSTAMP)
5794  *
5795  * Outgoing time stamping can be enabled and disabled. Play nice and
5796  * disable it when requested, although it shouldn't cause any overhead
5797  * when no packet needs it. At most one packet in the queue may be
5798  * marked for time stamping, otherwise it would be impossible to tell
5799  * for sure to which packet the hardware time stamp belongs.
5800  *
5801  * Incoming time stamping has to be configured via the hardware
5802  * filters. Not all combinations are supported, in particular event
5803  * type has to be specified. Matching the kind of event packet is
5804  * not supported, with the exception of "all V2 events regardless of
5805  * layer 2 or 4".
5806  *
5807  **/
5808 static int igb_hwtstamp_ioctl(struct net_device *netdev,
5809                               struct ifreq *ifr, int cmd)
5810 {
5811         struct igb_adapter *adapter = netdev_priv(netdev);
5812         struct e1000_hw *hw = &adapter->hw;
5813         struct hwtstamp_config config;
5814         u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED;
5815         u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5816         u32 tsync_rx_cfg = 0;
5817         bool is_l4 = false;
5818         bool is_l2 = false;
5819         u32 regval;
5820
5821         if (copy_from_user(&config, ifr->ifr_data, sizeof(config)))
5822                 return -EFAULT;
5823
5824         /* reserved for future extensions */
5825         if (config.flags)
5826                 return -EINVAL;
5827
5828         switch (config.tx_type) {
5829         case HWTSTAMP_TX_OFF:
5830                 tsync_tx_ctl = 0;
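                /* fall through */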
5831         case HWTSTAMP_TX_ON:
5832                 break;
5833         default:
5834                 return -ERANGE;
5835         }
5836
5837         switch (config.rx_filter) {
5838         case HWTSTAMP_FILTER_NONE:
5839                 tsync_rx_ctl = 0;
5840                 break;
5841         case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
5842         case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
5843         case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
5844         case HWTSTAMP_FILTER_ALL:
5845                 /*
5846                  * register TSYNCRXCFG must be set, therefore it is not
5847                  * possible to time stamp both Sync and Delay_Req messages
5848                  * => fall back to time stamping all packets
5849                  */
5850                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5851                 config.rx_filter = HWTSTAMP_FILTER_ALL;
5852                 break;
5853         case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
5854                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5855                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE;
5856                 is_l4 = true;
5857                 break;
5858         case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
5859                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1;
5860                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE;
5861                 is_l4 = true;
5862                 break;
5863         case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
5864         case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
5865                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5866                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE;
5867                 is_l2 = true;
5868                 is_l4 = true;
5869                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5870                 break;
5871         case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
5872         case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
5873                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L2_L4_V2;
5874                 tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE;
5875                 is_l2 = true;
5876                 is_l4 = true;
5877                 config.rx_filter = HWTSTAMP_FILTER_SOME;
5878                 break;
5879         case HWTSTAMP_FILTER_PTP_V2_EVENT:
5880         case HWTSTAMP_FILTER_PTP_V2_SYNC:
5881         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
5882                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2;
5883                 config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
5884                 is_l2 = true;
5885                 break;
5886         default:
5887                 return -ERANGE;
5888         }
5889
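        /* hardware time stamping is not supported on the 82575 */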
5890         if (hw->mac.type == e1000_82575) {
5891                 if (tsync_rx_ctl | tsync_tx_ctl)
5892                         return -EINVAL;
5893                 return 0;
5894         }
5895
5896         /*
5897          * Per-packet timestamping only works if all packets are
5898          * timestamped, so enable timestamping in all packets as
5899          * long as one rx filter was configured.
5900          */
5901         if ((hw->mac.type == e1000_82580) && tsync_rx_ctl) {
5902                 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED;
5903                 tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL;
5904         }
5905
5906         /* enable/disable TX */
5907         regval = rd32(E1000_TSYNCTXCTL);
5908         regval &= ~E1000_TSYNCTXCTL_ENABLED;
5909         regval |= tsync_tx_ctl;
5910         wr32(E1000_TSYNCTXCTL, regval);
5911
5912         /* enable/disable RX */
5913         regval = rd32(E1000_TSYNCRXCTL);
5914         regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK);
5915         regval |= tsync_rx_ctl;
5916         wr32(E1000_TSYNCRXCTL, regval);
5917
5918         /* define which PTP packets are time stamped */
5919         wr32(E1000_TSYNCRXCFG, tsync_rx_cfg);
5920
5921         /* define ethertype filter for timestamped packets */
5922         if (is_l2)
5923                 wr32(E1000_ETQF(3),
5924                                 (E1000_ETQF_FILTER_ENABLE | /* enable filter */
5925                                  E1000_ETQF_1588 | /* enable timestamping */
5926                                  ETH_P_1588));     /* 1588 eth protocol type */
5927         else
5928                 wr32(E1000_ETQF(3), 0);
5929
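/* UDP destination port used by PTP (IEEE 1588) event messages */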
5930 #define PTP_PORT 319
5931         /* L4 Queue Filter[3]: filter by destination port and protocol */
5932         if (is_l4) {
5933                 u32 ftqf = (IPPROTO_UDP /* UDP */
5934                         | E1000_FTQF_VF_BP /* VF not compared */
5935                         | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */
5936                         | E1000_FTQF_MASK); /* mask all inputs */
5937                 ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */
5938
5939                 wr32(E1000_IMIR(3), htons(PTP_PORT));
5940                 wr32(E1000_IMIREXT(3),
5941                      (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP));
5942                 if (hw->mac.type == e1000_82576) {
5943                         /* enable source port check */
5944                         wr32(E1000_SPQF(3), htons(PTP_PORT));
5945                         ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP;
5946                 }
5947                 wr32(E1000_FTQF(3), ftqf);
5948         } else {
5949                 wr32(E1000_FTQF(3), E1000_FTQF_MASK);
5950         }
5951         wrfl();
5952
5953         adapter->hwtstamp_config = config;
5954
5955         /* clear TX/RX time stamp registers, just to be sure */
5956         regval = rd32(E1000_TXSTMPH);
5957         regval = rd32(E1000_RXSTMPH);
5958
5959         return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ?
5960                 -EFAULT : 0;
5961 }
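
/*
 * A minimal userspace sketch of driving this ioctl, for illustration only
 * (the interface name "eth0" and the socket fd are assumptions, not part
 * of this driver):
 *
 *        struct hwtstamp_config cfg = {
 *                .tx_type   = HWTSTAMP_TX_ON,
 *                .rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT,
 *        };
 *        struct ifreq ifr;
 *
 *        memset(&ifr, 0, sizeof(ifr));
 *        strncpy(ifr.ifr_name, "eth0", IFNAMSIZ);
 *        ifr.ifr_data = (void *)&cfg;
 *        ioctl(fd, SIOCSHWTSTAMP, &ifr);
 *
 * On return the driver writes the filter it actually programmed back into
 * cfg.rx_filter, e.g. HWTSTAMP_FILTER_ALL when it falls back to stamping
 * every packet.
 */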
5962
5963 /**
5964  * igb_ioctl - dispatch device-specific ioctls
5965  * @netdev: network interface device structure
5966  * @ifr: interface request data
5967  * @cmd: ioctl command
5968  **/
5969 static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
5970 {
5971         switch (cmd) {
5972         case SIOCGMIIPHY:
5973         case SIOCGMIIREG:
5974         case SIOCSMIIREG:
5975                 return igb_mii_ioctl(netdev, ifr, cmd);
5976         case SIOCSHWTSTAMP:
5977                 return igb_hwtstamp_ioctl(netdev, ifr, cmd);
5978         default:
5979                 return -EOPNOTSUPP;
5980         }
5981 }
5982
5983 s32 igb_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5984 {
5985         struct igb_adapter *adapter = hw->back;
5986         u16 cap_offset;
5987
5988         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
5989         if (!cap_offset)
5990                 return -E1000_ERR_CONFIG;
5991
5992         pci_read_config_word(adapter->pdev, cap_offset + reg, value);
5993
5994         return 0;
5995 }
5996
5997 s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value)
5998 {
5999         struct igb_adapter *adapter = hw->back;
6000         u16 cap_offset;
6001
6002         cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP);
6003         if (!cap_offset)
6004                 return -E1000_ERR_CONFIG;
6005
6006         pci_write_config_word(adapter->pdev, cap_offset + reg, *value);
6007
6008         return 0;
6009 }
6010
6011 static void igb_vlan_rx_register(struct net_device *netdev,
6012                                  struct vlan_group *grp)
6013 {
6014         struct igb_adapter *adapter = netdev_priv(netdev);
6015         struct e1000_hw *hw = &adapter->hw;
6016         u32 ctrl, rctl;
6017
6018         igb_irq_disable(adapter);
6019         adapter->vlgrp = grp;
6020
6021         if (grp) {
6022                 /* enable VLAN tag insert/strip */
6023                 ctrl = rd32(E1000_CTRL);
6024                 ctrl |= E1000_CTRL_VME;
6025                 wr32(E1000_CTRL, ctrl);
6026
6027                 /* Disable CFI check */
6028                 rctl = rd32(E1000_RCTL);
6029                 rctl &= ~E1000_RCTL_CFIEN;
6030                 wr32(E1000_RCTL, rctl);
6031         } else {
6032                 /* disable VLAN tag insert/strip */
6033                 ctrl = rd32(E1000_CTRL);
6034                 ctrl &= ~E1000_CTRL_VME;
6035                 wr32(E1000_CTRL, ctrl);
6036         }
6037
6038         igb_rlpml_set(adapter);
6039
6040         if (!test_bit(__IGB_DOWN, &adapter->state))
6041                 igb_irq_enable(adapter);
6042 }
6043
6044 static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid)
6045 {
6046         struct igb_adapter *adapter = netdev_priv(netdev);
6047         struct e1000_hw *hw = &adapter->hw;
6048         int pf_id = adapter->vfs_allocated_count;
6049
6050         /* attempt to add filter to vlvf array */
6051         igb_vlvf_set(adapter, vid, true, pf_id);
6052
6053         /* add the filter since PF can receive vlans w/o entry in vlvf */
6054         igb_vfta_set(hw, vid, true);
6055 }
6056
6057 static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid)
6058 {
6059         struct igb_adapter *adapter = netdev_priv(netdev);
6060         struct e1000_hw *hw = &adapter->hw;
6061         int pf_id = adapter->vfs_allocated_count;
6062         s32 err;
6063
6064         igb_irq_disable(adapter);
6065         vlan_group_set_device(adapter->vlgrp, vid, NULL);
6066
6067         if (!test_bit(__IGB_DOWN, &adapter->state))
6068                 igb_irq_enable(adapter);
6069
6070         /* remove vlan from VLVF table array */
6071         err = igb_vlvf_set(adapter, vid, false, pf_id);
6072
6073         /* if vid was not present in VLVF just remove it from table */
6074         if (err)
6075                 igb_vfta_set(hw, vid, false);
6076 }
6077
6078 static void igb_restore_vlan(struct igb_adapter *adapter)
6079 {
6080         igb_vlan_rx_register(adapter->netdev, adapter->vlgrp);
6081
6082         if (adapter->vlgrp) {
6083                 u16 vid;
6084                 for (vid = 0; vid < VLAN_GROUP_ARRAY_LEN; vid++) {
6085                         if (!vlan_group_get_device(adapter->vlgrp, vid))
6086                                 continue;
6087                         igb_vlan_rx_add_vid(adapter->netdev, vid);
6088                 }
6089         }
6090 }
6091
6092 int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx)
6093 {
6094         struct pci_dev *pdev = adapter->pdev;
6095         struct e1000_mac_info *mac = &adapter->hw.mac;
6096
6097         mac->autoneg = 0;
6098
6099         switch (spddplx) {
6100         case SPEED_10 + DUPLEX_HALF:
6101                 mac->forced_speed_duplex = ADVERTISE_10_HALF;
6102                 break;
6103         case SPEED_10 + DUPLEX_FULL:
6104                 mac->forced_speed_duplex = ADVERTISE_10_FULL;
6105                 break;
6106         case SPEED_100 + DUPLEX_HALF:
6107                 mac->forced_speed_duplex = ADVERTISE_100_HALF;
6108                 break;
6109         case SPEED_100 + DUPLEX_FULL:
6110                 mac->forced_speed_duplex = ADVERTISE_100_FULL;
6111                 break;
6112         case SPEED_1000 + DUPLEX_FULL:
6113                 mac->autoneg = 1;
6114                 adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL;
6115                 break;
6116         case SPEED_1000 + DUPLEX_HALF: /* not supported */
6117         default:
6118                 dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
6119                 return -EINVAL;
6120         }
6121         return 0;
6122 }
6123
6124 static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake)
6125 {
6126         struct net_device *netdev = pci_get_drvdata(pdev);
6127         struct igb_adapter *adapter = netdev_priv(netdev);
6128         struct e1000_hw *hw = &adapter->hw;
6129         u32 ctrl, rctl, status;
6130         u32 wufc = adapter->wol;
6131 #ifdef CONFIG_PM
6132         int retval = 0;
6133 #endif
6134
6135         netif_device_detach(netdev);
6136
6137         if (netif_running(netdev))
6138                 igb_close(netdev);
6139
6140         igb_clear_interrupt_scheme(adapter);
6141
6142 #ifdef CONFIG_PM
6143         retval = pci_save_state(pdev);
6144         if (retval)
6145                 return retval;
6146 #endif
6147
6148         status = rd32(E1000_STATUS);
6149         if (status & E1000_STATUS_LU)
6150                 wufc &= ~E1000_WUFC_LNKC;
6151
6152         if (wufc) {
6153                 igb_setup_rctl(adapter);
6154                 igb_set_rx_mode(netdev);
6155
6156                 /* turn on all-multi mode if wake on multicast is enabled */
6157                 if (wufc & E1000_WUFC_MC) {
6158                         rctl = rd32(E1000_RCTL);
6159                         rctl |= E1000_RCTL_MPE;
6160                         wr32(E1000_RCTL, rctl);
6161                 }
6162
6163                 ctrl = rd32(E1000_CTRL);
6164                 /* advertise wake from D3Cold */
6165                 #define E1000_CTRL_ADVD3WUC 0x00100000
6166                 /* phy power management enable */
6167                 #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000
6168                 ctrl |= E1000_CTRL_ADVD3WUC;
6169                 wr32(E1000_CTRL, ctrl);
6170
6171                 /* Allow time for pending master requests to run */
6172                 igb_disable_pcie_master(hw);
6173
6174                 wr32(E1000_WUC, E1000_WUC_PME_EN);
6175                 wr32(E1000_WUFC, wufc);
6176         } else {
6177                 wr32(E1000_WUC, 0);
6178                 wr32(E1000_WUFC, 0);
6179         }
6180
6181         *enable_wake = wufc || adapter->en_mng_pt;
6182         if (!*enable_wake)
6183                 igb_power_down_link(adapter);
6184         else
6185                 igb_power_up_link(adapter);
6186
6187         /* Release control of h/w to f/w.  If f/w is AMT enabled, this
6188          * would have already happened in close and is redundant. */
6189         igb_release_hw_control(adapter);
6190
6191         pci_disable_device(pdev);
6192
6193         return 0;
6194 }
6195
6196 #ifdef CONFIG_PM
6197 static int igb_suspend(struct pci_dev *pdev, pm_message_t state)
6198 {
6199         int retval;
6200         bool wake;
6201
6202         retval = __igb_shutdown(pdev, &wake);
6203         if (retval)
6204                 return retval;
6205
6206         if (wake) {
6207                 pci_prepare_to_sleep(pdev);
6208         } else {
6209                 pci_wake_from_d3(pdev, false);
6210                 pci_set_power_state(pdev, PCI_D3hot);
6211         }
6212
6213         return 0;
6214 }
6215
6216 static int igb_resume(struct pci_dev *pdev)
6217 {
6218         struct net_device *netdev = pci_get_drvdata(pdev);
6219         struct igb_adapter *adapter = netdev_priv(netdev);
6220         struct e1000_hw *hw = &adapter->hw;
6221         u32 err;
6222
6223         pci_set_power_state(pdev, PCI_D0);
6224         pci_restore_state(pdev);
6225         pci_save_state(pdev);
6226
6227         err = pci_enable_device_mem(pdev);
6228         if (err) {
6229                 dev_err(&pdev->dev,
6230                         "igb: Cannot enable PCI device from suspend\n");
6231                 return err;
6232         }
6233         pci_set_master(pdev);
6234
6235         pci_enable_wake(pdev, PCI_D3hot, 0);
6236         pci_enable_wake(pdev, PCI_D3cold, 0);
6237
6238         if (igb_init_interrupt_scheme(adapter)) {
6239                 dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
6240                 return -ENOMEM;
6241         }
6242
6243         igb_reset(adapter);
6244
6245         /* let the f/w know that the h/w is now under the control of the
6246          * driver. */
6247         igb_get_hw_control(adapter);
6248
6249         wr32(E1000_WUS, ~0);
6250
6251         if (netif_running(netdev)) {
6252                 err = igb_open(netdev);
6253                 if (err)
6254                         return err;
6255         }
6256
6257         netif_device_attach(netdev);
6258
6259         return 0;
6260 }
6261 #endif
6262
6263 static void igb_shutdown(struct pci_dev *pdev)
6264 {
6265         bool wake;
6266
6267         __igb_shutdown(pdev, &wake);
6268
6269         if (system_state == SYSTEM_POWER_OFF) {
6270                 pci_wake_from_d3(pdev, wake);
6271                 pci_set_power_state(pdev, PCI_D3hot);
6272         }
6273 }
6274
6275 #ifdef CONFIG_NET_POLL_CONTROLLER
6276 /*
6277  * Polling 'interrupt' - used by things like netconsole to send skbs
6278  * without having to re-enable interrupts. It's not called while
6279  * the interrupt routine is executing.
6280  */
6281 static void igb_netpoll(struct net_device *netdev)
6282 {
6283         struct igb_adapter *adapter = netdev_priv(netdev);
6284         struct e1000_hw *hw = &adapter->hw;
6285         int i;
6286
6287         if (!adapter->msix_entries) {
6288                 struct igb_q_vector *q_vector = adapter->q_vector[0];
6289                 igb_irq_disable(adapter);
6290                 napi_schedule(&q_vector->napi);
6291                 return;
6292         }
6293
6294         for (i = 0; i < adapter->num_q_vectors; i++) {
6295                 struct igb_q_vector *q_vector = adapter->q_vector[i];
6296                 wr32(E1000_EIMC, q_vector->eims_value);
6297                 napi_schedule(&q_vector->napi);
6298         }
6299 }
6300 #endif /* CONFIG_NET_POLL_CONTROLLER */
6301
6302 /**
6303  * igb_io_error_detected - called when PCI error is detected
6304  * @pdev: Pointer to PCI device
6305  * @state: The current pci connection state
6306  *
6307  * This function is called after a PCI bus error affecting
6308  * this device has been detected.
6309  */
6310 static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev,
6311                                               pci_channel_state_t state)
6312 {
6313         struct net_device *netdev = pci_get_drvdata(pdev);
6314         struct igb_adapter *adapter = netdev_priv(netdev);
6315
6316         netif_device_detach(netdev);
6317
6318         if (state == pci_channel_io_perm_failure)
6319                 return PCI_ERS_RESULT_DISCONNECT;
6320
6321         if (netif_running(netdev))
6322                 igb_down(adapter);
6323         pci_disable_device(pdev);
6324
6325         /* Request a slot reset. */
6326         return PCI_ERS_RESULT_NEED_RESET;
6327 }
6328
6329 /**
6330  * igb_io_slot_reset - called after the pci bus has been reset.
6331  * @pdev: Pointer to PCI device
6332  *
6333  * Restart the card from scratch, as if from a cold-boot. Implementation
6334  * resembles the first-half of the igb_resume routine.
6335  */
6336 static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev)
6337 {
6338         struct net_device *netdev = pci_get_drvdata(pdev);
6339         struct igb_adapter *adapter = netdev_priv(netdev);
6340         struct e1000_hw *hw = &adapter->hw;
6341         pci_ers_result_t result;
6342         int err;
6343
6344         if (pci_enable_device_mem(pdev)) {
6345                 dev_err(&pdev->dev,
6346                         "Cannot re-enable PCI device after reset.\n");
6347                 result = PCI_ERS_RESULT_DISCONNECT;
6348         } else {
6349                 pci_set_master(pdev);
6350                 pci_restore_state(pdev);
6351                 pci_save_state(pdev);
6352
6353                 pci_enable_wake(pdev, PCI_D3hot, 0);
6354                 pci_enable_wake(pdev, PCI_D3cold, 0);
6355
6356                 igb_reset(adapter);
6357                 wr32(E1000_WUS, ~0);
6358                 result = PCI_ERS_RESULT_RECOVERED;
6359         }
6360
6361         err = pci_cleanup_aer_uncorrect_error_status(pdev);
6362         if (err) {
6363                 dev_err(&pdev->dev, "pci_cleanup_aer_uncorrect_error_status "
6364                         "failed 0x%0x\n", err);
6365                 /* non-fatal, continue */
6366         }
6367
6368         return result;
6369 }
6370
6371 /**
6372  * igb_io_resume - called when traffic can start flowing again.
6373  * @pdev: Pointer to PCI device
6374  *
6375  * This callback is called when the error recovery driver tells us that
6376  * its OK to resume normal operation. Implementation resembles the
6377  * second-half of the igb_resume routine.
6378  */
6379 static void igb_io_resume(struct pci_dev *pdev)
6380 {
6381         struct net_device *netdev = pci_get_drvdata(pdev);
6382         struct igb_adapter *adapter = netdev_priv(netdev);
6383
6384         if (netif_running(netdev)) {
6385                 if (igb_up(adapter)) {
6386                         dev_err(&pdev->dev, "igb_up failed after reset\n");
6387                         return;
6388                 }
6389         }
6390
6391         netif_device_attach(netdev);
6392
6393         /* let the f/w know that the h/w is now under the control of the
6394          * driver. */
6395         igb_get_hw_control(adapter);
6396 }
6397
6398 static void igb_rar_set_qsel(struct igb_adapter *adapter, u8 *addr, u32 index,
6399                              u8 qsel)
6400 {
6401         u32 rar_low, rar_high;
6402         struct e1000_hw *hw = &adapter->hw;
6403
6404         /* HW expects these in little endian so we reverse the byte order
6405          * from network order (big endian) to little endian
6406          */
6407         rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) |
6408                   ((u32) addr[2] << 16) | ((u32) addr[3] << 24));
6409         rar_high = ((u32) addr[4] | ((u32) addr[5] << 8));
6410
6411         /* Indicate to hardware the Address is Valid. */
6412         rar_high |= E1000_RAH_AV;
6413
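        /* select the pool/queue this receive address is assigned to */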
6414         if (hw->mac.type == e1000_82575)
6415                 rar_high |= E1000_RAH_POOL_1 * qsel;
6416         else
6417                 rar_high |= E1000_RAH_POOL_1 << qsel;
6418
6419         wr32(E1000_RAL(index), rar_low);
6420         wrfl();
6421         wr32(E1000_RAH(index), rar_high);
6422         wrfl();
6423 }
6424
6425 static int igb_set_vf_mac(struct igb_adapter *adapter,
6426                           int vf, unsigned char *mac_addr)
6427 {
6428         struct e1000_hw *hw = &adapter->hw;
6429         /* VF MAC addresses start at the end of the receive addresses and
6430          * move towards the first; as a result a collision should not be possible */
6431         int rar_entry = hw->mac.rar_entry_count - (vf + 1);
6432
6433         memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN);
6434
6435         igb_rar_set_qsel(adapter, mac_addr, rar_entry, vf);
6436
6437         return 0;
6438 }
6439
6440 static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
6441 {
6442         struct igb_adapter *adapter = netdev_priv(netdev);
6443         if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count))
6444                 return -EINVAL;
6445         adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC;
6446         dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf);
6447         dev_info(&adapter->pdev->dev, "Reload the VF driver to make this"
6448                                       " change effective.");
6449         if (test_bit(__IGB_DOWN, &adapter->state)) {
6450                 dev_warn(&adapter->pdev->dev, "The VF MAC address has been set,"
6451                          " but the PF device is not up.\n");
6452                 dev_warn(&adapter->pdev->dev, "Bring the PF device up before"
6453                          " attempting to use the VF device.\n");
6454         }
6455         return igb_set_vf_mac(adapter, vf, mac);
6456 }
6457
6458 static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate)
6459 {
6460         return -EOPNOTSUPP;
6461 }
6462
6463 static int igb_ndo_get_vf_config(struct net_device *netdev,
6464                                  int vf, struct ifla_vf_info *ivi)
6465 {
6466         struct igb_adapter *adapter = netdev_priv(netdev);
6467         if (vf >= adapter->vfs_allocated_count)
6468                 return -EINVAL;
6469         ivi->vf = vf;
6470         memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN);
6471         ivi->tx_rate = 0;
6472         ivi->vlan = adapter->vf_data[vf].pf_vlan;
6473         ivi->qos = adapter->vf_data[vf].pf_qos;
6474         return 0;
6475 }
6476
6477 static void igb_vmm_control(struct igb_adapter *adapter)
6478 {
6479         struct e1000_hw *hw = &adapter->hw;
6480         u32 reg;
6481
6482         switch (hw->mac.type) {
6483         case e1000_82575:
6484         default:
6485                 /* replication is not supported for 82575 */
6486                 return;
6487         case e1000_82576:
6488                 /* notify HW that the MAC is adding vlan tags */
6489                 reg = rd32(E1000_DTXCTL);
6490                 reg |= E1000_DTXCTL_VLAN_ADDED;
6491                 wr32(E1000_DTXCTL, reg);
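                /* fall through */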
6492         case e1000_82580:
6493                 /* enable replication vlan tag stripping */
6494                 reg = rd32(E1000_RPLOLR);
6495                 reg |= E1000_RPLOLR_STRVLAN;
6496                 wr32(E1000_RPLOLR, reg);
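                /* fall through */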
6497         case e1000_i350:
6498                 /* none of the above registers are supported by i350 */
6499                 break;
6500         }
6501
6502         if (adapter->vfs_allocated_count) {
6503                 igb_vmdq_set_loopback_pf(hw, true);
6504                 igb_vmdq_set_replication_pf(hw, true);
6505         } else {
6506                 igb_vmdq_set_loopback_pf(hw, false);
6507                 igb_vmdq_set_replication_pf(hw, false);
6508         }
6509 }
6510
6511 /* igb_main.c */